1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "Utils/AMDGPUAsmUtils.h"
16 #include "Utils/AMDGPUBaseInfo.h"
17 #include "Utils/AMDKernelCodeTUtils.h"
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SmallBitVector.h"
23 #include "llvm/ADT/SmallString.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/ADT/StringSwitch.h"
26 #include "llvm/ADT/Twine.h"
27 #include "llvm/BinaryFormat/ELF.h"
28 #include "llvm/MC/MCAsmInfo.h"
29 #include "llvm/MC/MCContext.h"
30 #include "llvm/MC/MCExpr.h"
31 #include "llvm/MC/MCInst.h"
32 #include "llvm/MC/MCInstrDesc.h"
33 #include "llvm/MC/MCInstrInfo.h"
34 #include "llvm/MC/MCParser/MCAsmLexer.h"
35 #include "llvm/MC/MCParser/MCAsmParser.h"
36 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
37 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
38 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
39 #include "llvm/MC/MCRegisterInfo.h"
40 #include "llvm/MC/MCStreamer.h"
41 #include "llvm/MC/MCSubtargetInfo.h"
42 #include "llvm/MC/MCSymbol.h"
43 #include "llvm/Support/AMDGPUMetadata.h"
44 #include "llvm/Support/AMDHSAKernelDescriptor.h"
45 #include "llvm/Support/Casting.h"
46 #include "llvm/Support/Compiler.h"
47 #include "llvm/Support/ErrorHandling.h"
48 #include "llvm/Support/MachineValueType.h"
49 #include "llvm/Support/MathExtras.h"
50 #include "llvm/Support/SMLoc.h"
51 #include "llvm/Support/TargetParser.h"
52 #include "llvm/Support/TargetRegistry.h"
53 #include "llvm/Support/raw_ostream.h"
54 #include <algorithm>
55 #include <cassert>
56 #include <cstdint>
57 #include <cstring>
58 #include <iterator>
59 #include <map>
60 #include <memory>
61 #include <string>
62 
63 using namespace llvm;
64 using namespace llvm::AMDGPU;
65 using namespace llvm::amdhsa;
66 
67 namespace {
68 
69 class AMDGPUAsmParser;
70 
71 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };
72 
73 //===----------------------------------------------------------------------===//
74 // Operand
75 //===----------------------------------------------------------------------===//
76 
77 class AMDGPUOperand : public MCParsedAsmOperand {
78   enum KindTy {
79     Token,
80     Immediate,
81     Register,
82     Expression
83   } Kind;
84 
85   SMLoc StartLoc, EndLoc;
86   const AMDGPUAsmParser *AsmParser;
87 
88 public:
89   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
90     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
91 
92   using Ptr = std::unique_ptr<AMDGPUOperand>;
93 
94   struct Modifiers {
95     bool Abs = false;
96     bool Neg = false;
97     bool Sext = false;
98 
99     bool hasFPModifiers() const { return Abs || Neg; }
100     bool hasIntModifiers() const { return Sext; }
101     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
102 
103     int64_t getFPModifiersOperand() const {
104       int64_t Operand = 0;
105       Operand |= Abs ? SISrcMods::ABS : 0u;
106       Operand |= Neg ? SISrcMods::NEG : 0u;
107       return Operand;
108     }
109 
110     int64_t getIntModifiersOperand() const {
111       int64_t Operand = 0;
112       Operand |= Sext ? SISrcMods::SEXT : 0u;
113       return Operand;
114     }
115 
116     int64_t getModifiersOperand() const {
117       assert(!(hasFPModifiers() && hasIntModifiers())
118            && "fp and int modifiers should not be used simultaneously");
119       if (hasFPModifiers()) {
120         return getFPModifiersOperand();
121       } else if (hasIntModifiers()) {
122         return getIntModifiersOperand();
123       } else {
124         return 0;
125       }
126     }
127 
128     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
129   };
130 
131   enum ImmTy {
132     ImmTyNone,
133     ImmTyGDS,
134     ImmTyLDS,
135     ImmTyOffen,
136     ImmTyIdxen,
137     ImmTyAddr64,
138     ImmTyOffset,
139     ImmTyInstOffset,
140     ImmTyOffset0,
141     ImmTyOffset1,
142     ImmTyGLC,
143     ImmTySLC,
144     ImmTyTFE,
145     ImmTyD16,
146     ImmTyClampSI,
147     ImmTyOModSI,
148     ImmTyDppCtrl,
149     ImmTyDppRowMask,
150     ImmTyDppBankMask,
151     ImmTyDppBoundCtrl,
152     ImmTySdwaDstSel,
153     ImmTySdwaSrc0Sel,
154     ImmTySdwaSrc1Sel,
155     ImmTySdwaDstUnused,
156     ImmTyDMask,
157     ImmTyUNorm,
158     ImmTyDA,
159     ImmTyR128A16,
160     ImmTyLWE,
161     ImmTyExpTgt,
162     ImmTyExpCompr,
163     ImmTyExpVM,
164     ImmTyFORMAT,
165     ImmTyHwreg,
166     ImmTyOff,
167     ImmTySendMsg,
168     ImmTyInterpSlot,
169     ImmTyInterpAttr,
170     ImmTyAttrChan,
171     ImmTyOpSel,
172     ImmTyOpSelHi,
173     ImmTyNegLo,
174     ImmTyNegHi,
175     ImmTySwizzle,
176     ImmTyGprIdxMode,
177     ImmTyEndpgm,
178     ImmTyHigh
179   };
180 
181   struct TokOp {
182     const char *Data;
183     unsigned Length;
184   };
185 
186   struct ImmOp {
187     int64_t Val;
188     ImmTy Type;
189     bool IsFPImm;
190     Modifiers Mods;
191   };
192 
193   struct RegOp {
194     unsigned RegNo;
195     bool IsForcedVOP3;
196     Modifiers Mods;
197   };
198 
199   union {
200     TokOp Tok;
201     ImmOp Imm;
202     RegOp Reg;
203     const MCExpr *Expr;
204   };
205 
206   bool isToken() const override {
207     if (Kind == Token)
208       return true;
209 
210     if (Kind != Expression || !Expr)
211       return false;
212 
213     // When parsing operands, we can't always tell if something was meant to be
214     // a token, like 'gds', or an expression that references a global variable.
215     // In this case, we assume the string is an expression, and if we need to
216     // interpret is a token, then we treat the symbol name as the token.
217     return isa<MCSymbolRefExpr>(Expr);
218   }
219 
220   bool isImm() const override {
221     return Kind == Immediate;
222   }
223 
224   bool isInlinableImm(MVT type) const;
225   bool isLiteralImm(MVT type) const;
226 
227   bool isRegKind() const {
228     return Kind == Register;
229   }
230 
231   bool isReg() const override {
232     return isRegKind() && !hasModifiers();
233   }
234 
235   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
236     return isRegClass(RCID) || isInlinableImm(type);
237   }
238 
239   bool isRegOrImmWithInt16InputMods() const {
240     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
241   }
242 
243   bool isRegOrImmWithInt32InputMods() const {
244     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
245   }
246 
247   bool isRegOrImmWithInt64InputMods() const {
248     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
249   }
250 
251   bool isRegOrImmWithFP16InputMods() const {
252     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
253   }
254 
255   bool isRegOrImmWithFP32InputMods() const {
256     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
257   }
258 
259   bool isRegOrImmWithFP64InputMods() const {
260     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
261   }
262 
263   bool isVReg() const {
264     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
265            isRegClass(AMDGPU::VReg_64RegClassID) ||
266            isRegClass(AMDGPU::VReg_96RegClassID) ||
267            isRegClass(AMDGPU::VReg_128RegClassID) ||
268            isRegClass(AMDGPU::VReg_256RegClassID) ||
269            isRegClass(AMDGPU::VReg_512RegClassID);
270   }
271 
272   bool isVReg32() const {
273     return isRegClass(AMDGPU::VGPR_32RegClassID);
274   }
275 
276   bool isVReg32OrOff() const {
277     return isOff() || isVReg32();
278   }
279 
280   bool isSDWAOperand(MVT type) const;
281   bool isSDWAFP16Operand() const;
282   bool isSDWAFP32Operand() const;
283   bool isSDWAInt16Operand() const;
284   bool isSDWAInt32Operand() const;
285 
286   bool isImmTy(ImmTy ImmT) const {
287     return isImm() && Imm.Type == ImmT;
288   }
289 
290   bool isImmModifier() const {
291     return isImm() && Imm.Type != ImmTyNone;
292   }
293 
294   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
295   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
296   bool isDMask() const { return isImmTy(ImmTyDMask); }
297   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
298   bool isDA() const { return isImmTy(ImmTyDA); }
299   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
300   bool isLWE() const { return isImmTy(ImmTyLWE); }
301   bool isOff() const { return isImmTy(ImmTyOff); }
302   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
303   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
304   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
305   bool isOffen() const { return isImmTy(ImmTyOffen); }
306   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
307   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
308   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
309   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
310   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
311 
312   bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
313   bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
314   bool isGDS() const { return isImmTy(ImmTyGDS); }
315   bool isLDS() const { return isImmTy(ImmTyLDS); }
316   bool isGLC() const { return isImmTy(ImmTyGLC); }
317   bool isSLC() const { return isImmTy(ImmTySLC); }
318   bool isTFE() const { return isImmTy(ImmTyTFE); }
319   bool isD16() const { return isImmTy(ImmTyD16); }
320   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
321   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
322   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
323   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
324   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
325   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
326   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
327   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
328   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
329   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
330   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
331   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
332   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
333   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
334   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
335   bool isHigh() const { return isImmTy(ImmTyHigh); }
336 
337   bool isMod() const {
338     return isClampSI() || isOModSI();
339   }
340 
341   bool isRegOrImm() const {
342     return isReg() || isImm();
343   }
344 
345   bool isRegClass(unsigned RCID) const;
346 
347   bool isInlineValue() const;
348 
349   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
350     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
351   }
352 
353   bool isSCSrcB16() const {
354     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
355   }
356 
357   bool isSCSrcV2B16() const {
358     return isSCSrcB16();
359   }
360 
361   bool isSCSrcB32() const {
362     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
363   }
364 
365   bool isSCSrcB64() const {
366     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
367   }
368 
369   bool isSCSrcF16() const {
370     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
371   }
372 
373   bool isSCSrcV2F16() const {
374     return isSCSrcF16();
375   }
376 
377   bool isSCSrcF32() const {
378     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
379   }
380 
381   bool isSCSrcF64() const {
382     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
383   }
384 
385   bool isSSrcB32() const {
386     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
387   }
388 
389   bool isSSrcB16() const {
390     return isSCSrcB16() || isLiteralImm(MVT::i16);
391   }
392 
393   bool isSSrcV2B16() const {
394     llvm_unreachable("cannot happen");
395     return isSSrcB16();
396   }
397 
398   bool isSSrcB64() const {
399     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
400     // See isVSrc64().
401     return isSCSrcB64() || isLiteralImm(MVT::i64);
402   }
403 
404   bool isSSrcF32() const {
405     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
406   }
407 
408   bool isSSrcF64() const {
409     return isSCSrcB64() || isLiteralImm(MVT::f64);
410   }
411 
412   bool isSSrcF16() const {
413     return isSCSrcB16() || isLiteralImm(MVT::f16);
414   }
415 
416   bool isSSrcV2F16() const {
417     llvm_unreachable("cannot happen");
418     return isSSrcF16();
419   }
420 
421   bool isSSrcOrLdsB32() const {
422     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
423            isLiteralImm(MVT::i32) || isExpr();
424   }
425 
426   bool isVCSrcB32() const {
427     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
428   }
429 
430   bool isVCSrcB64() const {
431     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
432   }
433 
434   bool isVCSrcB16() const {
435     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
436   }
437 
438   bool isVCSrcV2B16() const {
439     return isVCSrcB16();
440   }
441 
442   bool isVCSrcF32() const {
443     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
444   }
445 
446   bool isVCSrcF64() const {
447     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
448   }
449 
450   bool isVCSrcF16() const {
451     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
452   }
453 
454   bool isVCSrcV2F16() const {
455     return isVCSrcF16();
456   }
457 
458   bool isVSrcB32() const {
459     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
460   }
461 
462   bool isVSrcB64() const {
463     return isVCSrcF64() || isLiteralImm(MVT::i64);
464   }
465 
466   bool isVSrcB16() const {
467     return isVCSrcF16() || isLiteralImm(MVT::i16);
468   }
469 
470   bool isVSrcV2B16() const {
471     llvm_unreachable("cannot happen");
472     return isVSrcB16();
473   }
474 
475   bool isVSrcF32() const {
476     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
477   }
478 
479   bool isVSrcF64() const {
480     return isVCSrcF64() || isLiteralImm(MVT::f64);
481   }
482 
483   bool isVSrcF16() const {
484     return isVCSrcF16() || isLiteralImm(MVT::f16);
485   }
486 
487   bool isVSrcV2F16() const {
488     llvm_unreachable("cannot happen");
489     return isVSrcF16();
490   }
491 
492   bool isKImmFP32() const {
493     return isLiteralImm(MVT::f32);
494   }
495 
496   bool isKImmFP16() const {
497     return isLiteralImm(MVT::f16);
498   }
499 
500   bool isMem() const override {
501     return false;
502   }
503 
504   bool isExpr() const {
505     return Kind == Expression;
506   }
507 
508   bool isSoppBrTarget() const {
509     return isExpr() || isImm();
510   }
511 
512   bool isSWaitCnt() const;
513   bool isHwreg() const;
514   bool isSendMsg() const;
515   bool isSwizzle() const;
516   bool isSMRDOffset8() const;
517   bool isSMRDOffset20() const;
518   bool isSMRDLiteralOffset() const;
519   bool isDPPCtrl() const;
520   bool isGPRIdxMode() const;
521   bool isS16Imm() const;
522   bool isU16Imm() const;
523   bool isEndpgm() const;
524 
525   StringRef getExpressionAsToken() const {
526     assert(isExpr());
527     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
528     return S->getSymbol().getName();
529   }
530 
531   StringRef getToken() const {
532     assert(isToken());
533 
534     if (Kind == Expression)
535       return getExpressionAsToken();
536 
537     return StringRef(Tok.Data, Tok.Length);
538   }
539 
540   int64_t getImm() const {
541     assert(isImm());
542     return Imm.Val;
543   }
544 
545   ImmTy getImmTy() const {
546     assert(isImm());
547     return Imm.Type;
548   }
549 
550   unsigned getReg() const override {
551     return Reg.RegNo;
552   }
553 
554   SMLoc getStartLoc() const override {
555     return StartLoc;
556   }
557 
558   SMLoc getEndLoc() const override {
559     return EndLoc;
560   }
561 
562   SMRange getLocRange() const {
563     return SMRange(StartLoc, EndLoc);
564   }
565 
566   Modifiers getModifiers() const {
567     assert(isRegKind() || isImmTy(ImmTyNone));
568     return isRegKind() ? Reg.Mods : Imm.Mods;
569   }
570 
571   void setModifiers(Modifiers Mods) {
572     assert(isRegKind() || isImmTy(ImmTyNone));
573     if (isRegKind())
574       Reg.Mods = Mods;
575     else
576       Imm.Mods = Mods;
577   }
578 
579   bool hasModifiers() const {
580     return getModifiers().hasModifiers();
581   }
582 
583   bool hasFPModifiers() const {
584     return getModifiers().hasFPModifiers();
585   }
586 
587   bool hasIntModifiers() const {
588     return getModifiers().hasIntModifiers();
589   }
590 
591   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
592 
593   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
594 
595   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
596 
597   template <unsigned Bitwidth>
598   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
599 
600   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
601     addKImmFPOperands<16>(Inst, N);
602   }
603 
604   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
605     addKImmFPOperands<32>(Inst, N);
606   }
607 
608   void addRegOperands(MCInst &Inst, unsigned N) const;
609 
610   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
611     if (isRegKind())
612       addRegOperands(Inst, N);
613     else if (isExpr())
614       Inst.addOperand(MCOperand::createExpr(Expr));
615     else
616       addImmOperands(Inst, N);
617   }
618 
619   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
620     Modifiers Mods = getModifiers();
621     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
622     if (isRegKind()) {
623       addRegOperands(Inst, N);
624     } else {
625       addImmOperands(Inst, N, false);
626     }
627   }
628 
629   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
630     assert(!hasIntModifiers());
631     addRegOrImmWithInputModsOperands(Inst, N);
632   }
633 
634   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
635     assert(!hasFPModifiers());
636     addRegOrImmWithInputModsOperands(Inst, N);
637   }
638 
639   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
640     Modifiers Mods = getModifiers();
641     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
642     assert(isRegKind());
643     addRegOperands(Inst, N);
644   }
645 
646   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
647     assert(!hasIntModifiers());
648     addRegWithInputModsOperands(Inst, N);
649   }
650 
651   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
652     assert(!hasFPModifiers());
653     addRegWithInputModsOperands(Inst, N);
654   }
655 
656   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
657     if (isImm())
658       addImmOperands(Inst, N);
659     else {
660       assert(isExpr());
661       Inst.addOperand(MCOperand::createExpr(Expr));
662     }
663   }
664 
665   static void printImmTy(raw_ostream& OS, ImmTy Type) {
666     switch (Type) {
667     case ImmTyNone: OS << "None"; break;
668     case ImmTyGDS: OS << "GDS"; break;
669     case ImmTyLDS: OS << "LDS"; break;
670     case ImmTyOffen: OS << "Offen"; break;
671     case ImmTyIdxen: OS << "Idxen"; break;
672     case ImmTyAddr64: OS << "Addr64"; break;
673     case ImmTyOffset: OS << "Offset"; break;
674     case ImmTyInstOffset: OS << "InstOffset"; break;
675     case ImmTyOffset0: OS << "Offset0"; break;
676     case ImmTyOffset1: OS << "Offset1"; break;
677     case ImmTyGLC: OS << "GLC"; break;
678     case ImmTySLC: OS << "SLC"; break;
679     case ImmTyTFE: OS << "TFE"; break;
680     case ImmTyD16: OS << "D16"; break;
681     case ImmTyFORMAT: OS << "FORMAT"; break;
682     case ImmTyClampSI: OS << "ClampSI"; break;
683     case ImmTyOModSI: OS << "OModSI"; break;
684     case ImmTyDppCtrl: OS << "DppCtrl"; break;
685     case ImmTyDppRowMask: OS << "DppRowMask"; break;
686     case ImmTyDppBankMask: OS << "DppBankMask"; break;
687     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
688     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
689     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
690     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
691     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
692     case ImmTyDMask: OS << "DMask"; break;
693     case ImmTyUNorm: OS << "UNorm"; break;
694     case ImmTyDA: OS << "DA"; break;
695     case ImmTyR128A16: OS << "R128A16"; break;
696     case ImmTyLWE: OS << "LWE"; break;
697     case ImmTyOff: OS << "Off"; break;
698     case ImmTyExpTgt: OS << "ExpTgt"; break;
699     case ImmTyExpCompr: OS << "ExpCompr"; break;
700     case ImmTyExpVM: OS << "ExpVM"; break;
701     case ImmTyHwreg: OS << "Hwreg"; break;
702     case ImmTySendMsg: OS << "SendMsg"; break;
703     case ImmTyInterpSlot: OS << "InterpSlot"; break;
704     case ImmTyInterpAttr: OS << "InterpAttr"; break;
705     case ImmTyAttrChan: OS << "AttrChan"; break;
706     case ImmTyOpSel: OS << "OpSel"; break;
707     case ImmTyOpSelHi: OS << "OpSelHi"; break;
708     case ImmTyNegLo: OS << "NegLo"; break;
709     case ImmTyNegHi: OS << "NegHi"; break;
710     case ImmTySwizzle: OS << "Swizzle"; break;
711     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
712     case ImmTyHigh: OS << "High"; break;
713     case ImmTyEndpgm:
714       OS << "Endpgm";
715       break;
716     }
717   }
718 
719   void print(raw_ostream &OS) const override {
720     switch (Kind) {
721     case Register:
722       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
723       break;
724     case Immediate:
725       OS << '<' << getImm();
726       if (getImmTy() != ImmTyNone) {
727         OS << " type: "; printImmTy(OS, getImmTy());
728       }
729       OS << " mods: " << Imm.Mods << '>';
730       break;
731     case Token:
732       OS << '\'' << getToken() << '\'';
733       break;
734     case Expression:
735       OS << "<expr " << *Expr << '>';
736       break;
737     }
738   }
739 
740   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
741                                       int64_t Val, SMLoc Loc,
742                                       ImmTy Type = ImmTyNone,
743                                       bool IsFPImm = false) {
744     auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
745     Op->Imm.Val = Val;
746     Op->Imm.IsFPImm = IsFPImm;
747     Op->Imm.Type = Type;
748     Op->Imm.Mods = Modifiers();
749     Op->StartLoc = Loc;
750     Op->EndLoc = Loc;
751     return Op;
752   }
753 
754   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
755                                         StringRef Str, SMLoc Loc,
756                                         bool HasExplicitEncodingSize = true) {
757     auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
758     Res->Tok.Data = Str.data();
759     Res->Tok.Length = Str.size();
760     Res->StartLoc = Loc;
761     Res->EndLoc = Loc;
762     return Res;
763   }
764 
765   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
766                                       unsigned RegNo, SMLoc S,
767                                       SMLoc E,
768                                       bool ForceVOP3) {
769     auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
770     Op->Reg.RegNo = RegNo;
771     Op->Reg.Mods = Modifiers();
772     Op->Reg.IsForcedVOP3 = ForceVOP3;
773     Op->StartLoc = S;
774     Op->EndLoc = E;
775     return Op;
776   }
777 
778   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
779                                        const class MCExpr *Expr, SMLoc S) {
780     auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
781     Op->Expr = Expr;
782     Op->StartLoc = S;
783     Op->EndLoc = S;
784     return Op;
785   }
786 };
787 
788 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
789   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
790   return OS;
791 }
792 
793 //===----------------------------------------------------------------------===//
794 // AsmParser
795 //===----------------------------------------------------------------------===//
796 
797 // Holds info related to the current kernel, e.g. count of SGPRs used.
798 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
799 // .amdgpu_hsa_kernel or at EOF.
800 class KernelScopeInfo {
801   int SgprIndexUnusedMin = -1;
802   int VgprIndexUnusedMin = -1;
803   MCContext *Ctx = nullptr;
804 
805   void usesSgprAt(int i) {
806     if (i >= SgprIndexUnusedMin) {
807       SgprIndexUnusedMin = ++i;
808       if (Ctx) {
809         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
810         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
811       }
812     }
813   }
814 
815   void usesVgprAt(int i) {
816     if (i >= VgprIndexUnusedMin) {
817       VgprIndexUnusedMin = ++i;
818       if (Ctx) {
819         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
820         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
821       }
822     }
823   }
824 
825 public:
826   KernelScopeInfo() = default;
827 
828   void initialize(MCContext &Context) {
829     Ctx = &Context;
830     usesSgprAt(SgprIndexUnusedMin = -1);
831     usesVgprAt(VgprIndexUnusedMin = -1);
832   }
833 
834   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
835     switch (RegKind) {
836       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
837       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
838       default: break;
839     }
840   }
841 };
842 
843 class AMDGPUAsmParser : public MCTargetAsmParser {
844   MCAsmParser &Parser;
845 
846   // Number of extra operands parsed after the first optional operand.
847   // This may be necessary to skip hardcoded mandatory operands.
848   static const unsigned MAX_OPR_LOOKAHEAD = 8;
849 
850   unsigned ForcedEncodingSize = 0;
851   bool ForcedDPP = false;
852   bool ForcedSDWA = false;
853   KernelScopeInfo KernelScope;
854 
855   /// @name Auto-generated Match Functions
856   /// {
857 
858 #define GET_ASSEMBLER_HEADER
859 #include "AMDGPUGenAsmMatcher.inc"
860 
861   /// }
862 
863 private:
864   bool ParseAsAbsoluteExpression(uint32_t &Ret);
865   bool OutOfRangeError(SMRange Range);
866   /// Calculate VGPR/SGPR blocks required for given target, reserved
867   /// registers, and user-specified NextFreeXGPR values.
868   ///
869   /// \param Features [in] Target features, used for bug corrections.
870   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
871   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
872   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
873   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
874   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
875   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
876   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
877   /// \param VGPRBlocks [out] Result VGPR block count.
878   /// \param SGPRBlocks [out] Result SGPR block count.
879   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
880                           bool FlatScrUsed, bool XNACKUsed,
881                           unsigned NextFreeVGPR, SMRange VGPRRange,
882                           unsigned NextFreeSGPR, SMRange SGPRRange,
883                           unsigned &VGPRBlocks, unsigned &SGPRBlocks);
884   bool ParseDirectiveAMDGCNTarget();
885   bool ParseDirectiveAMDHSAKernel();
886   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
887   bool ParseDirectiveHSACodeObjectVersion();
888   bool ParseDirectiveHSACodeObjectISA();
889   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
890   bool ParseDirectiveAMDKernelCodeT();
891   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
892   bool ParseDirectiveAMDGPUHsaKernel();
893 
894   bool ParseDirectiveISAVersion();
895   bool ParseDirectiveHSAMetadata();
896   bool ParseDirectivePALMetadataBegin();
897   bool ParseDirectivePALMetadata();
898 
899   /// Common code to parse out a block of text (typically YAML) between start and
900   /// end directives.
901   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
902                            const char *AssemblerDirectiveEnd,
903                            std::string &CollectString);
904 
905   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
906                              RegisterKind RegKind, unsigned Reg1,
907                              unsigned RegNum);
908   bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
909                            unsigned& RegNum, unsigned& RegWidth,
910                            unsigned *DwordRegIndex);
911   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
912   void initializeGprCountSymbol(RegisterKind RegKind);
913   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
914                              unsigned RegWidth);
915   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
916                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
917   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
918                  bool IsGdsHardcoded);
919 
920 public:
921   enum AMDGPUMatchResultTy {
922     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
923   };
924 
925   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
926 
927   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
928                const MCInstrInfo &MII,
929                const MCTargetOptions &Options)
930       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
931     MCAsmParserExtension::Initialize(Parser);
932 
933     if (getFeatureBits().none()) {
934       // Set default features.
935       copySTI().ToggleFeature("southern-islands");
936     }
937 
938     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
939 
940     {
941       // TODO: make those pre-defined variables read-only.
942       // Currently there is none suitable machinery in the core llvm-mc for this.
943       // MCSymbol::isRedefinable is intended for another purpose, and
944       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
945       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
946       MCContext &Ctx = getContext();
947       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
948         MCSymbol *Sym =
949             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
950         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
951         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
952         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
953         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
954         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
955       } else {
956         MCSymbol *Sym =
957             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
958         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
959         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
960         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
961         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
962         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
963       }
964       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
965         initializeGprCountSymbol(IS_VGPR);
966         initializeGprCountSymbol(IS_SGPR);
967       } else
968         KernelScope.initialize(getContext());
969     }
970   }
971 
972   bool hasXNACK() const {
973     return AMDGPU::hasXNACK(getSTI());
974   }
975 
976   bool hasMIMG_R128() const {
977     return AMDGPU::hasMIMG_R128(getSTI());
978   }
979 
980   bool hasPackedD16() const {
981     return AMDGPU::hasPackedD16(getSTI());
982   }
983 
984   bool isSI() const {
985     return AMDGPU::isSI(getSTI());
986   }
987 
988   bool isCI() const {
989     return AMDGPU::isCI(getSTI());
990   }
991 
992   bool isVI() const {
993     return AMDGPU::isVI(getSTI());
994   }
995 
996   bool isGFX9() const {
997     return AMDGPU::isGFX9(getSTI());
998   }
999 
1000   bool hasInv2PiInlineImm() const {
1001     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1002   }
1003 
1004   bool hasFlatOffsets() const {
1005     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1006   }
1007 
1008   bool hasSGPR102_SGPR103() const {
1009     return !isVI();
1010   }
1011 
1012   bool hasIntClamp() const {
1013     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1014   }
1015 
1016   AMDGPUTargetStreamer &getTargetStreamer() {
1017     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1018     return static_cast<AMDGPUTargetStreamer &>(TS);
1019   }
1020 
1021   const MCRegisterInfo *getMRI() const {
1022     // We need this const_cast because for some reason getContext() is not const
1023     // in MCAsmParser.
1024     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1025   }
1026 
1027   const MCInstrInfo *getMII() const {
1028     return &MII;
1029   }
1030 
1031   const FeatureBitset &getFeatureBits() const {
1032     return getSTI().getFeatureBits();
1033   }
1034 
1035   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1036   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1037   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1038 
1039   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1040   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1041   bool isForcedDPP() const { return ForcedDPP; }
1042   bool isForcedSDWA() const { return ForcedSDWA; }
1043   ArrayRef<unsigned> getMatchedVariants() const;
1044 
1045   std::unique_ptr<AMDGPUOperand> parseRegister();
1046   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1047   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1048   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1049                                       unsigned Kind) override;
1050   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1051                                OperandVector &Operands, MCStreamer &Out,
1052                                uint64_t &ErrorInfo,
1053                                bool MatchingInlineAsm) override;
1054   bool ParseDirective(AsmToken DirectiveID) override;
1055   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
1056   StringRef parseMnemonicSuffix(StringRef Name);
1057   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1058                         SMLoc NameLoc, OperandVector &Operands) override;
1059   //bool ProcessInstruction(MCInst &Inst);
1060 
1061   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1062 
1063   OperandMatchResultTy
1064   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1065                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1066                      bool (*ConvertResult)(int64_t &) = nullptr);
1067 
1068   OperandMatchResultTy parseOperandArrayWithPrefix(
1069     const char *Prefix,
1070     OperandVector &Operands,
1071     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1072     bool (*ConvertResult)(int64_t&) = nullptr);
1073 
1074   OperandMatchResultTy
1075   parseNamedBit(const char *Name, OperandVector &Operands,
1076                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1077   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1078                                              StringRef &Value);
1079 
1080   bool parseAbsoluteExpr(int64_t &Val, bool AbsMod = false);
1081   OperandMatchResultTy parseImm(OperandVector &Operands, bool AbsMod = false);
1082   OperandMatchResultTy parseReg(OperandVector &Operands);
1083   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool AbsMod = false);
1084   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1085   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1086   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1087   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1088   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1089   OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1090 
1091   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1092   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1093   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1094   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1095 
1096   bool parseCnt(int64_t &IntVal);
1097   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1098   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1099 
1100 private:
1101   struct OperandInfoTy {
1102     int64_t Id;
1103     bool IsSymbolic = false;
1104 
1105     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1106   };
1107 
1108   bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
1109   bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1110 
1111   void errorExpTgt();
1112   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1113 
1114   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
1115   bool validateSOPLiteral(const MCInst &Inst) const;
1116   bool validateConstantBusLimitations(const MCInst &Inst);
1117   bool validateEarlyClobberLimitations(const MCInst &Inst);
1118   bool validateIntClampSupported(const MCInst &Inst);
1119   bool validateMIMGAtomicDMask(const MCInst &Inst);
1120   bool validateMIMGGatherDMask(const MCInst &Inst);
1121   bool validateMIMGDataSize(const MCInst &Inst);
1122   bool validateMIMGD16(const MCInst &Inst);
1123   bool validateLdsDirect(const MCInst &Inst);
1124   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1125   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1126   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1127 
1128   bool trySkipId(const StringRef Id);
1129   bool trySkipToken(const AsmToken::TokenKind Kind);
1130   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1131   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1132   bool parseExpr(int64_t &Imm);
1133 
1134 public:
1135   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1136   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1137 
1138   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1139   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1140   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1141   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1142   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1143 
1144   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1145                             const unsigned MinVal,
1146                             const unsigned MaxVal,
1147                             const StringRef ErrMsg);
1148   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1149   bool parseSwizzleOffset(int64_t &Imm);
1150   bool parseSwizzleMacro(int64_t &Imm);
1151   bool parseSwizzleQuadPerm(int64_t &Imm);
1152   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1153   bool parseSwizzleBroadcast(int64_t &Imm);
1154   bool parseSwizzleSwap(int64_t &Imm);
1155   bool parseSwizzleReverse(int64_t &Imm);
1156 
1157   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1158   int64_t parseGPRIdxMacro();
1159 
1160   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1161   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1162   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1163   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1164   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1165 
1166   AMDGPUOperand::Ptr defaultGLC() const;
1167   AMDGPUOperand::Ptr defaultSLC() const;
1168 
1169   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1170   AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1171   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1172   AMDGPUOperand::Ptr defaultOffsetU12() const;
1173   AMDGPUOperand::Ptr defaultOffsetS13() const;
1174 
1175   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1176 
1177   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1178                OptionalImmIndexMap &OptionalIdx);
1179   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1180   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1181   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1182 
1183   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1184 
1185   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1186                bool IsAtomic = false);
1187   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1188 
1189   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1190   AMDGPUOperand::Ptr defaultRowMask() const;
1191   AMDGPUOperand::Ptr defaultBankMask() const;
1192   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1193   void cvtDPP(MCInst &Inst, const OperandVector &Operands);
1194 
1195   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1196                                     AMDGPUOperand::ImmTy Type);
1197   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1198   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1199   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1200   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1201   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1202   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1203                 uint64_t BasicInstType, bool skipVcc = false);
1204 
1205   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1206   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1207 };
1208 
1209 struct OptionalOperand {
1210   const char *Name;
1211   AMDGPUOperand::ImmTy Type;
1212   bool IsBit;
1213   bool (*ConvertResult)(int64_t&);
1214 };
1215 
1216 } // end anonymous namespace
1217 
1218 // May be called with integer type with equivalent bitwidth.
1219 static const fltSemantics *getFltSemantics(unsigned Size) {
1220   switch (Size) {
1221   case 4:
1222     return &APFloat::IEEEsingle();
1223   case 8:
1224     return &APFloat::IEEEdouble();
1225   case 2:
1226     return &APFloat::IEEEhalf();
1227   default:
1228     llvm_unreachable("unsupported fp type");
1229   }
1230 }
1231 
1232 static const fltSemantics *getFltSemantics(MVT VT) {
1233   return getFltSemantics(VT.getSizeInBits() / 8);
1234 }
1235 
1236 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1237   switch (OperandType) {
1238   case AMDGPU::OPERAND_REG_IMM_INT32:
1239   case AMDGPU::OPERAND_REG_IMM_FP32:
1240   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1241   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1242     return &APFloat::IEEEsingle();
1243   case AMDGPU::OPERAND_REG_IMM_INT64:
1244   case AMDGPU::OPERAND_REG_IMM_FP64:
1245   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1246   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1247     return &APFloat::IEEEdouble();
1248   case AMDGPU::OPERAND_REG_IMM_INT16:
1249   case AMDGPU::OPERAND_REG_IMM_FP16:
1250   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1251   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1252   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1253   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1254     return &APFloat::IEEEhalf();
1255   default:
1256     llvm_unreachable("unsupported fp type");
1257   }
1258 }
1259 
1260 //===----------------------------------------------------------------------===//
1261 // Operand
1262 //===----------------------------------------------------------------------===//
1263 
1264 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1265   bool Lost;
1266 
1267   // Convert literal to single precision
1268   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1269                                                APFloat::rmNearestTiesToEven,
1270                                                &Lost);
1271   // We allow precision lost but not overflow or underflow
1272   if (Status != APFloat::opOK &&
1273       Lost &&
1274       ((Status & APFloat::opOverflow)  != 0 ||
1275        (Status & APFloat::opUnderflow) != 0)) {
1276     return false;
1277   }
1278 
1279   return true;
1280 }
1281 
1282 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1283   return isUIntN(Size, Val) || isIntN(Size, Val);
1284 }
1285 
1286 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1287 
1288   // This is a hack to enable named inline values like
1289   // shared_base with both 32-bit and 64-bit operands.
1290   // Note that these values are defined as
1291   // 32-bit operands only.
1292   if (isInlineValue()) {
1293     return true;
1294   }
1295 
1296   if (!isImmTy(ImmTyNone)) {
1297     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1298     return false;
1299   }
1300   // TODO: We should avoid using host float here. It would be better to
1301   // check the float bit values which is what a few other places do.
1302   // We've had bot failures before due to weird NaN support on mips hosts.
1303 
1304   APInt Literal(64, Imm.Val);
1305 
1306   if (Imm.IsFPImm) { // We got fp literal token
1307     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1308       return AMDGPU::isInlinableLiteral64(Imm.Val,
1309                                           AsmParser->hasInv2PiInlineImm());
1310     }
1311 
1312     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1313     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1314       return false;
1315 
1316     if (type.getScalarSizeInBits() == 16) {
1317       return AMDGPU::isInlinableLiteral16(
1318         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1319         AsmParser->hasInv2PiInlineImm());
1320     }
1321 
1322     // Check if single precision literal is inlinable
1323     return AMDGPU::isInlinableLiteral32(
1324       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1325       AsmParser->hasInv2PiInlineImm());
1326   }
1327 
1328   // We got int literal token.
1329   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1330     return AMDGPU::isInlinableLiteral64(Imm.Val,
1331                                         AsmParser->hasInv2PiInlineImm());
1332   }
1333 
1334   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1335     return false;
1336   }
1337 
1338   if (type.getScalarSizeInBits() == 16) {
1339     return AMDGPU::isInlinableLiteral16(
1340       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1341       AsmParser->hasInv2PiInlineImm());
1342   }
1343 
1344   return AMDGPU::isInlinableLiteral32(
1345     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1346     AsmParser->hasInv2PiInlineImm());
1347 }
1348 
1349 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1350   // Check that this immediate can be added as literal
1351   if (!isImmTy(ImmTyNone)) {
1352     return false;
1353   }
1354 
1355   if (!Imm.IsFPImm) {
1356     // We got int literal token.
1357 
1358     if (type == MVT::f64 && hasFPModifiers()) {
1359       // Cannot apply fp modifiers to int literals preserving the same semantics
1360       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1361       // disable these cases.
1362       return false;
1363     }
1364 
1365     unsigned Size = type.getSizeInBits();
1366     if (Size == 64)
1367       Size = 32;
1368 
1369     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1370     // types.
1371     return isSafeTruncation(Imm.Val, Size);
1372   }
1373 
1374   // We got fp literal token
1375   if (type == MVT::f64) { // Expected 64-bit fp operand
1376     // We would set low 64-bits of literal to zeroes but we accept this literals
1377     return true;
1378   }
1379 
1380   if (type == MVT::i64) { // Expected 64-bit int operand
1381     // We don't allow fp literals in 64-bit integer instructions. It is
1382     // unclear how we should encode them.
1383     return false;
1384   }
1385 
1386   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1387   return canLosslesslyConvertToFPType(FPLiteral, type);
1388 }
1389 
1390 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1391   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1392 }
1393 
1394 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1395   if (AsmParser->isVI())
1396     return isVReg32();
1397   else if (AsmParser->isGFX9())
1398     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1399   else
1400     return false;
1401 }
1402 
1403 bool AMDGPUOperand::isSDWAFP16Operand() const {
1404   return isSDWAOperand(MVT::f16);
1405 }
1406 
1407 bool AMDGPUOperand::isSDWAFP32Operand() const {
1408   return isSDWAOperand(MVT::f32);
1409 }
1410 
1411 bool AMDGPUOperand::isSDWAInt16Operand() const {
1412   return isSDWAOperand(MVT::i16);
1413 }
1414 
1415 bool AMDGPUOperand::isSDWAInt32Operand() const {
1416   return isSDWAOperand(MVT::i32);
1417 }
1418 
1419 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1420 {
1421   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1422   assert(Size == 2 || Size == 4 || Size == 8);
1423 
1424   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1425 
1426   if (Imm.Mods.Abs) {
1427     Val &= ~FpSignMask;
1428   }
1429   if (Imm.Mods.Neg) {
1430     Val ^= FpSignMask;
1431   }
1432 
1433   return Val;
1434 }
1435 
1436 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1437   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1438                              Inst.getNumOperands())) {
1439     addLiteralImmOperand(Inst, Imm.Val,
1440                          ApplyModifiers &
1441                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1442   } else {
1443     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1444     Inst.addOperand(MCOperand::createImm(Imm.Val));
1445   }
1446 }
1447 
1448 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1449   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1450   auto OpNum = Inst.getNumOperands();
1451   // Check that this operand accepts literals
1452   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1453 
1454   if (ApplyModifiers) {
1455     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1456     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1457     Val = applyInputFPModifiers(Val, Size);
1458   }
1459 
1460   APInt Literal(64, Val);
1461   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1462 
1463   if (Imm.IsFPImm) { // We got fp literal token
1464     switch (OpTy) {
1465     case AMDGPU::OPERAND_REG_IMM_INT64:
1466     case AMDGPU::OPERAND_REG_IMM_FP64:
1467     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1468     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1469       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1470                                        AsmParser->hasInv2PiInlineImm())) {
1471         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1472         return;
1473       }
1474 
1475       // Non-inlineable
1476       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1477         // For fp operands we check if low 32 bits are zeros
1478         if (Literal.getLoBits(32) != 0) {
1479           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1480           "Can't encode literal as exact 64-bit floating-point operand. "
1481           "Low 32-bits will be set to zero");
1482         }
1483 
1484         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1485         return;
1486       }
1487 
1488       // We don't allow fp literals in 64-bit integer instructions. It is
1489       // unclear how we should encode them. This case should be checked earlier
1490       // in predicate methods (isLiteralImm())
1491       llvm_unreachable("fp literal in 64-bit integer instruction.");
1492 
1493     case AMDGPU::OPERAND_REG_IMM_INT32:
1494     case AMDGPU::OPERAND_REG_IMM_FP32:
1495     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1496     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1497     case AMDGPU::OPERAND_REG_IMM_INT16:
1498     case AMDGPU::OPERAND_REG_IMM_FP16:
1499     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1500     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1501     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1502     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1503       bool lost;
1504       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1505       // Convert literal to single precision
1506       FPLiteral.convert(*getOpFltSemantics(OpTy),
1507                         APFloat::rmNearestTiesToEven, &lost);
1508       // We allow precision lost but not overflow or underflow. This should be
1509       // checked earlier in isLiteralImm()
1510 
1511       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1512       Inst.addOperand(MCOperand::createImm(ImmVal));
1513       return;
1514     }
1515     default:
1516       llvm_unreachable("invalid operand size");
1517     }
1518 
1519     return;
1520   }
1521 
1522   // We got int literal token.
1523   // Only sign extend inline immediates.
1524   switch (OpTy) {
1525   case AMDGPU::OPERAND_REG_IMM_INT32:
1526   case AMDGPU::OPERAND_REG_IMM_FP32:
1527   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1528   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1529     if (isSafeTruncation(Val, 32) &&
1530         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1531                                      AsmParser->hasInv2PiInlineImm())) {
1532       Inst.addOperand(MCOperand::createImm(Val));
1533       return;
1534     }
1535 
1536     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1537     return;
1538 
1539   case AMDGPU::OPERAND_REG_IMM_INT64:
1540   case AMDGPU::OPERAND_REG_IMM_FP64:
1541   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1542   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1543     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1544       Inst.addOperand(MCOperand::createImm(Val));
1545       return;
1546     }
1547 
1548     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1549     return;
1550 
1551   case AMDGPU::OPERAND_REG_IMM_INT16:
1552   case AMDGPU::OPERAND_REG_IMM_FP16:
1553   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1554   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1555     if (isSafeTruncation(Val, 16) &&
1556         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1557                                      AsmParser->hasInv2PiInlineImm())) {
1558       Inst.addOperand(MCOperand::createImm(Val));
1559       return;
1560     }
1561 
1562     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1563     return;
1564 
1565   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1566   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1567     assert(isSafeTruncation(Val, 16));
1568     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1569                                         AsmParser->hasInv2PiInlineImm()));
1570 
1571     Inst.addOperand(MCOperand::createImm(Val));
1572     return;
1573   }
1574   default:
1575     llvm_unreachable("invalid operand size");
1576   }
1577 }
1578 
1579 template <unsigned Bitwidth>
1580 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1581   APInt Literal(64, Imm.Val);
1582 
1583   if (!Imm.IsFPImm) {
1584     // We got int literal token.
1585     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1586     return;
1587   }
1588 
1589   bool Lost;
1590   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1591   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1592                     APFloat::rmNearestTiesToEven, &Lost);
1593   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1594 }
1595 
1596 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1597   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1598 }
1599 
1600 static bool isInlineValue(unsigned Reg) {
1601   switch (Reg) {
1602   case AMDGPU::SRC_SHARED_BASE:
1603   case AMDGPU::SRC_SHARED_LIMIT:
1604   case AMDGPU::SRC_PRIVATE_BASE:
1605   case AMDGPU::SRC_PRIVATE_LIMIT:
1606   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1607     return true;
1608   default:
1609     return false;
1610   }
1611 }
1612 
1613 bool AMDGPUOperand::isInlineValue() const {
1614   return isRegKind() && ::isInlineValue(getReg());
1615 }
1616 
1617 //===----------------------------------------------------------------------===//
1618 // AsmParser
1619 //===----------------------------------------------------------------------===//
1620 
1621 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1622   if (Is == IS_VGPR) {
1623     switch (RegWidth) {
1624       default: return -1;
1625       case 1: return AMDGPU::VGPR_32RegClassID;
1626       case 2: return AMDGPU::VReg_64RegClassID;
1627       case 3: return AMDGPU::VReg_96RegClassID;
1628       case 4: return AMDGPU::VReg_128RegClassID;
1629       case 8: return AMDGPU::VReg_256RegClassID;
1630       case 16: return AMDGPU::VReg_512RegClassID;
1631     }
1632   } else if (Is == IS_TTMP) {
1633     switch (RegWidth) {
1634       default: return -1;
1635       case 1: return AMDGPU::TTMP_32RegClassID;
1636       case 2: return AMDGPU::TTMP_64RegClassID;
1637       case 4: return AMDGPU::TTMP_128RegClassID;
1638       case 8: return AMDGPU::TTMP_256RegClassID;
1639       case 16: return AMDGPU::TTMP_512RegClassID;
1640     }
1641   } else if (Is == IS_SGPR) {
1642     switch (RegWidth) {
1643       default: return -1;
1644       case 1: return AMDGPU::SGPR_32RegClassID;
1645       case 2: return AMDGPU::SGPR_64RegClassID;
1646       case 4: return AMDGPU::SGPR_128RegClassID;
1647       case 8: return AMDGPU::SGPR_256RegClassID;
1648       case 16: return AMDGPU::SGPR_512RegClassID;
1649     }
1650   }
1651   return -1;
1652 }
1653 
1654 static unsigned getSpecialRegForName(StringRef RegName) {
1655   return StringSwitch<unsigned>(RegName)
1656     .Case("exec", AMDGPU::EXEC)
1657     .Case("vcc", AMDGPU::VCC)
1658     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1659     .Case("xnack_mask", AMDGPU::XNACK_MASK)
1660     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
1661     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
1662     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1663     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1664     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
1665     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
1666     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1667     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1668     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1669     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1670     .Case("lds_direct", AMDGPU::LDS_DIRECT)
1671     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1672     .Case("m0", AMDGPU::M0)
1673     .Case("scc", AMDGPU::SCC)
1674     .Case("tba", AMDGPU::TBA)
1675     .Case("tma", AMDGPU::TMA)
1676     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1677     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1678     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1679     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1680     .Case("vcc_lo", AMDGPU::VCC_LO)
1681     .Case("vcc_hi", AMDGPU::VCC_HI)
1682     .Case("exec_lo", AMDGPU::EXEC_LO)
1683     .Case("exec_hi", AMDGPU::EXEC_HI)
1684     .Case("tma_lo", AMDGPU::TMA_LO)
1685     .Case("tma_hi", AMDGPU::TMA_HI)
1686     .Case("tba_lo", AMDGPU::TBA_LO)
1687     .Case("tba_hi", AMDGPU::TBA_HI)
1688     .Default(0);
1689 }
1690 
1691 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1692                                     SMLoc &EndLoc) {
1693   auto R = parseRegister();
1694   if (!R) return true;
1695   assert(R->isReg());
1696   RegNo = R->getReg();
1697   StartLoc = R->getStartLoc();
1698   EndLoc = R->getEndLoc();
1699   return false;
1700 }
1701 
1702 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1703                                             RegisterKind RegKind, unsigned Reg1,
1704                                             unsigned RegNum) {
1705   switch (RegKind) {
1706   case IS_SPECIAL:
1707     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1708       Reg = AMDGPU::EXEC;
1709       RegWidth = 2;
1710       return true;
1711     }
1712     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1713       Reg = AMDGPU::FLAT_SCR;
1714       RegWidth = 2;
1715       return true;
1716     }
1717     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
1718       Reg = AMDGPU::XNACK_MASK;
1719       RegWidth = 2;
1720       return true;
1721     }
1722     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1723       Reg = AMDGPU::VCC;
1724       RegWidth = 2;
1725       return true;
1726     }
1727     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1728       Reg = AMDGPU::TBA;
1729       RegWidth = 2;
1730       return true;
1731     }
1732     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
1733       Reg = AMDGPU::TMA;
1734       RegWidth = 2;
1735       return true;
1736     }
1737     return false;
1738   case IS_VGPR:
1739   case IS_SGPR:
1740   case IS_TTMP:
1741     if (Reg1 != Reg + RegWidth) {
1742       return false;
1743     }
1744     RegWidth++;
1745     return true;
1746   default:
1747     llvm_unreachable("unexpected register kind");
1748   }
1749 }
1750 
1751 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1752                                           unsigned &RegNum, unsigned &RegWidth,
1753                                           unsigned *DwordRegIndex) {
1754   if (DwordRegIndex) { *DwordRegIndex = 0; }
1755   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
1756   if (getLexer().is(AsmToken::Identifier)) {
1757     StringRef RegName = Parser.getTok().getString();
1758     if ((Reg = getSpecialRegForName(RegName))) {
1759       Parser.Lex();
1760       RegKind = IS_SPECIAL;
1761     } else {
1762       unsigned RegNumIndex = 0;
1763       if (RegName[0] == 'v') {
1764         RegNumIndex = 1;
1765         RegKind = IS_VGPR;
1766       } else if (RegName[0] == 's') {
1767         RegNumIndex = 1;
1768         RegKind = IS_SGPR;
1769       } else if (RegName.startswith("ttmp")) {
1770         RegNumIndex = strlen("ttmp");
1771         RegKind = IS_TTMP;
1772       } else {
1773         return false;
1774       }
1775       if (RegName.size() > RegNumIndex) {
1776         // Single 32-bit register: vXX.
1777         if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
1778           return false;
1779         Parser.Lex();
1780         RegWidth = 1;
1781       } else {
1782         // Range of registers: v[XX:YY]. ":YY" is optional.
1783         Parser.Lex();
1784         int64_t RegLo, RegHi;
1785         if (getLexer().isNot(AsmToken::LBrac))
1786           return false;
1787         Parser.Lex();
1788 
1789         if (getParser().parseAbsoluteExpression(RegLo))
1790           return false;
1791 
1792         const bool isRBrace = getLexer().is(AsmToken::RBrac);
1793         if (!isRBrace && getLexer().isNot(AsmToken::Colon))
1794           return false;
1795         Parser.Lex();
1796 
1797         if (isRBrace) {
1798           RegHi = RegLo;
1799         } else {
1800           if (getParser().parseAbsoluteExpression(RegHi))
1801             return false;
1802 
1803           if (getLexer().isNot(AsmToken::RBrac))
1804             return false;
1805           Parser.Lex();
1806         }
1807         RegNum = (unsigned) RegLo;
1808         RegWidth = (RegHi - RegLo) + 1;
1809       }
1810     }
1811   } else if (getLexer().is(AsmToken::LBrac)) {
1812     // List of consecutive registers: [s0,s1,s2,s3]
1813     Parser.Lex();
1814     if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
1815       return false;
1816     if (RegWidth != 1)
1817       return false;
1818     RegisterKind RegKind1;
1819     unsigned Reg1, RegNum1, RegWidth1;
1820     do {
1821       if (getLexer().is(AsmToken::Comma)) {
1822         Parser.Lex();
1823       } else if (getLexer().is(AsmToken::RBrac)) {
1824         Parser.Lex();
1825         break;
1826       } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
1827         if (RegWidth1 != 1) {
1828           return false;
1829         }
1830         if (RegKind1 != RegKind) {
1831           return false;
1832         }
1833         if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
1834           return false;
1835         }
1836       } else {
1837         return false;
1838       }
1839     } while (true);
1840   } else {
1841     return false;
1842   }
1843   switch (RegKind) {
1844   case IS_SPECIAL:
1845     RegNum = 0;
1846     RegWidth = 1;
1847     break;
1848   case IS_VGPR:
1849   case IS_SGPR:
1850   case IS_TTMP:
1851   {
1852     unsigned Size = 1;
1853     if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
1854       // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
1855       Size = std::min(RegWidth, 4u);
1856     }
1857     if (RegNum % Size != 0)
1858       return false;
1859     if (DwordRegIndex) { *DwordRegIndex = RegNum; }
1860     RegNum = RegNum / Size;
1861     int RCID = getRegClass(RegKind, RegWidth);
1862     if (RCID == -1)
1863       return false;
1864     const MCRegisterClass RC = TRI->getRegClass(RCID);
1865     if (RegNum >= RC.getNumRegs())
1866       return false;
1867     Reg = RC.getRegister(RegNum);
1868     break;
1869   }
1870 
1871   default:
1872     llvm_unreachable("unexpected register kind");
1873   }
1874 
1875   if (!subtargetHasRegister(*TRI, Reg))
1876     return false;
1877   return true;
1878 }
1879 
1880 Optional<StringRef>
1881 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
1882   switch (RegKind) {
1883   case IS_VGPR:
1884     return StringRef(".amdgcn.next_free_vgpr");
1885   case IS_SGPR:
1886     return StringRef(".amdgcn.next_free_sgpr");
1887   default:
1888     return None;
1889   }
1890 }
1891 
1892 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
1893   auto SymbolName = getGprCountSymbolName(RegKind);
1894   assert(SymbolName && "initializing invalid register kind");
1895   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
1896   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
1897 }
1898 
1899 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
1900                                             unsigned DwordRegIndex,
1901                                             unsigned RegWidth) {
1902   // Symbols are only defined for GCN targets
1903   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
1904     return true;
1905 
1906   auto SymbolName = getGprCountSymbolName(RegKind);
1907   if (!SymbolName)
1908     return true;
1909   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
1910 
1911   int64_t NewMax = DwordRegIndex + RegWidth - 1;
1912   int64_t OldCount;
1913 
1914   if (!Sym->isVariable())
1915     return !Error(getParser().getTok().getLoc(),
1916                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
1917   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
1918     return !Error(
1919         getParser().getTok().getLoc(),
1920         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
1921 
1922   if (OldCount <= NewMax)
1923     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
1924 
1925   return true;
1926 }
1927 
1928 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
1929   const auto &Tok = Parser.getTok();
1930   SMLoc StartLoc = Tok.getLoc();
1931   SMLoc EndLoc = Tok.getEndLoc();
1932   RegisterKind RegKind;
1933   unsigned Reg, RegNum, RegWidth, DwordRegIndex;
1934 
1935   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
1936     return nullptr;
1937   }
1938   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1939     if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
1940       return nullptr;
1941   } else
1942     KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
1943   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false);
1944 }
1945 
1946 bool
1947 AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool AbsMod) {
1948   if (AbsMod && getLexer().peekTok().is(AsmToken::Pipe) &&
1949       (getLexer().getKind() == AsmToken::Integer ||
1950        getLexer().getKind() == AsmToken::Real)) {
1951     // This is a workaround for handling operands like these:
1952     //     |1.0|
1953     //     |-1|
1954     // This syntax is not compatible with syntax of standard
1955     // MC expressions (due to the trailing '|').
1956 
1957     SMLoc EndLoc;
1958     const MCExpr *Expr;
1959 
1960     if (getParser().parsePrimaryExpr(Expr, EndLoc)) {
1961       return true;
1962     }
1963 
1964     return !Expr->evaluateAsAbsolute(Val);
1965   }
1966 
1967   return getParser().parseAbsoluteExpression(Val);
1968 }
1969 
1970 OperandMatchResultTy
1971 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) {
1972   // TODO: add syntactic sugar for 1/(2*PI)
1973   bool Minus = false;
1974   if (getLexer().getKind() == AsmToken::Minus) {
1975     const AsmToken NextToken = getLexer().peekTok();
1976     if (!NextToken.is(AsmToken::Integer) &&
1977         !NextToken.is(AsmToken::Real)) {
1978         return MatchOperand_NoMatch;
1979     }
1980     Minus = true;
1981     Parser.Lex();
1982   }
1983 
1984   SMLoc S = Parser.getTok().getLoc();
1985   switch(getLexer().getKind()) {
1986   case AsmToken::Integer: {
1987     int64_t IntVal;
1988     if (parseAbsoluteExpr(IntVal, AbsMod))
1989       return MatchOperand_ParseFail;
1990     if (Minus)
1991       IntVal *= -1;
1992     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
1993     return MatchOperand_Success;
1994   }
1995   case AsmToken::Real: {
1996     int64_t IntVal;
1997     if (parseAbsoluteExpr(IntVal, AbsMod))
1998       return MatchOperand_ParseFail;
1999 
2000     APFloat F(BitsToDouble(IntVal));
2001     if (Minus)
2002       F.changeSign();
2003     Operands.push_back(
2004         AMDGPUOperand::CreateImm(this, F.bitcastToAPInt().getZExtValue(), S,
2005                                  AMDGPUOperand::ImmTyNone, true));
2006     return MatchOperand_Success;
2007   }
2008   default:
2009     return MatchOperand_NoMatch;
2010   }
2011 }
2012 
2013 OperandMatchResultTy
2014 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2015   if (auto R = parseRegister()) {
2016     assert(R->isReg());
2017     R->Reg.IsForcedVOP3 = isForcedVOP3();
2018     Operands.push_back(std::move(R));
2019     return MatchOperand_Success;
2020   }
2021   return MatchOperand_NoMatch;
2022 }
2023 
2024 OperandMatchResultTy
2025 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool AbsMod) {
2026   auto res = parseImm(Operands, AbsMod);
2027   if (res != MatchOperand_NoMatch) {
2028     return res;
2029   }
2030 
2031   return parseReg(Operands);
2032 }
2033 
2034 OperandMatchResultTy
2035 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2036                                               bool AllowImm) {
2037   bool Negate = false, Negate2 = false, Abs = false, Abs2 = false;
2038 
2039   if (getLexer().getKind()== AsmToken::Minus) {
2040     const AsmToken NextToken = getLexer().peekTok();
2041 
2042     // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2043     if (NextToken.is(AsmToken::Minus)) {
2044       Error(Parser.getTok().getLoc(), "invalid syntax, expected 'neg' modifier");
2045       return MatchOperand_ParseFail;
2046     }
2047 
2048     // '-' followed by an integer literal N should be interpreted as integer
2049     // negation rather than a floating-point NEG modifier applied to N.
2050     // Beside being contr-intuitive, such use of floating-point NEG modifier
2051     // results in different meaning of integer literals used with VOP1/2/C
2052     // and VOP3, for example:
2053     //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2054     //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2055     // Negative fp literals should be handled likewise for unifomtity
2056     if (!NextToken.is(AsmToken::Integer) && !NextToken.is(AsmToken::Real)) {
2057       Parser.Lex();
2058       Negate = true;
2059     }
2060   }
2061 
2062   if (getLexer().getKind() == AsmToken::Identifier &&
2063       Parser.getTok().getString() == "neg") {
2064     if (Negate) {
2065       Error(Parser.getTok().getLoc(), "expected register or immediate");
2066       return MatchOperand_ParseFail;
2067     }
2068     Parser.Lex();
2069     Negate2 = true;
2070     if (getLexer().isNot(AsmToken::LParen)) {
2071       Error(Parser.getTok().getLoc(), "expected left paren after neg");
2072       return MatchOperand_ParseFail;
2073     }
2074     Parser.Lex();
2075   }
2076 
2077   if (getLexer().getKind() == AsmToken::Identifier &&
2078       Parser.getTok().getString() == "abs") {
2079     Parser.Lex();
2080     Abs2 = true;
2081     if (getLexer().isNot(AsmToken::LParen)) {
2082       Error(Parser.getTok().getLoc(), "expected left paren after abs");
2083       return MatchOperand_ParseFail;
2084     }
2085     Parser.Lex();
2086   }
2087 
2088   if (getLexer().getKind() == AsmToken::Pipe) {
2089     if (Abs2) {
2090       Error(Parser.getTok().getLoc(), "expected register or immediate");
2091       return MatchOperand_ParseFail;
2092     }
2093     Parser.Lex();
2094     Abs = true;
2095   }
2096 
2097   OperandMatchResultTy Res;
2098   if (AllowImm) {
2099     Res = parseRegOrImm(Operands, Abs);
2100   } else {
2101     Res = parseReg(Operands);
2102   }
2103   if (Res != MatchOperand_Success) {
2104     return Res;
2105   }
2106 
2107   AMDGPUOperand::Modifiers Mods;
2108   if (Abs) {
2109     if (getLexer().getKind() != AsmToken::Pipe) {
2110       Error(Parser.getTok().getLoc(), "expected vertical bar");
2111       return MatchOperand_ParseFail;
2112     }
2113     Parser.Lex();
2114     Mods.Abs = true;
2115   }
2116   if (Abs2) {
2117     if (getLexer().isNot(AsmToken::RParen)) {
2118       Error(Parser.getTok().getLoc(), "expected closing parentheses");
2119       return MatchOperand_ParseFail;
2120     }
2121     Parser.Lex();
2122     Mods.Abs = true;
2123   }
2124 
2125   if (Negate) {
2126     Mods.Neg = true;
2127   } else if (Negate2) {
2128     if (getLexer().isNot(AsmToken::RParen)) {
2129       Error(Parser.getTok().getLoc(), "expected closing parentheses");
2130       return MatchOperand_ParseFail;
2131     }
2132     Parser.Lex();
2133     Mods.Neg = true;
2134   }
2135 
2136   if (Mods.hasFPModifiers()) {
2137     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2138     Op.setModifiers(Mods);
2139   }
2140   return MatchOperand_Success;
2141 }
2142 
2143 OperandMatchResultTy
2144 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2145                                                bool AllowImm) {
2146   bool Sext = false;
2147 
2148   if (getLexer().getKind() == AsmToken::Identifier &&
2149       Parser.getTok().getString() == "sext") {
2150     Parser.Lex();
2151     Sext = true;
2152     if (getLexer().isNot(AsmToken::LParen)) {
2153       Error(Parser.getTok().getLoc(), "expected left paren after sext");
2154       return MatchOperand_ParseFail;
2155     }
2156     Parser.Lex();
2157   }
2158 
2159   OperandMatchResultTy Res;
2160   if (AllowImm) {
2161     Res = parseRegOrImm(Operands);
2162   } else {
2163     Res = parseReg(Operands);
2164   }
2165   if (Res != MatchOperand_Success) {
2166     return Res;
2167   }
2168 
2169   AMDGPUOperand::Modifiers Mods;
2170   if (Sext) {
2171     if (getLexer().isNot(AsmToken::RParen)) {
2172       Error(Parser.getTok().getLoc(), "expected closing parentheses");
2173       return MatchOperand_ParseFail;
2174     }
2175     Parser.Lex();
2176     Mods.Sext = true;
2177   }
2178 
2179   if (Mods.hasIntModifiers()) {
2180     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2181     Op.setModifiers(Mods);
2182   }
2183 
2184   return MatchOperand_Success;
2185 }
2186 
2187 OperandMatchResultTy
2188 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2189   return parseRegOrImmWithFPInputMods(Operands, false);
2190 }
2191 
2192 OperandMatchResultTy
2193 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2194   return parseRegOrImmWithIntInputMods(Operands, false);
2195 }
2196 
2197 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2198   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2199   if (Reg) {
2200     Operands.push_back(std::move(Reg));
2201     return MatchOperand_Success;
2202   }
2203 
2204   const AsmToken &Tok = Parser.getTok();
2205   if (Tok.getString() == "off") {
2206     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Tok.getLoc(),
2207                                                 AMDGPUOperand::ImmTyOff, false));
2208     Parser.Lex();
2209     return MatchOperand_Success;
2210   }
2211 
2212   return MatchOperand_NoMatch;
2213 }
2214 
2215 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2216   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2217 
2218   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2219       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2220       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2221       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2222     return Match_InvalidOperand;
2223 
2224   if ((TSFlags & SIInstrFlags::VOP3) &&
2225       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2226       getForcedEncodingSize() != 64)
2227     return Match_PreferE32;
2228 
2229   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2230       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2231     // v_mac_f32/16 allow only dst_sel == DWORD;
2232     auto OpNum =
2233         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2234     const auto &Op = Inst.getOperand(OpNum);
2235     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2236       return Match_InvalidOperand;
2237     }
2238   }
2239 
2240   if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) {
2241     // FIXME: Produces error without correct column reported.
2242     auto OpNum =
2243         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
2244     const auto &Op = Inst.getOperand(OpNum);
2245     if (Op.getImm() != 0)
2246       return Match_InvalidOperand;
2247   }
2248 
2249   return Match_Success;
2250 }
2251 
2252 // What asm variants we should check
2253 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2254   if (getForcedEncodingSize() == 32) {
2255     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2256     return makeArrayRef(Variants);
2257   }
2258 
2259   if (isForcedVOP3()) {
2260     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2261     return makeArrayRef(Variants);
2262   }
2263 
2264   if (isForcedSDWA()) {
2265     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2266                                         AMDGPUAsmVariants::SDWA9};
2267     return makeArrayRef(Variants);
2268   }
2269 
2270   if (isForcedDPP()) {
2271     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2272     return makeArrayRef(Variants);
2273   }
2274 
2275   static const unsigned Variants[] = {
2276     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2277     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2278   };
2279 
2280   return makeArrayRef(Variants);
2281 }
2282 
2283 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2284   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2285   const unsigned Num = Desc.getNumImplicitUses();
2286   for (unsigned i = 0; i < Num; ++i) {
2287     unsigned Reg = Desc.ImplicitUses[i];
2288     switch (Reg) {
2289     case AMDGPU::FLAT_SCR:
2290     case AMDGPU::VCC:
2291     case AMDGPU::M0:
2292       return Reg;
2293     default:
2294       break;
2295     }
2296   }
2297   return AMDGPU::NoRegister;
2298 }
2299 
2300 // NB: This code is correct only when used to check constant
2301 // bus limitations because GFX7 support no f16 inline constants.
2302 // Note that there are no cases when a GFX7 opcode violates
2303 // constant bus limitations due to the use of an f16 constant.
2304 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2305                                        unsigned OpIdx) const {
2306   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2307 
2308   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2309     return false;
2310   }
2311 
2312   const MCOperand &MO = Inst.getOperand(OpIdx);
2313 
2314   int64_t Val = MO.getImm();
2315   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2316 
2317   switch (OpSize) { // expected operand size
2318   case 8:
2319     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2320   case 4:
2321     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2322   case 2: {
2323     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2324     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2325         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
2326       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2327     } else {
2328       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2329     }
2330   }
2331   default:
2332     llvm_unreachable("invalid operand size");
2333   }
2334 }
2335 
2336 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2337   const MCOperand &MO = Inst.getOperand(OpIdx);
2338   if (MO.isImm()) {
2339     return !isInlineConstant(Inst, OpIdx);
2340   }
2341   return !MO.isReg() ||
2342          isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2343 }
2344 
2345 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2346   const unsigned Opcode = Inst.getOpcode();
2347   const MCInstrDesc &Desc = MII.get(Opcode);
2348   unsigned ConstantBusUseCount = 0;
2349 
2350   if (Desc.TSFlags &
2351       (SIInstrFlags::VOPC |
2352        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2353        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2354        SIInstrFlags::SDWA)) {
2355     // Check special imm operands (used by madmk, etc)
2356     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2357       ++ConstantBusUseCount;
2358     }
2359 
2360     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2361     if (SGPRUsed != AMDGPU::NoRegister) {
2362       ++ConstantBusUseCount;
2363     }
2364 
2365     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2366     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2367     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2368 
2369     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2370 
2371     for (int OpIdx : OpIndices) {
2372       if (OpIdx == -1) break;
2373 
2374       const MCOperand &MO = Inst.getOperand(OpIdx);
2375       if (usesConstantBus(Inst, OpIdx)) {
2376         if (MO.isReg()) {
2377           const unsigned Reg = mc2PseudoReg(MO.getReg());
2378           // Pairs of registers with a partial intersections like these
2379           //   s0, s[0:1]
2380           //   flat_scratch_lo, flat_scratch
2381           //   flat_scratch_lo, flat_scratch_hi
2382           // are theoretically valid but they are disabled anyway.
2383           // Note that this code mimics SIInstrInfo::verifyInstruction
2384           if (Reg != SGPRUsed) {
2385             ++ConstantBusUseCount;
2386           }
2387           SGPRUsed = Reg;
2388         } else { // Expression or a literal
2389           ++ConstantBusUseCount;
2390         }
2391       }
2392     }
2393   }
2394 
2395   return ConstantBusUseCount <= 1;
2396 }
2397 
2398 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2399   const unsigned Opcode = Inst.getOpcode();
2400   const MCInstrDesc &Desc = MII.get(Opcode);
2401 
2402   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2403   if (DstIdx == -1 ||
2404       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2405     return true;
2406   }
2407 
2408   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2409 
2410   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2411   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2412   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2413 
2414   assert(DstIdx != -1);
2415   const MCOperand &Dst = Inst.getOperand(DstIdx);
2416   assert(Dst.isReg());
2417   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2418 
2419   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2420 
2421   for (int SrcIdx : SrcIndices) {
2422     if (SrcIdx == -1) break;
2423     const MCOperand &Src = Inst.getOperand(SrcIdx);
2424     if (Src.isReg()) {
2425       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2426       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2427         return false;
2428       }
2429     }
2430   }
2431 
2432   return true;
2433 }
2434 
2435 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2436 
2437   const unsigned Opc = Inst.getOpcode();
2438   const MCInstrDesc &Desc = MII.get(Opc);
2439 
2440   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2441     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2442     assert(ClampIdx != -1);
2443     return Inst.getOperand(ClampIdx).getImm() == 0;
2444   }
2445 
2446   return true;
2447 }
2448 
2449 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2450 
2451   const unsigned Opc = Inst.getOpcode();
2452   const MCInstrDesc &Desc = MII.get(Opc);
2453 
2454   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2455     return true;
2456 
2457   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2458   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2459   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2460 
2461   assert(VDataIdx != -1);
2462   assert(DMaskIdx != -1);
2463   assert(TFEIdx != -1);
2464 
2465   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2466   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2467   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2468   if (DMask == 0)
2469     DMask = 1;
2470 
2471   unsigned DataSize =
2472     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2473   if (hasPackedD16()) {
2474     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2475     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2476       DataSize = (DataSize + 1) / 2;
2477   }
2478 
2479   return (VDataSize / 4) == DataSize + TFESize;
2480 }
2481 
2482 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
2483 
2484   const unsigned Opc = Inst.getOpcode();
2485   const MCInstrDesc &Desc = MII.get(Opc);
2486 
2487   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2488     return true;
2489   if (!Desc.mayLoad() || !Desc.mayStore())
2490     return true; // Not atomic
2491 
2492   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2493   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2494 
2495   // This is an incomplete check because image_atomic_cmpswap
2496   // may only use 0x3 and 0xf while other atomic operations
2497   // may use 0x1 and 0x3. However these limitations are
2498   // verified when we check that dmask matches dst size.
2499   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
2500 }
2501 
2502 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
2503 
2504   const unsigned Opc = Inst.getOpcode();
2505   const MCInstrDesc &Desc = MII.get(Opc);
2506 
2507   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
2508     return true;
2509 
2510   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2511   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2512 
2513   // GATHER4 instructions use dmask in a different fashion compared to
2514   // other MIMG instructions. The only useful DMASK values are
2515   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
2516   // (red,red,red,red) etc.) The ISA document doesn't mention
2517   // this.
2518   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
2519 }
2520 
2521 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
2522 
2523   const unsigned Opc = Inst.getOpcode();
2524   const MCInstrDesc &Desc = MII.get(Opc);
2525 
2526   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2527     return true;
2528 
2529   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2530   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
2531     if (isCI() || isSI())
2532       return false;
2533   }
2534 
2535   return true;
2536 }
2537 
2538 static bool IsRevOpcode(const unsigned Opcode)
2539 {
2540   switch (Opcode) {
2541   case AMDGPU::V_SUBREV_F32_e32:
2542   case AMDGPU::V_SUBREV_F32_e64:
2543   case AMDGPU::V_SUBREV_F32_e32_si:
2544   case AMDGPU::V_SUBREV_F32_e32_vi:
2545   case AMDGPU::V_SUBREV_F32_e64_si:
2546   case AMDGPU::V_SUBREV_F32_e64_vi:
2547   case AMDGPU::V_SUBREV_I32_e32:
2548   case AMDGPU::V_SUBREV_I32_e64:
2549   case AMDGPU::V_SUBREV_I32_e32_si:
2550   case AMDGPU::V_SUBREV_I32_e64_si:
2551   case AMDGPU::V_SUBBREV_U32_e32:
2552   case AMDGPU::V_SUBBREV_U32_e64:
2553   case AMDGPU::V_SUBBREV_U32_e32_si:
2554   case AMDGPU::V_SUBBREV_U32_e32_vi:
2555   case AMDGPU::V_SUBBREV_U32_e64_si:
2556   case AMDGPU::V_SUBBREV_U32_e64_vi:
2557   case AMDGPU::V_SUBREV_U32_e32:
2558   case AMDGPU::V_SUBREV_U32_e64:
2559   case AMDGPU::V_SUBREV_U32_e32_gfx9:
2560   case AMDGPU::V_SUBREV_U32_e32_vi:
2561   case AMDGPU::V_SUBREV_U32_e64_gfx9:
2562   case AMDGPU::V_SUBREV_U32_e64_vi:
2563   case AMDGPU::V_SUBREV_F16_e32:
2564   case AMDGPU::V_SUBREV_F16_e64:
2565   case AMDGPU::V_SUBREV_F16_e32_vi:
2566   case AMDGPU::V_SUBREV_F16_e64_vi:
2567   case AMDGPU::V_SUBREV_U16_e32:
2568   case AMDGPU::V_SUBREV_U16_e64:
2569   case AMDGPU::V_SUBREV_U16_e32_vi:
2570   case AMDGPU::V_SUBREV_U16_e64_vi:
2571   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
2572   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
2573   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
2574   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
2575   case AMDGPU::V_LSHLREV_B32_e32_si:
2576   case AMDGPU::V_LSHLREV_B32_e64_si:
2577   case AMDGPU::V_LSHLREV_B16_e32_vi:
2578   case AMDGPU::V_LSHLREV_B16_e64_vi:
2579   case AMDGPU::V_LSHLREV_B32_e32_vi:
2580   case AMDGPU::V_LSHLREV_B32_e64_vi:
2581   case AMDGPU::V_LSHLREV_B64_vi:
2582   case AMDGPU::V_LSHRREV_B32_e32_si:
2583   case AMDGPU::V_LSHRREV_B32_e64_si:
2584   case AMDGPU::V_LSHRREV_B16_e32_vi:
2585   case AMDGPU::V_LSHRREV_B16_e64_vi:
2586   case AMDGPU::V_LSHRREV_B32_e32_vi:
2587   case AMDGPU::V_LSHRREV_B32_e64_vi:
2588   case AMDGPU::V_LSHRREV_B64_vi:
2589   case AMDGPU::V_ASHRREV_I32_e64_si:
2590   case AMDGPU::V_ASHRREV_I32_e32_si:
2591   case AMDGPU::V_ASHRREV_I16_e32_vi:
2592   case AMDGPU::V_ASHRREV_I16_e64_vi:
2593   case AMDGPU::V_ASHRREV_I32_e32_vi:
2594   case AMDGPU::V_ASHRREV_I32_e64_vi:
2595   case AMDGPU::V_ASHRREV_I64_vi:
2596   case AMDGPU::V_PK_LSHLREV_B16_vi:
2597   case AMDGPU::V_PK_LSHRREV_B16_vi:
2598   case AMDGPU::V_PK_ASHRREV_I16_vi:
2599     return true;
2600   default:
2601     return false;
2602   }
2603 }
2604 
2605 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
2606 
2607   using namespace SIInstrFlags;
2608   const unsigned Opcode = Inst.getOpcode();
2609   const MCInstrDesc &Desc = MII.get(Opcode);
2610 
2611   // lds_direct register is defined so that it can be used
2612   // with 9-bit operands only. Ignore encodings which do not accept these.
2613   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
2614     return true;
2615 
2616   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2617   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2618   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2619 
2620   const int SrcIndices[] = { Src1Idx, Src2Idx };
2621 
2622   // lds_direct cannot be specified as either src1 or src2.
2623   for (int SrcIdx : SrcIndices) {
2624     if (SrcIdx == -1) break;
2625     const MCOperand &Src = Inst.getOperand(SrcIdx);
2626     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
2627       return false;
2628     }
2629   }
2630 
2631   if (Src0Idx == -1)
2632     return true;
2633 
2634   const MCOperand &Src = Inst.getOperand(Src0Idx);
2635   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
2636     return true;
2637 
2638   // lds_direct is specified as src0. Check additional limitations.
2639   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
2640 }
2641 
2642 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
2643   unsigned Opcode = Inst.getOpcode();
2644   const MCInstrDesc &Desc = MII.get(Opcode);
2645   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
2646     return true;
2647 
2648   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2649   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2650 
2651   const int OpIndices[] = { Src0Idx, Src1Idx };
2652 
2653   unsigned NumLiterals = 0;
2654   uint32_t LiteralValue;
2655 
2656   for (int OpIdx : OpIndices) {
2657     if (OpIdx == -1) break;
2658 
2659     const MCOperand &MO = Inst.getOperand(OpIdx);
2660     if (MO.isImm() &&
2661         // Exclude special imm operands (like that used by s_set_gpr_idx_on)
2662         AMDGPU::isSISrcOperand(Desc, OpIdx) &&
2663         !isInlineConstant(Inst, OpIdx)) {
2664       uint32_t Value = static_cast<uint32_t>(MO.getImm());
2665       if (NumLiterals == 0 || LiteralValue != Value) {
2666         LiteralValue = Value;
2667         ++NumLiterals;
2668       }
2669     }
2670   }
2671 
2672   return NumLiterals <= 1;
2673 }
2674 
2675 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
2676                                           const SMLoc &IDLoc) {
2677   if (!validateLdsDirect(Inst)) {
2678     Error(IDLoc,
2679       "invalid use of lds_direct");
2680     return false;
2681   }
2682   if (!validateSOPLiteral(Inst)) {
2683     Error(IDLoc,
2684       "only one literal operand is allowed");
2685     return false;
2686   }
2687   if (!validateConstantBusLimitations(Inst)) {
2688     Error(IDLoc,
2689       "invalid operand (violates constant bus restrictions)");
2690     return false;
2691   }
2692   if (!validateEarlyClobberLimitations(Inst)) {
2693     Error(IDLoc,
2694       "destination must be different than all sources");
2695     return false;
2696   }
2697   if (!validateIntClampSupported(Inst)) {
2698     Error(IDLoc,
2699       "integer clamping is not supported on this GPU");
2700     return false;
2701   }
2702   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
2703   if (!validateMIMGD16(Inst)) {
2704     Error(IDLoc,
2705       "d16 modifier is not supported on this GPU");
2706     return false;
2707   }
2708   if (!validateMIMGDataSize(Inst)) {
2709     Error(IDLoc,
2710       "image data size does not match dmask and tfe");
2711     return false;
2712   }
2713   if (!validateMIMGAtomicDMask(Inst)) {
2714     Error(IDLoc,
2715       "invalid atomic image dmask");
2716     return false;
2717   }
2718   if (!validateMIMGGatherDMask(Inst)) {
2719     Error(IDLoc,
2720       "invalid image_gather dmask: only one bit must be set");
2721     return false;
2722   }
2723 
2724   return true;
2725 }
2726 
2727 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
2728                                             const FeatureBitset &FBS,
2729                                             unsigned VariantID = 0);
2730 
2731 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2732                                               OperandVector &Operands,
2733                                               MCStreamer &Out,
2734                                               uint64_t &ErrorInfo,
2735                                               bool MatchingInlineAsm) {
2736   MCInst Inst;
2737   unsigned Result = Match_Success;
2738   for (auto Variant : getMatchedVariants()) {
2739     uint64_t EI;
2740     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
2741                                   Variant);
2742     // We order match statuses from least to most specific. We use most specific
2743     // status as resulting
2744     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
2745     if ((R == Match_Success) ||
2746         (R == Match_PreferE32) ||
2747         (R == Match_MissingFeature && Result != Match_PreferE32) ||
2748         (R == Match_InvalidOperand && Result != Match_MissingFeature
2749                                    && Result != Match_PreferE32) ||
2750         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
2751                                    && Result != Match_MissingFeature
2752                                    && Result != Match_PreferE32)) {
2753       Result = R;
2754       ErrorInfo = EI;
2755     }
2756     if (R == Match_Success)
2757       break;
2758   }
2759 
2760   switch (Result) {
2761   default: break;
2762   case Match_Success:
2763     if (!validateInstruction(Inst, IDLoc)) {
2764       return true;
2765     }
2766     Inst.setLoc(IDLoc);
2767     Out.EmitInstruction(Inst, getSTI());
2768     return false;
2769 
2770   case Match_MissingFeature:
2771     return Error(IDLoc, "instruction not supported on this GPU");
2772 
2773   case Match_MnemonicFail: {
2774     FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
2775     std::string Suggestion = AMDGPUMnemonicSpellCheck(
2776         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
2777     return Error(IDLoc, "invalid instruction" + Suggestion,
2778                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
2779   }
2780 
2781   case Match_InvalidOperand: {
2782     SMLoc ErrorLoc = IDLoc;
2783     if (ErrorInfo != ~0ULL) {
2784       if (ErrorInfo >= Operands.size()) {
2785         return Error(IDLoc, "too few operands for instruction");
2786       }
2787       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
2788       if (ErrorLoc == SMLoc())
2789         ErrorLoc = IDLoc;
2790     }
2791     return Error(ErrorLoc, "invalid operand for instruction");
2792   }
2793 
2794   case Match_PreferE32:
2795     return Error(IDLoc, "internal error: instruction without _e64 suffix "
2796                         "should be encoded as e32");
2797   }
2798   llvm_unreachable("Implement any new match types added!");
2799 }
2800 
2801 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
2802   int64_t Tmp = -1;
2803   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
2804     return true;
2805   }
2806   if (getParser().parseAbsoluteExpression(Tmp)) {
2807     return true;
2808   }
2809   Ret = static_cast<uint32_t>(Tmp);
2810   return false;
2811 }
2812 
2813 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
2814                                                uint32_t &Minor) {
2815   if (ParseAsAbsoluteExpression(Major))
2816     return TokError("invalid major version");
2817 
2818   if (getLexer().isNot(AsmToken::Comma))
2819     return TokError("minor version number required, comma expected");
2820   Lex();
2821 
2822   if (ParseAsAbsoluteExpression(Minor))
2823     return TokError("invalid minor version");
2824 
2825   return false;
2826 }
2827 
2828 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
2829   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
2830     return TokError("directive only supported for amdgcn architecture");
2831 
2832   std::string Target;
2833 
2834   SMLoc TargetStart = getTok().getLoc();
2835   if (getParser().parseEscapedString(Target))
2836     return true;
2837   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
2838 
2839   std::string ExpectedTarget;
2840   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
2841   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
2842 
2843   if (Target != ExpectedTargetOS.str())
2844     return getParser().Error(TargetRange.Start, "target must match options",
2845                              TargetRange);
2846 
2847   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
2848   return false;
2849 }
2850 
2851 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
2852   return getParser().Error(Range.Start, "value out of range", Range);
2853 }
2854 
2855 bool AMDGPUAsmParser::calculateGPRBlocks(
2856     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
2857     bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange,
2858     unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks,
2859     unsigned &SGPRBlocks) {
2860   // TODO(scott.linder): These calculations are duplicated from
2861   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
2862   IsaVersion Version = getIsaVersion(getSTI().getCPU());
2863 
2864   unsigned NumVGPRs = NextFreeVGPR;
2865   unsigned NumSGPRs = NextFreeSGPR;
2866   unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(&getSTI());
2867 
2868   if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
2869       NumSGPRs > MaxAddressableNumSGPRs)
2870     return OutOfRangeError(SGPRRange);
2871 
2872   NumSGPRs +=
2873       IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
2874 
2875   if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
2876       NumSGPRs > MaxAddressableNumSGPRs)
2877     return OutOfRangeError(SGPRRange);
2878 
2879   if (Features.test(FeatureSGPRInitBug))
2880     NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
2881 
2882   VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs);
2883   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
2884 
2885   return false;
2886 }
2887 
2888 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
2889   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
2890     return TokError("directive only supported for amdgcn architecture");
2891 
2892   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
2893     return TokError("directive only supported for amdhsa OS");
2894 
2895   StringRef KernelName;
2896   if (getParser().parseIdentifier(KernelName))
2897     return true;
2898 
2899   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor();
2900 
2901   StringSet<> Seen;
2902 
2903   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
2904 
2905   SMRange VGPRRange;
2906   uint64_t NextFreeVGPR = 0;
2907   SMRange SGPRRange;
2908   uint64_t NextFreeSGPR = 0;
2909   unsigned UserSGPRCount = 0;
2910   bool ReserveVCC = true;
2911   bool ReserveFlatScr = true;
2912   bool ReserveXNACK = hasXNACK();
2913 
2914   while (true) {
2915     while (getLexer().is(AsmToken::EndOfStatement))
2916       Lex();
2917 
2918     if (getLexer().isNot(AsmToken::Identifier))
2919       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
2920 
2921     StringRef ID = getTok().getIdentifier();
2922     SMRange IDRange = getTok().getLocRange();
2923     Lex();
2924 
2925     if (ID == ".end_amdhsa_kernel")
2926       break;
2927 
2928     if (Seen.find(ID) != Seen.end())
2929       return TokError(".amdhsa_ directives cannot be repeated");
2930     Seen.insert(ID);
2931 
2932     SMLoc ValStart = getTok().getLoc();
2933     int64_t IVal;
2934     if (getParser().parseAbsoluteExpression(IVal))
2935       return true;
2936     SMLoc ValEnd = getTok().getLoc();
2937     SMRange ValRange = SMRange(ValStart, ValEnd);
2938 
2939     if (IVal < 0)
2940       return OutOfRangeError(ValRange);
2941 
2942     uint64_t Val = IVal;
2943 
2944 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
2945   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
2946     return OutOfRangeError(RANGE);                                             \
2947   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
2948 
2949     if (ID == ".amdhsa_group_segment_fixed_size") {
2950       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
2951         return OutOfRangeError(ValRange);
2952       KD.group_segment_fixed_size = Val;
2953     } else if (ID == ".amdhsa_private_segment_fixed_size") {
2954       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
2955         return OutOfRangeError(ValRange);
2956       KD.private_segment_fixed_size = Val;
2957     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
2958       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2959                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
2960                        Val, ValRange);
2961       UserSGPRCount += 4;
2962     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
2963       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2964                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
2965                        ValRange);
2966       UserSGPRCount += 2;
2967     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
2968       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2969                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
2970                        ValRange);
2971       UserSGPRCount += 2;
2972     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
2973       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2974                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
2975                        Val, ValRange);
2976       UserSGPRCount += 2;
2977     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
2978       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2979                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
2980                        ValRange);
2981       UserSGPRCount += 2;
2982     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
2983       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2984                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
2985                        ValRange);
2986       UserSGPRCount += 2;
2987     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
2988       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2989                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
2990                        Val, ValRange);
2991       UserSGPRCount += 1;
2992     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
2993       PARSE_BITS_ENTRY(
2994           KD.compute_pgm_rsrc2,
2995           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
2996           ValRange);
2997     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
2998       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2999                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3000                        ValRange);
3001     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3002       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3003                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3004                        ValRange);
3005     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3006       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3007                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3008                        ValRange);
3009     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3010       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3011                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3012                        ValRange);
3013     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3014       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3015                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3016                        ValRange);
3017     } else if (ID == ".amdhsa_next_free_vgpr") {
3018       VGPRRange = ValRange;
3019       NextFreeVGPR = Val;
3020     } else if (ID == ".amdhsa_next_free_sgpr") {
3021       SGPRRange = ValRange;
3022       NextFreeSGPR = Val;
3023     } else if (ID == ".amdhsa_reserve_vcc") {
3024       if (!isUInt<1>(Val))
3025         return OutOfRangeError(ValRange);
3026       ReserveVCC = Val;
3027     } else if (ID == ".amdhsa_reserve_flat_scratch") {
3028       if (IVersion.Major < 7)
3029         return getParser().Error(IDRange.Start, "directive requires gfx7+",
3030                                  IDRange);
3031       if (!isUInt<1>(Val))
3032         return OutOfRangeError(ValRange);
3033       ReserveFlatScr = Val;
3034     } else if (ID == ".amdhsa_reserve_xnack_mask") {
3035       if (IVersion.Major < 8)
3036         return getParser().Error(IDRange.Start, "directive requires gfx8+",
3037                                  IDRange);
3038       if (!isUInt<1>(Val))
3039         return OutOfRangeError(ValRange);
3040       ReserveXNACK = Val;
3041     } else if (ID == ".amdhsa_float_round_mode_32") {
3042       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3043                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
3044     } else if (ID == ".amdhsa_float_round_mode_16_64") {
3045       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3046                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
3047     } else if (ID == ".amdhsa_float_denorm_mode_32") {
3048       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3049                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
3050     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
3051       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3052                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
3053                        ValRange);
3054     } else if (ID == ".amdhsa_dx10_clamp") {
3055       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3056                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
3057     } else if (ID == ".amdhsa_ieee_mode") {
3058       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
3059                        Val, ValRange);
3060     } else if (ID == ".amdhsa_fp16_overflow") {
3061       if (IVersion.Major < 9)
3062         return getParser().Error(IDRange.Start, "directive requires gfx9+",
3063                                  IDRange);
3064       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
3065                        ValRange);
3066     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
3067       PARSE_BITS_ENTRY(
3068           KD.compute_pgm_rsrc2,
3069           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
3070           ValRange);
3071     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
3072       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3073                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
3074                        Val, ValRange);
3075     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
3076       PARSE_BITS_ENTRY(
3077           KD.compute_pgm_rsrc2,
3078           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
3079           ValRange);
3080     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
3081       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3082                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
3083                        Val, ValRange);
3084     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
3085       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3086                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
3087                        Val, ValRange);
3088     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
3089       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3090                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
3091                        Val, ValRange);
3092     } else if (ID == ".amdhsa_exception_int_div_zero") {
3093       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3094                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
3095                        Val, ValRange);
3096     } else {
3097       return getParser().Error(IDRange.Start,
3098                                "unknown .amdhsa_kernel directive", IDRange);
3099     }
3100 
3101 #undef PARSE_BITS_ENTRY
3102   }
3103 
3104   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
3105     return TokError(".amdhsa_next_free_vgpr directive is required");
3106 
3107   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
3108     return TokError(".amdhsa_next_free_sgpr directive is required");
3109 
3110   unsigned VGPRBlocks;
3111   unsigned SGPRBlocks;
3112   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
3113                          ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR,
3114                          SGPRRange, VGPRBlocks, SGPRBlocks))
3115     return true;
3116 
3117   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
3118           VGPRBlocks))
3119     return OutOfRangeError(VGPRRange);
3120   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3121                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
3122 
3123   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
3124           SGPRBlocks))
3125     return OutOfRangeError(SGPRRange);
3126   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3127                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
3128                   SGPRBlocks);
3129 
3130   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
3131     return TokError("too many user SGPRs enabled");
3132   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
3133                   UserSGPRCount);
3134 
3135   getTargetStreamer().EmitAmdhsaKernelDescriptor(
3136       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
3137       ReserveFlatScr, ReserveXNACK);
3138   return false;
3139 }
3140 
3141 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
3142   uint32_t Major;
3143   uint32_t Minor;
3144 
3145   if (ParseDirectiveMajorMinor(Major, Minor))
3146     return true;
3147 
3148   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
3149   return false;
3150 }
3151 
3152 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
3153   uint32_t Major;
3154   uint32_t Minor;
3155   uint32_t Stepping;
3156   StringRef VendorName;
3157   StringRef ArchName;
3158 
3159   // If this directive has no arguments, then use the ISA version for the
3160   // targeted GPU.
3161   if (getLexer().is(AsmToken::EndOfStatement)) {
3162     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3163     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
3164                                                       ISA.Stepping,
3165                                                       "AMD", "AMDGPU");
3166     return false;
3167   }
3168 
3169   if (ParseDirectiveMajorMinor(Major, Minor))
3170     return true;
3171 
3172   if (getLexer().isNot(AsmToken::Comma))
3173     return TokError("stepping version number required, comma expected");
3174   Lex();
3175 
3176   if (ParseAsAbsoluteExpression(Stepping))
3177     return TokError("invalid stepping version");
3178 
3179   if (getLexer().isNot(AsmToken::Comma))
3180     return TokError("vendor name required, comma expected");
3181   Lex();
3182 
3183   if (getLexer().isNot(AsmToken::String))
3184     return TokError("invalid vendor name");
3185 
3186   VendorName = getLexer().getTok().getStringContents();
3187   Lex();
3188 
3189   if (getLexer().isNot(AsmToken::Comma))
3190     return TokError("arch name required, comma expected");
3191   Lex();
3192 
3193   if (getLexer().isNot(AsmToken::String))
3194     return TokError("invalid arch name");
3195 
3196   ArchName = getLexer().getTok().getStringContents();
3197   Lex();
3198 
3199   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
3200                                                     VendorName, ArchName);
3201   return false;
3202 }
3203 
3204 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
3205                                                amd_kernel_code_t &Header) {
3206   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
3207   // assembly for backwards compatibility.
3208   if (ID == "max_scratch_backing_memory_byte_size") {
3209     Parser.eatToEndOfStatement();
3210     return false;
3211   }
3212 
3213   SmallString<40> ErrStr;
3214   raw_svector_ostream Err(ErrStr);
3215   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
3216     return TokError(Err.str());
3217   }
3218   Lex();
3219   return false;
3220 }
3221 
3222 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
3223   amd_kernel_code_t Header;
3224   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
3225 
3226   while (true) {
3227     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
3228     // will set the current token to EndOfStatement.
3229     while(getLexer().is(AsmToken::EndOfStatement))
3230       Lex();
3231 
3232     if (getLexer().isNot(AsmToken::Identifier))
3233       return TokError("expected value identifier or .end_amd_kernel_code_t");
3234 
3235     StringRef ID = getLexer().getTok().getIdentifier();
3236     Lex();
3237 
3238     if (ID == ".end_amd_kernel_code_t")
3239       break;
3240 
3241     if (ParseAMDKernelCodeTValue(ID, Header))
3242       return true;
3243   }
3244 
3245   getTargetStreamer().EmitAMDKernelCodeT(Header);
3246 
3247   return false;
3248 }
3249 
3250 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
3251   if (getLexer().isNot(AsmToken::Identifier))
3252     return TokError("expected symbol name");
3253 
3254   StringRef KernelName = Parser.getTok().getString();
3255 
3256   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
3257                                            ELF::STT_AMDGPU_HSA_KERNEL);
3258   Lex();
3259   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
3260     KernelScope.initialize(getContext());
3261   return false;
3262 }
3263 
3264 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
3265   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
3266     return Error(getParser().getTok().getLoc(),
3267                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
3268                  "architectures");
3269   }
3270 
3271   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
3272 
3273   std::string ISAVersionStringFromSTI;
3274   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
3275   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
3276 
3277   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
3278     return Error(getParser().getTok().getLoc(),
3279                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
3280                  "arguments specified through the command line");
3281   }
3282 
3283   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
3284   Lex();
3285 
3286   return false;
3287 }
3288 
3289 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
3290   const char *AssemblerDirectiveBegin;
3291   const char *AssemblerDirectiveEnd;
3292   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
3293       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
3294           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
3295                             HSAMD::V3::AssemblerDirectiveEnd)
3296           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
3297                             HSAMD::AssemblerDirectiveEnd);
3298 
3299   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
3300     return Error(getParser().getTok().getLoc(),
3301                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
3302                  "not available on non-amdhsa OSes")).str());
3303   }
3304 
3305   std::string HSAMetadataString;
3306   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
3307                           HSAMetadataString))
3308     return true;
3309 
3310   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
3311     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
3312       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3313   } else {
3314     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
3315       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3316   }
3317 
3318   return false;
3319 }
3320 
3321 /// Common code to parse out a block of text (typically YAML) between start and
3322 /// end directives.
3323 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
3324                                           const char *AssemblerDirectiveEnd,
3325                                           std::string &CollectString) {
3326 
3327   raw_string_ostream CollectStream(CollectString);
3328 
3329   getLexer().setSkipSpace(false);
3330 
3331   bool FoundEnd = false;
3332   while (!getLexer().is(AsmToken::Eof)) {
3333     while (getLexer().is(AsmToken::Space)) {
3334       CollectStream << getLexer().getTok().getString();
3335       Lex();
3336     }
3337 
3338     if (getLexer().is(AsmToken::Identifier)) {
3339       StringRef ID = getLexer().getTok().getIdentifier();
3340       if (ID == AssemblerDirectiveEnd) {
3341         Lex();
3342         FoundEnd = true;
3343         break;
3344       }
3345     }
3346 
3347     CollectStream << Parser.parseStringToEndOfStatement()
3348                   << getContext().getAsmInfo()->getSeparatorString();
3349 
3350     Parser.eatToEndOfStatement();
3351   }
3352 
3353   getLexer().setSkipSpace(true);
3354 
3355   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
3356     return TokError(Twine("expected directive ") +
3357                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
3358   }
3359 
3360   CollectStream.flush();
3361   return false;
3362 }
3363 
3364 /// Parse the assembler directive for new MsgPack-format PAL metadata.
3365 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
3366   std::string String;
3367   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
3368                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
3369     return true;
3370 
3371   auto PALMetadata = getTargetStreamer().getPALMetadata();
3372   if (!PALMetadata->setFromString(String))
3373     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
3374   return false;
3375 }
3376 
3377 /// Parse the assembler directive for old linear-format PAL metadata.
3378 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
3379   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
3380     return Error(getParser().getTok().getLoc(),
3381                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
3382                  "not available on non-amdpal OSes")).str());
3383   }
3384 
3385   auto PALMetadata = getTargetStreamer().getPALMetadata();
3386   PALMetadata->setLegacy();
3387   for (;;) {
3388     uint32_t Key, Value;
3389     if (ParseAsAbsoluteExpression(Key)) {
3390       return TokError(Twine("invalid value in ") +
3391                       Twine(PALMD::AssemblerDirective));
3392     }
3393     if (getLexer().isNot(AsmToken::Comma)) {
3394       return TokError(Twine("expected an even number of values in ") +
3395                       Twine(PALMD::AssemblerDirective));
3396     }
3397     Lex();
3398     if (ParseAsAbsoluteExpression(Value)) {
3399       return TokError(Twine("invalid value in ") +
3400                       Twine(PALMD::AssemblerDirective));
3401     }
3402     PALMetadata->setRegister(Key, Value);
3403     if (getLexer().isNot(AsmToken::Comma))
3404       break;
3405     Lex();
3406   }
3407   return false;
3408 }
3409 
3410 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
3411   StringRef IDVal = DirectiveID.getString();
3412 
3413   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
3414     if (IDVal == ".amdgcn_target")
3415       return ParseDirectiveAMDGCNTarget();
3416 
3417     if (IDVal == ".amdhsa_kernel")
3418       return ParseDirectiveAMDHSAKernel();
3419 
3420     // TODO: Restructure/combine with PAL metadata directive.
3421     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
3422       return ParseDirectiveHSAMetadata();
3423   } else {
3424     if (IDVal == ".hsa_code_object_version")
3425       return ParseDirectiveHSACodeObjectVersion();
3426 
3427     if (IDVal == ".hsa_code_object_isa")
3428       return ParseDirectiveHSACodeObjectISA();
3429 
3430     if (IDVal == ".amd_kernel_code_t")
3431       return ParseDirectiveAMDKernelCodeT();
3432 
3433     if (IDVal == ".amdgpu_hsa_kernel")
3434       return ParseDirectiveAMDGPUHsaKernel();
3435 
3436     if (IDVal == ".amd_amdgpu_isa")
3437       return ParseDirectiveISAVersion();
3438 
3439     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
3440       return ParseDirectiveHSAMetadata();
3441   }
3442 
3443   if (IDVal == PALMD::AssemblerDirectiveBegin)
3444     return ParseDirectivePALMetadataBegin();
3445 
3446   if (IDVal == PALMD::AssemblerDirective)
3447     return ParseDirectivePALMetadata();
3448 
3449   return true;
3450 }
3451 
3452 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
3453                                            unsigned RegNo) const {
3454 
3455   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
3456        R.isValid(); ++R) {
3457     if (*R == RegNo)
3458       return isGFX9();
3459   }
3460 
3461   switch (RegNo) {
3462   case AMDGPU::TBA:
3463   case AMDGPU::TBA_LO:
3464   case AMDGPU::TBA_HI:
3465   case AMDGPU::TMA:
3466   case AMDGPU::TMA_LO:
3467   case AMDGPU::TMA_HI:
3468     return !isGFX9();
3469   case AMDGPU::XNACK_MASK:
3470   case AMDGPU::XNACK_MASK_LO:
3471   case AMDGPU::XNACK_MASK_HI:
3472     return !isCI() && !isSI() && hasXNACK();
3473   default:
3474     break;
3475   }
3476 
3477   if (isInlineValue(RegNo))
3478     return !isCI() && !isSI() && !isVI();
3479 
3480   if (isCI())
3481     return true;
3482 
3483   if (isSI()) {
3484     // No flat_scr
3485     switch (RegNo) {
3486     case AMDGPU::FLAT_SCR:
3487     case AMDGPU::FLAT_SCR_LO:
3488     case AMDGPU::FLAT_SCR_HI:
3489       return false;
3490     default:
3491       return true;
3492     }
3493   }
3494 
3495   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
3496   // SI/CI have.
3497   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
3498        R.isValid(); ++R) {
3499     if (*R == RegNo)
3500       return false;
3501   }
3502 
3503   return true;
3504 }
3505 
3506 OperandMatchResultTy
3507 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
3508   // Try to parse with a custom parser
3509   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
3510 
3511   // If we successfully parsed the operand or if there as an error parsing,
3512   // we are done.
3513   //
3514   // If we are parsing after we reach EndOfStatement then this means we
3515   // are appending default values to the Operands list.  This is only done
3516   // by custom parser, so we shouldn't continue on to the generic parsing.
3517   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
3518       getLexer().is(AsmToken::EndOfStatement))
3519     return ResTy;
3520 
3521   ResTy = parseRegOrImm(Operands);
3522 
3523   if (ResTy == MatchOperand_Success)
3524     return ResTy;
3525 
3526   const auto &Tok = Parser.getTok();
3527   SMLoc S = Tok.getLoc();
3528 
3529   const MCExpr *Expr = nullptr;
3530   if (!Parser.parseExpression(Expr)) {
3531     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3532     return MatchOperand_Success;
3533   }
3534 
3535   // Possibly this is an instruction flag like 'gds'.
3536   if (Tok.getKind() == AsmToken::Identifier) {
3537     Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S));
3538     Parser.Lex();
3539     return MatchOperand_Success;
3540   }
3541 
3542   return MatchOperand_NoMatch;
3543 }
3544 
3545 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
3546   // Clear any forced encodings from the previous instruction.
3547   setForcedEncodingSize(0);
3548   setForcedDPP(false);
3549   setForcedSDWA(false);
3550 
3551   if (Name.endswith("_e64")) {
3552     setForcedEncodingSize(64);
3553     return Name.substr(0, Name.size() - 4);
3554   } else if (Name.endswith("_e32")) {
3555     setForcedEncodingSize(32);
3556     return Name.substr(0, Name.size() - 4);
3557   } else if (Name.endswith("_dpp")) {
3558     setForcedDPP(true);
3559     return Name.substr(0, Name.size() - 4);
3560   } else if (Name.endswith("_sdwa")) {
3561     setForcedSDWA(true);
3562     return Name.substr(0, Name.size() - 5);
3563   }
3564   return Name;
3565 }
3566 
3567 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
3568                                        StringRef Name,
3569                                        SMLoc NameLoc, OperandVector &Operands) {
3570   // Add the instruction mnemonic
3571   Name = parseMnemonicSuffix(Name);
3572   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
3573 
3574   while (!getLexer().is(AsmToken::EndOfStatement)) {
3575     OperandMatchResultTy Res = parseOperand(Operands, Name);
3576 
3577     // Eat the comma or space if there is one.
3578     if (getLexer().is(AsmToken::Comma))
3579       Parser.Lex();
3580 
3581     switch (Res) {
3582       case MatchOperand_Success: break;
3583       case MatchOperand_ParseFail:
3584         Error(getLexer().getLoc(), "failed parsing operand.");
3585         while (!getLexer().is(AsmToken::EndOfStatement)) {
3586           Parser.Lex();
3587         }
3588         return true;
3589       case MatchOperand_NoMatch:
3590         Error(getLexer().getLoc(), "not a valid operand.");
3591         while (!getLexer().is(AsmToken::EndOfStatement)) {
3592           Parser.Lex();
3593         }
3594         return true;
3595     }
3596   }
3597 
3598   return false;
3599 }
3600 
3601 //===----------------------------------------------------------------------===//
3602 // Utility functions
3603 //===----------------------------------------------------------------------===//
3604 
3605 OperandMatchResultTy
3606 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) {
3607   switch(getLexer().getKind()) {
3608     default: return MatchOperand_NoMatch;
3609     case AsmToken::Identifier: {
3610       StringRef Name = Parser.getTok().getString();
3611       if (!Name.equals(Prefix)) {
3612         return MatchOperand_NoMatch;
3613       }
3614 
3615       Parser.Lex();
3616       if (getLexer().isNot(AsmToken::Colon))
3617         return MatchOperand_ParseFail;
3618 
3619       Parser.Lex();
3620 
3621       bool IsMinus = false;
3622       if (getLexer().getKind() == AsmToken::Minus) {
3623         Parser.Lex();
3624         IsMinus = true;
3625       }
3626 
3627       if (getLexer().isNot(AsmToken::Integer))
3628         return MatchOperand_ParseFail;
3629 
3630       if (getParser().parseAbsoluteExpression(Int))
3631         return MatchOperand_ParseFail;
3632 
3633       if (IsMinus)
3634         Int = -Int;
3635       break;
3636     }
3637   }
3638   return MatchOperand_Success;
3639 }
3640 
3641 OperandMatchResultTy
3642 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
3643                                     AMDGPUOperand::ImmTy ImmTy,
3644                                     bool (*ConvertResult)(int64_t&)) {
3645   SMLoc S = Parser.getTok().getLoc();
3646   int64_t Value = 0;
3647 
3648   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
3649   if (Res != MatchOperand_Success)
3650     return Res;
3651 
3652   if (ConvertResult && !ConvertResult(Value)) {
3653     return MatchOperand_ParseFail;
3654   }
3655 
3656   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
3657   return MatchOperand_Success;
3658 }
3659 
3660 OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix(
3661   const char *Prefix,
3662   OperandVector &Operands,
3663   AMDGPUOperand::ImmTy ImmTy,
3664   bool (*ConvertResult)(int64_t&)) {
3665   StringRef Name = Parser.getTok().getString();
3666   if (!Name.equals(Prefix))
3667     return MatchOperand_NoMatch;
3668 
3669   Parser.Lex();
3670   if (getLexer().isNot(AsmToken::Colon))
3671     return MatchOperand_ParseFail;
3672 
3673   Parser.Lex();
3674   if (getLexer().isNot(AsmToken::LBrac))
3675     return MatchOperand_ParseFail;
3676   Parser.Lex();
3677 
3678   unsigned Val = 0;
3679   SMLoc S = Parser.getTok().getLoc();
3680 
3681   // FIXME: How to verify the number of elements matches the number of src
3682   // operands?
3683   for (int I = 0; I < 4; ++I) {
3684     if (I != 0) {
3685       if (getLexer().is(AsmToken::RBrac))
3686         break;
3687 
3688       if (getLexer().isNot(AsmToken::Comma))
3689         return MatchOperand_ParseFail;
3690       Parser.Lex();
3691     }
3692 
3693     if (getLexer().isNot(AsmToken::Integer))
3694       return MatchOperand_ParseFail;
3695 
3696     int64_t Op;
3697     if (getParser().parseAbsoluteExpression(Op))
3698       return MatchOperand_ParseFail;
3699 
3700     if (Op != 0 && Op != 1)
3701       return MatchOperand_ParseFail;
3702     Val |= (Op << I);
3703   }
3704 
3705   Parser.Lex();
3706   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
3707   return MatchOperand_Success;
3708 }
3709 
3710 OperandMatchResultTy
3711 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
3712                                AMDGPUOperand::ImmTy ImmTy) {
3713   int64_t Bit = 0;
3714   SMLoc S = Parser.getTok().getLoc();
3715 
3716   // We are at the end of the statement, and this is a default argument, so
3717   // use a default value.
3718   if (getLexer().isNot(AsmToken::EndOfStatement)) {
3719     switch(getLexer().getKind()) {
3720       case AsmToken::Identifier: {
3721         StringRef Tok = Parser.getTok().getString();
3722         if (Tok == Name) {
3723           if (Tok == "r128" && isGFX9())
3724             Error(S, "r128 modifier is not supported on this GPU");
3725           if (Tok == "a16" && !isGFX9())
3726             Error(S, "a16 modifier is not supported on this GPU");
3727           Bit = 1;
3728           Parser.Lex();
3729         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
3730           Bit = 0;
3731           Parser.Lex();
3732         } else {
3733           return MatchOperand_NoMatch;
3734         }
3735         break;
3736       }
3737       default:
3738         return MatchOperand_NoMatch;
3739     }
3740   }
3741 
3742   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
3743   return MatchOperand_Success;
3744 }
3745 
3746 static void addOptionalImmOperand(
3747   MCInst& Inst, const OperandVector& Operands,
3748   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
3749   AMDGPUOperand::ImmTy ImmT,
3750   int64_t Default = 0) {
3751   auto i = OptionalIdx.find(ImmT);
3752   if (i != OptionalIdx.end()) {
3753     unsigned Idx = i->second;
3754     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
3755   } else {
3756     Inst.addOperand(MCOperand::createImm(Default));
3757   }
3758 }
3759 
3760 OperandMatchResultTy
3761 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
3762   if (getLexer().isNot(AsmToken::Identifier)) {
3763     return MatchOperand_NoMatch;
3764   }
3765   StringRef Tok = Parser.getTok().getString();
3766   if (Tok != Prefix) {
3767     return MatchOperand_NoMatch;
3768   }
3769 
3770   Parser.Lex();
3771   if (getLexer().isNot(AsmToken::Colon)) {
3772     return MatchOperand_ParseFail;
3773   }
3774 
3775   Parser.Lex();
3776   if (getLexer().isNot(AsmToken::Identifier)) {
3777     return MatchOperand_ParseFail;
3778   }
3779 
3780   Value = Parser.getTok().getString();
3781   return MatchOperand_Success;
3782 }
3783 
3784 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
3785 // values to live in a joint format operand in the MCInst encoding.
3786 OperandMatchResultTy
3787 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
3788   SMLoc S = Parser.getTok().getLoc();
3789   int64_t Dfmt = 0, Nfmt = 0;
3790   // dfmt and nfmt can appear in either order, and each is optional.
3791   bool GotDfmt = false, GotNfmt = false;
3792   while (!GotDfmt || !GotNfmt) {
3793     if (!GotDfmt) {
3794       auto Res = parseIntWithPrefix("dfmt", Dfmt);
3795       if (Res != MatchOperand_NoMatch) {
3796         if (Res != MatchOperand_Success)
3797           return Res;
3798         if (Dfmt >= 16) {
3799           Error(Parser.getTok().getLoc(), "out of range dfmt");
3800           return MatchOperand_ParseFail;
3801         }
3802         GotDfmt = true;
3803         Parser.Lex();
3804         continue;
3805       }
3806     }
3807     if (!GotNfmt) {
3808       auto Res = parseIntWithPrefix("nfmt", Nfmt);
3809       if (Res != MatchOperand_NoMatch) {
3810         if (Res != MatchOperand_Success)
3811           return Res;
3812         if (Nfmt >= 8) {
3813           Error(Parser.getTok().getLoc(), "out of range nfmt");
3814           return MatchOperand_ParseFail;
3815         }
3816         GotNfmt = true;
3817         Parser.Lex();
3818         continue;
3819       }
3820     }
3821     break;
3822   }
3823   if (!GotDfmt && !GotNfmt)
3824     return MatchOperand_NoMatch;
3825   auto Format = Dfmt | Nfmt << 4;
3826   Operands.push_back(
3827       AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
3828   return MatchOperand_Success;
3829 }
3830 
3831 //===----------------------------------------------------------------------===//
3832 // ds
3833 //===----------------------------------------------------------------------===//
3834 
3835 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
3836                                     const OperandVector &Operands) {
3837   OptionalImmIndexMap OptionalIdx;
3838 
3839   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3840     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3841 
3842     // Add the register arguments
3843     if (Op.isReg()) {
3844       Op.addRegOperands(Inst, 1);
3845       continue;
3846     }
3847 
3848     // Handle optional arguments
3849     OptionalIdx[Op.getImmTy()] = i;
3850   }
3851 
3852   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
3853   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
3854   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
3855 
3856   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
3857 }
3858 
3859 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
3860                                 bool IsGdsHardcoded) {
3861   OptionalImmIndexMap OptionalIdx;
3862 
3863   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3864     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3865 
3866     // Add the register arguments
3867     if (Op.isReg()) {
3868       Op.addRegOperands(Inst, 1);
3869       continue;
3870     }
3871 
3872     if (Op.isToken() && Op.getToken() == "gds") {
3873       IsGdsHardcoded = true;
3874       continue;
3875     }
3876 
3877     // Handle optional arguments
3878     OptionalIdx[Op.getImmTy()] = i;
3879   }
3880 
3881   AMDGPUOperand::ImmTy OffsetType =
3882     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si ||
3883      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
3884                                                       AMDGPUOperand::ImmTyOffset;
3885 
3886   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
3887 
3888   if (!IsGdsHardcoded) {
3889     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
3890   }
3891   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
3892 }
3893 
3894 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
3895   OptionalImmIndexMap OptionalIdx;
3896 
3897   unsigned OperandIdx[4];
3898   unsigned EnMask = 0;
3899   int SrcIdx = 0;
3900 
3901   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3902     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3903 
3904     // Add the register arguments
3905     if (Op.isReg()) {
3906       assert(SrcIdx < 4);
3907       OperandIdx[SrcIdx] = Inst.size();
3908       Op.addRegOperands(Inst, 1);
3909       ++SrcIdx;
3910       continue;
3911     }
3912 
3913     if (Op.isOff()) {
3914       assert(SrcIdx < 4);
3915       OperandIdx[SrcIdx] = Inst.size();
3916       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
3917       ++SrcIdx;
3918       continue;
3919     }
3920 
3921     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
3922       Op.addImmOperands(Inst, 1);
3923       continue;
3924     }
3925 
3926     if (Op.isToken() && Op.getToken() == "done")
3927       continue;
3928 
3929     // Handle optional arguments
3930     OptionalIdx[Op.getImmTy()] = i;
3931   }
3932 
3933   assert(SrcIdx == 4);
3934 
3935   bool Compr = false;
3936   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
3937     Compr = true;
3938     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
3939     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
3940     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
3941   }
3942 
3943   for (auto i = 0; i < SrcIdx; ++i) {
3944     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
3945       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
3946     }
3947   }
3948 
3949   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
3950   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
3951 
3952   Inst.addOperand(MCOperand::createImm(EnMask));
3953 }
3954 
3955 //===----------------------------------------------------------------------===//
3956 // s_waitcnt
3957 //===----------------------------------------------------------------------===//
3958 
3959 static bool
3960 encodeCnt(
3961   const AMDGPU::IsaVersion ISA,
3962   int64_t &IntVal,
3963   int64_t CntVal,
3964   bool Saturate,
3965   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
3966   unsigned (*decode)(const IsaVersion &Version, unsigned))
3967 {
3968   bool Failed = false;
3969 
3970   IntVal = encode(ISA, IntVal, CntVal);
3971   if (CntVal != decode(ISA, IntVal)) {
3972     if (Saturate) {
3973       IntVal = encode(ISA, IntVal, -1);
3974     } else {
3975       Failed = true;
3976     }
3977   }
3978   return Failed;
3979 }
3980 
3981 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
3982   StringRef CntName = Parser.getTok().getString();
3983   int64_t CntVal;
3984 
3985   Parser.Lex();
3986   if (getLexer().isNot(AsmToken::LParen))
3987     return true;
3988 
3989   Parser.Lex();
3990   if (getLexer().isNot(AsmToken::Integer))
3991     return true;
3992 
3993   SMLoc ValLoc = Parser.getTok().getLoc();
3994   if (getParser().parseAbsoluteExpression(CntVal))
3995     return true;
3996 
3997   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3998 
3999   bool Failed = true;
4000   bool Sat = CntName.endswith("_sat");
4001 
4002   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
4003     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
4004   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
4005     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
4006   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
4007     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
4008   }
4009 
4010   if (Failed) {
4011     Error(ValLoc, "too large value for " + CntName);
4012     return true;
4013   }
4014 
4015   if (getLexer().isNot(AsmToken::RParen)) {
4016     return true;
4017   }
4018 
4019   Parser.Lex();
4020   if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
4021     const AsmToken NextToken = getLexer().peekTok();
4022     if (NextToken.is(AsmToken::Identifier)) {
4023       Parser.Lex();
4024     }
4025   }
4026 
4027   return false;
4028 }
4029 
4030 OperandMatchResultTy
4031 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
4032   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4033   int64_t Waitcnt = getWaitcntBitMask(ISA);
4034   SMLoc S = Parser.getTok().getLoc();
4035 
4036   switch(getLexer().getKind()) {
4037     default: return MatchOperand_ParseFail;
4038     case AsmToken::Integer:
4039       // The operand can be an integer value.
4040       if (getParser().parseAbsoluteExpression(Waitcnt))
4041         return MatchOperand_ParseFail;
4042       break;
4043 
4044     case AsmToken::Identifier:
4045       do {
4046         if (parseCnt(Waitcnt))
4047           return MatchOperand_ParseFail;
4048       } while(getLexer().isNot(AsmToken::EndOfStatement));
4049       break;
4050   }
4051   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
4052   return MatchOperand_Success;
4053 }
4054 
4055 bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
4056                                           int64_t &Width) {
4057   using namespace llvm::AMDGPU::Hwreg;
4058 
4059   if (Parser.getTok().getString() != "hwreg")
4060     return true;
4061   Parser.Lex();
4062 
4063   if (getLexer().isNot(AsmToken::LParen))
4064     return true;
4065   Parser.Lex();
4066 
4067   if (getLexer().is(AsmToken::Identifier)) {
4068     HwReg.IsSymbolic = true;
4069     HwReg.Id = ID_UNKNOWN_;
4070     const StringRef tok = Parser.getTok().getString();
4071     int Last = ID_SYMBOLIC_LAST_;
4072     if (isSI() || isCI() || isVI())
4073       Last = ID_SYMBOLIC_FIRST_GFX9_;
4074     for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) {
4075       if (tok == IdSymbolic[i]) {
4076         HwReg.Id = i;
4077         break;
4078       }
4079     }
4080     Parser.Lex();
4081   } else {
4082     HwReg.IsSymbolic = false;
4083     if (getLexer().isNot(AsmToken::Integer))
4084       return true;
4085     if (getParser().parseAbsoluteExpression(HwReg.Id))
4086       return true;
4087   }
4088 
4089   if (getLexer().is(AsmToken::RParen)) {
4090     Parser.Lex();
4091     return false;
4092   }
4093 
4094   // optional params
4095   if (getLexer().isNot(AsmToken::Comma))
4096     return true;
4097   Parser.Lex();
4098 
4099   if (getLexer().isNot(AsmToken::Integer))
4100     return true;
4101   if (getParser().parseAbsoluteExpression(Offset))
4102     return true;
4103 
4104   if (getLexer().isNot(AsmToken::Comma))
4105     return true;
4106   Parser.Lex();
4107 
4108   if (getLexer().isNot(AsmToken::Integer))
4109     return true;
4110   if (getParser().parseAbsoluteExpression(Width))
4111     return true;
4112 
4113   if (getLexer().isNot(AsmToken::RParen))
4114     return true;
4115   Parser.Lex();
4116 
4117   return false;
4118 }
4119 
4120 OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
4121   using namespace llvm::AMDGPU::Hwreg;
4122 
4123   int64_t Imm16Val = 0;
4124   SMLoc S = Parser.getTok().getLoc();
4125 
4126   switch(getLexer().getKind()) {
4127     default: return MatchOperand_NoMatch;
4128     case AsmToken::Integer:
4129       // The operand can be an integer value.
4130       if (getParser().parseAbsoluteExpression(Imm16Val))
4131         return MatchOperand_NoMatch;
4132       if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
4133         Error(S, "invalid immediate: only 16-bit values are legal");
4134         // Do not return error code, but create an imm operand anyway and proceed
4135         // to the next operand, if any. That avoids unneccessary error messages.
4136       }
4137       break;
4138 
4139     case AsmToken::Identifier: {
4140         OperandInfoTy HwReg(ID_UNKNOWN_);
4141         int64_t Offset = OFFSET_DEFAULT_;
4142         int64_t Width = WIDTH_M1_DEFAULT_ + 1;
4143         if (parseHwregConstruct(HwReg, Offset, Width))
4144           return MatchOperand_ParseFail;
4145         if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) {
4146           if (HwReg.IsSymbolic)
4147             Error(S, "invalid symbolic name of hardware register");
4148           else
4149             Error(S, "invalid code of hardware register: only 6-bit values are legal");
4150         }
4151         if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset))
4152           Error(S, "invalid bit offset: only 5-bit values are legal");
4153         if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1))
4154           Error(S, "invalid bitfield width: only values from 1 to 32 are legal");
4155         Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_);
4156       }
4157       break;
4158   }
4159   Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
4160   return MatchOperand_Success;
4161 }
4162 
4163 bool AMDGPUOperand::isSWaitCnt() const {
4164   return isImm();
4165 }
4166 
4167 bool AMDGPUOperand::isHwreg() const {
4168   return isImmTy(ImmTyHwreg);
4169 }
4170 
4171 bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) {
4172   using namespace llvm::AMDGPU::SendMsg;
4173 
4174   if (Parser.getTok().getString() != "sendmsg")
4175     return true;
4176   Parser.Lex();
4177 
4178   if (getLexer().isNot(AsmToken::LParen))
4179     return true;
4180   Parser.Lex();
4181 
4182   if (getLexer().is(AsmToken::Identifier)) {
4183     Msg.IsSymbolic = true;
4184     Msg.Id = ID_UNKNOWN_;
4185     const std::string tok = Parser.getTok().getString();
4186     for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
4187       switch(i) {
4188         default: continue; // Omit gaps.
4189         case ID_INTERRUPT: case ID_GS: case ID_GS_DONE:  case ID_SYSMSG: break;
4190       }
4191       if (tok == IdSymbolic[i]) {
4192         Msg.Id = i;
4193         break;
4194       }
4195     }
4196     Parser.Lex();
4197   } else {
4198     Msg.IsSymbolic = false;
4199     if (getLexer().isNot(AsmToken::Integer))
4200       return true;
4201     if (getParser().parseAbsoluteExpression(Msg.Id))
4202       return true;
4203     if (getLexer().is(AsmToken::Integer))
4204       if (getParser().parseAbsoluteExpression(Msg.Id))
4205         Msg.Id = ID_UNKNOWN_;
4206   }
4207   if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest.
4208     return false;
4209 
4210   if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) {
4211     if (getLexer().isNot(AsmToken::RParen))
4212       return true;
4213     Parser.Lex();
4214     return false;
4215   }
4216 
4217   if (getLexer().isNot(AsmToken::Comma))
4218     return true;
4219   Parser.Lex();
4220 
4221   assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG);
4222   Operation.Id = ID_UNKNOWN_;
4223   if (getLexer().is(AsmToken::Identifier)) {
4224     Operation.IsSymbolic = true;
4225     const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
4226     const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
4227     const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
4228     const StringRef Tok = Parser.getTok().getString();
4229     for (int i = F; i < L; ++i) {
4230       if (Tok == S[i]) {
4231         Operation.Id = i;
4232         break;
4233       }
4234     }
4235     Parser.Lex();
4236   } else {
4237     Operation.IsSymbolic = false;
4238     if (getLexer().isNot(AsmToken::Integer))
4239       return true;
4240     if (getParser().parseAbsoluteExpression(Operation.Id))
4241       return true;
4242   }
4243 
4244   if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
4245     // Stream id is optional.
4246     if (getLexer().is(AsmToken::RParen)) {
4247       Parser.Lex();
4248       return false;
4249     }
4250 
4251     if (getLexer().isNot(AsmToken::Comma))
4252       return true;
4253     Parser.Lex();
4254 
4255     if (getLexer().isNot(AsmToken::Integer))
4256       return true;
4257     if (getParser().parseAbsoluteExpression(StreamId))
4258       return true;
4259   }
4260 
4261   if (getLexer().isNot(AsmToken::RParen))
4262     return true;
4263   Parser.Lex();
4264   return false;
4265 }
4266 
4267 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
4268   if (getLexer().getKind() != AsmToken::Identifier)
4269     return MatchOperand_NoMatch;
4270 
4271   StringRef Str = Parser.getTok().getString();
4272   int Slot = StringSwitch<int>(Str)
4273     .Case("p10", 0)
4274     .Case("p20", 1)
4275     .Case("p0", 2)
4276     .Default(-1);
4277 
4278   SMLoc S = Parser.getTok().getLoc();
4279   if (Slot == -1)
4280     return MatchOperand_ParseFail;
4281 
4282   Parser.Lex();
4283   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
4284                                               AMDGPUOperand::ImmTyInterpSlot));
4285   return MatchOperand_Success;
4286 }
4287 
4288 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
4289   if (getLexer().getKind() != AsmToken::Identifier)
4290     return MatchOperand_NoMatch;
4291 
4292   StringRef Str = Parser.getTok().getString();
4293   if (!Str.startswith("attr"))
4294     return MatchOperand_NoMatch;
4295 
4296   StringRef Chan = Str.take_back(2);
4297   int AttrChan = StringSwitch<int>(Chan)
4298     .Case(".x", 0)
4299     .Case(".y", 1)
4300     .Case(".z", 2)
4301     .Case(".w", 3)
4302     .Default(-1);
4303   if (AttrChan == -1)
4304     return MatchOperand_ParseFail;
4305 
4306   Str = Str.drop_back(2).drop_front(4);
4307 
4308   uint8_t Attr;
4309   if (Str.getAsInteger(10, Attr))
4310     return MatchOperand_ParseFail;
4311 
4312   SMLoc S = Parser.getTok().getLoc();
4313   Parser.Lex();
4314   if (Attr > 63) {
4315     Error(S, "out of bounds attr");
4316     return MatchOperand_Success;
4317   }
4318 
4319   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
4320 
4321   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
4322                                               AMDGPUOperand::ImmTyInterpAttr));
4323   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
4324                                               AMDGPUOperand::ImmTyAttrChan));
4325   return MatchOperand_Success;
4326 }
4327 
4328 void AMDGPUAsmParser::errorExpTgt() {
4329   Error(Parser.getTok().getLoc(), "invalid exp target");
4330 }
4331 
4332 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
4333                                                       uint8_t &Val) {
4334   if (Str == "null") {
4335     Val = 9;
4336     return MatchOperand_Success;
4337   }
4338 
4339   if (Str.startswith("mrt")) {
4340     Str = Str.drop_front(3);
4341     if (Str == "z") { // == mrtz
4342       Val = 8;
4343       return MatchOperand_Success;
4344     }
4345 
4346     if (Str.getAsInteger(10, Val))
4347       return MatchOperand_ParseFail;
4348 
4349     if (Val > 7)
4350       errorExpTgt();
4351 
4352     return MatchOperand_Success;
4353   }
4354 
4355   if (Str.startswith("pos")) {
4356     Str = Str.drop_front(3);
4357     if (Str.getAsInteger(10, Val))
4358       return MatchOperand_ParseFail;
4359 
4360     if (Val > 3)
4361       errorExpTgt();
4362 
4363     Val += 12;
4364     return MatchOperand_Success;
4365   }
4366 
4367   if (Str.startswith("param")) {
4368     Str = Str.drop_front(5);
4369     if (Str.getAsInteger(10, Val))
4370       return MatchOperand_ParseFail;
4371 
4372     if (Val >= 32)
4373       errorExpTgt();
4374 
4375     Val += 32;
4376     return MatchOperand_Success;
4377   }
4378 
4379   if (Str.startswith("invalid_target_")) {
4380     Str = Str.drop_front(15);
4381     if (Str.getAsInteger(10, Val))
4382       return MatchOperand_ParseFail;
4383 
4384     errorExpTgt();
4385     return MatchOperand_Success;
4386   }
4387 
4388   return MatchOperand_NoMatch;
4389 }
4390 
4391 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
4392   uint8_t Val;
4393   StringRef Str = Parser.getTok().getString();
4394 
4395   auto Res = parseExpTgtImpl(Str, Val);
4396   if (Res != MatchOperand_Success)
4397     return Res;
4398 
4399   SMLoc S = Parser.getTok().getLoc();
4400   Parser.Lex();
4401 
4402   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
4403                                               AMDGPUOperand::ImmTyExpTgt));
4404   return MatchOperand_Success;
4405 }
4406 
4407 OperandMatchResultTy
4408 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
4409   using namespace llvm::AMDGPU::SendMsg;
4410 
4411   int64_t Imm16Val = 0;
4412   SMLoc S = Parser.getTok().getLoc();
4413 
4414   switch(getLexer().getKind()) {
4415   default:
4416     return MatchOperand_NoMatch;
4417   case AsmToken::Integer:
4418     // The operand can be an integer value.
4419     if (getParser().parseAbsoluteExpression(Imm16Val))
4420       return MatchOperand_NoMatch;
4421     if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
4422       Error(S, "invalid immediate: only 16-bit values are legal");
4423       // Do not return error code, but create an imm operand anyway and proceed
4424       // to the next operand, if any. That avoids unneccessary error messages.
4425     }
4426     break;
4427   case AsmToken::Identifier: {
4428       OperandInfoTy Msg(ID_UNKNOWN_);
4429       OperandInfoTy Operation(OP_UNKNOWN_);
4430       int64_t StreamId = STREAM_ID_DEFAULT_;
4431       if (parseSendMsgConstruct(Msg, Operation, StreamId))
4432         return MatchOperand_ParseFail;
4433       do {
4434         // Validate and encode message ID.
4435         if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE)
4436                 || Msg.Id == ID_SYSMSG)) {
4437           if (Msg.IsSymbolic)
4438             Error(S, "invalid/unsupported symbolic name of message");
4439           else
4440             Error(S, "invalid/unsupported code of message");
4441           break;
4442         }
4443         Imm16Val = (Msg.Id << ID_SHIFT_);
4444         // Validate and encode operation ID.
4445         if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) {
4446           if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) {
4447             if (Operation.IsSymbolic)
4448               Error(S, "invalid symbolic name of GS_OP");
4449             else
4450               Error(S, "invalid code of GS_OP: only 2-bit values are legal");
4451             break;
4452           }
4453           if (Operation.Id == OP_GS_NOP
4454               && Msg.Id != ID_GS_DONE) {
4455             Error(S, "invalid GS_OP: NOP is for GS_DONE only");
4456             break;
4457           }
4458           Imm16Val |= (Operation.Id << OP_SHIFT_);
4459         }
4460         if (Msg.Id == ID_SYSMSG) {
4461           if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) {
4462             if (Operation.IsSymbolic)
4463               Error(S, "invalid/unsupported symbolic name of SYSMSG_OP");
4464             else
4465               Error(S, "invalid/unsupported code of SYSMSG_OP");
4466             break;
4467           }
4468           Imm16Val |= (Operation.Id << OP_SHIFT_);
4469         }
4470         // Validate and encode stream ID.
4471         if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
4472           if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) {
4473             Error(S, "invalid stream id: only 2-bit values are legal");
4474             break;
4475           }
4476           Imm16Val |= (StreamId << STREAM_ID_SHIFT_);
4477         }
4478       } while (false);
4479     }
4480     break;
4481   }
4482   Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
4483   return MatchOperand_Success;
4484 }
4485 
4486 bool AMDGPUOperand::isSendMsg() const {
4487   return isImmTy(ImmTySendMsg);
4488 }
4489 
4490 //===----------------------------------------------------------------------===//
4491 // parser helpers
4492 //===----------------------------------------------------------------------===//
4493 
4494 bool
4495 AMDGPUAsmParser::trySkipId(const StringRef Id) {
4496   if (getLexer().getKind() == AsmToken::Identifier &&
4497       Parser.getTok().getString() == Id) {
4498     Parser.Lex();
4499     return true;
4500   }
4501   return false;
4502 }
4503 
4504 bool
4505 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
4506   if (getLexer().getKind() == Kind) {
4507     Parser.Lex();
4508     return true;
4509   }
4510   return false;
4511 }
4512 
4513 bool
4514 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
4515                            const StringRef ErrMsg) {
4516   if (!trySkipToken(Kind)) {
4517     Error(Parser.getTok().getLoc(), ErrMsg);
4518     return false;
4519   }
4520   return true;
4521 }
4522 
4523 bool
4524 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
4525   return !getParser().parseAbsoluteExpression(Imm);
4526 }
4527 
4528 bool
4529 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
4530   SMLoc S = Parser.getTok().getLoc();
4531   if (getLexer().getKind() == AsmToken::String) {
4532     Val = Parser.getTok().getStringContents();
4533     Parser.Lex();
4534     return true;
4535   } else {
4536     Error(S, ErrMsg);
4537     return false;
4538   }
4539 }
4540 
4541 //===----------------------------------------------------------------------===//
4542 // swizzle
4543 //===----------------------------------------------------------------------===//
4544 
4545 LLVM_READNONE
4546 static unsigned
4547 encodeBitmaskPerm(const unsigned AndMask,
4548                   const unsigned OrMask,
4549                   const unsigned XorMask) {
4550   using namespace llvm::AMDGPU::Swizzle;
4551 
4552   return BITMASK_PERM_ENC |
4553          (AndMask << BITMASK_AND_SHIFT) |
4554          (OrMask  << BITMASK_OR_SHIFT)  |
4555          (XorMask << BITMASK_XOR_SHIFT);
4556 }
4557 
4558 bool
4559 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
4560                                       const unsigned MinVal,
4561                                       const unsigned MaxVal,
4562                                       const StringRef ErrMsg) {
4563   for (unsigned i = 0; i < OpNum; ++i) {
4564     if (!skipToken(AsmToken::Comma, "expected a comma")){
4565       return false;
4566     }
4567     SMLoc ExprLoc = Parser.getTok().getLoc();
4568     if (!parseExpr(Op[i])) {
4569       return false;
4570     }
4571     if (Op[i] < MinVal || Op[i] > MaxVal) {
4572       Error(ExprLoc, ErrMsg);
4573       return false;
4574     }
4575   }
4576 
4577   return true;
4578 }
4579 
4580 bool
4581 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
4582   using namespace llvm::AMDGPU::Swizzle;
4583 
4584   int64_t Lane[LANE_NUM];
4585   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
4586                            "expected a 2-bit lane id")) {
4587     Imm = QUAD_PERM_ENC;
4588     for (unsigned I = 0; I < LANE_NUM; ++I) {
4589       Imm |= Lane[I] << (LANE_SHIFT * I);
4590     }
4591     return true;
4592   }
4593   return false;
4594 }
4595 
4596 bool
4597 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
4598   using namespace llvm::AMDGPU::Swizzle;
4599 
4600   SMLoc S = Parser.getTok().getLoc();
4601   int64_t GroupSize;
4602   int64_t LaneIdx;
4603 
4604   if (!parseSwizzleOperands(1, &GroupSize,
4605                             2, 32,
4606                             "group size must be in the interval [2,32]")) {
4607     return false;
4608   }
4609   if (!isPowerOf2_64(GroupSize)) {
4610     Error(S, "group size must be a power of two");
4611     return false;
4612   }
4613   if (parseSwizzleOperands(1, &LaneIdx,
4614                            0, GroupSize - 1,
4615                            "lane id must be in the interval [0,group size - 1]")) {
4616     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
4617     return true;
4618   }
4619   return false;
4620 }
4621 
4622 bool
4623 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
4624   using namespace llvm::AMDGPU::Swizzle;
4625 
4626   SMLoc S = Parser.getTok().getLoc();
4627   int64_t GroupSize;
4628 
4629   if (!parseSwizzleOperands(1, &GroupSize,
4630       2, 32, "group size must be in the interval [2,32]")) {
4631     return false;
4632   }
4633   if (!isPowerOf2_64(GroupSize)) {
4634     Error(S, "group size must be a power of two");
4635     return false;
4636   }
4637 
4638   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
4639   return true;
4640 }
4641 
4642 bool
4643 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
4644   using namespace llvm::AMDGPU::Swizzle;
4645 
4646   SMLoc S = Parser.getTok().getLoc();
4647   int64_t GroupSize;
4648 
4649   if (!parseSwizzleOperands(1, &GroupSize,
4650       1, 16, "group size must be in the interval [1,16]")) {
4651     return false;
4652   }
4653   if (!isPowerOf2_64(GroupSize)) {
4654     Error(S, "group size must be a power of two");
4655     return false;
4656   }
4657 
4658   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
4659   return true;
4660 }
4661 
4662 bool
4663 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
4664   using namespace llvm::AMDGPU::Swizzle;
4665 
4666   if (!skipToken(AsmToken::Comma, "expected a comma")) {
4667     return false;
4668   }
4669 
4670   StringRef Ctl;
4671   SMLoc StrLoc = Parser.getTok().getLoc();
4672   if (!parseString(Ctl)) {
4673     return false;
4674   }
4675   if (Ctl.size() != BITMASK_WIDTH) {
4676     Error(StrLoc, "expected a 5-character mask");
4677     return false;
4678   }
4679 
4680   unsigned AndMask = 0;
4681   unsigned OrMask = 0;
4682   unsigned XorMask = 0;
4683 
4684   for (size_t i = 0; i < Ctl.size(); ++i) {
4685     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
4686     switch(Ctl[i]) {
4687     default:
4688       Error(StrLoc, "invalid mask");
4689       return false;
4690     case '0':
4691       break;
4692     case '1':
4693       OrMask |= Mask;
4694       break;
4695     case 'p':
4696       AndMask |= Mask;
4697       break;
4698     case 'i':
4699       AndMask |= Mask;
4700       XorMask |= Mask;
4701       break;
4702     }
4703   }
4704 
4705   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
4706   return true;
4707 }
4708 
4709 bool
4710 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
4711 
4712   SMLoc OffsetLoc = Parser.getTok().getLoc();
4713 
4714   if (!parseExpr(Imm)) {
4715     return false;
4716   }
4717   if (!isUInt<16>(Imm)) {
4718     Error(OffsetLoc, "expected a 16-bit offset");
4719     return false;
4720   }
4721   return true;
4722 }
4723 
4724 bool
4725 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
4726   using namespace llvm::AMDGPU::Swizzle;
4727 
4728   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
4729 
4730     SMLoc ModeLoc = Parser.getTok().getLoc();
4731     bool Ok = false;
4732 
4733     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
4734       Ok = parseSwizzleQuadPerm(Imm);
4735     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
4736       Ok = parseSwizzleBitmaskPerm(Imm);
4737     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
4738       Ok = parseSwizzleBroadcast(Imm);
4739     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
4740       Ok = parseSwizzleSwap(Imm);
4741     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
4742       Ok = parseSwizzleReverse(Imm);
4743     } else {
4744       Error(ModeLoc, "expected a swizzle mode");
4745     }
4746 
4747     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
4748   }
4749 
4750   return false;
4751 }
4752 
4753 OperandMatchResultTy
4754 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
4755   SMLoc S = Parser.getTok().getLoc();
4756   int64_t Imm = 0;
4757 
4758   if (trySkipId("offset")) {
4759 
4760     bool Ok = false;
4761     if (skipToken(AsmToken::Colon, "expected a colon")) {
4762       if (trySkipId("swizzle")) {
4763         Ok = parseSwizzleMacro(Imm);
4764       } else {
4765         Ok = parseSwizzleOffset(Imm);
4766       }
4767     }
4768 
4769     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
4770 
4771     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
4772   } else {
4773     // Swizzle "offset" operand is optional.
4774     // If it is omitted, try parsing other optional operands.
4775     return parseOptionalOpr(Operands);
4776   }
4777 }
4778 
4779 bool
4780 AMDGPUOperand::isSwizzle() const {
4781   return isImmTy(ImmTySwizzle);
4782 }
4783 
4784 //===----------------------------------------------------------------------===//
4785 // VGPR Index Mode
4786 //===----------------------------------------------------------------------===//
4787 
4788 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
4789 
4790   using namespace llvm::AMDGPU::VGPRIndexMode;
4791 
4792   if (trySkipToken(AsmToken::RParen)) {
4793     return OFF;
4794   }
4795 
4796   int64_t Imm = 0;
4797 
4798   while (true) {
4799     unsigned Mode = 0;
4800     SMLoc S = Parser.getTok().getLoc();
4801 
4802     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
4803       if (trySkipId(IdSymbolic[ModeId])) {
4804         Mode = 1 << ModeId;
4805         break;
4806       }
4807     }
4808 
4809     if (Mode == 0) {
4810       Error(S, (Imm == 0)?
4811                "expected a VGPR index mode or a closing parenthesis" :
4812                "expected a VGPR index mode");
4813       break;
4814     }
4815 
4816     if (Imm & Mode) {
4817       Error(S, "duplicate VGPR index mode");
4818       break;
4819     }
4820     Imm |= Mode;
4821 
4822     if (trySkipToken(AsmToken::RParen))
4823       break;
4824     if (!skipToken(AsmToken::Comma,
4825                    "expected a comma or a closing parenthesis"))
4826       break;
4827   }
4828 
4829   return Imm;
4830 }
4831 
4832 OperandMatchResultTy
4833 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
4834 
4835   int64_t Imm = 0;
4836   SMLoc S = Parser.getTok().getLoc();
4837 
4838   if (getLexer().getKind() == AsmToken::Identifier &&
4839       Parser.getTok().getString() == "gpr_idx" &&
4840       getLexer().peekTok().is(AsmToken::LParen)) {
4841 
4842     Parser.Lex();
4843     Parser.Lex();
4844 
4845     // If parse failed, trigger an error but do not return error code
4846     // to avoid excessive error messages.
4847     Imm = parseGPRIdxMacro();
4848 
4849   } else {
4850     if (getParser().parseAbsoluteExpression(Imm))
4851       return MatchOperand_NoMatch;
4852     if (Imm < 0 || !isUInt<4>(Imm)) {
4853       Error(S, "invalid immediate: only 4-bit values are legal");
4854     }
4855   }
4856 
4857   Operands.push_back(
4858       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
4859   return MatchOperand_Success;
4860 }
4861 
4862 bool AMDGPUOperand::isGPRIdxMode() const {
4863   return isImmTy(ImmTyGprIdxMode);
4864 }
4865 
4866 //===----------------------------------------------------------------------===//
4867 // sopp branch targets
4868 //===----------------------------------------------------------------------===//
4869 
4870 OperandMatchResultTy
4871 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
4872   SMLoc S = Parser.getTok().getLoc();
4873 
4874   switch (getLexer().getKind()) {
4875     default: return MatchOperand_ParseFail;
4876     case AsmToken::Integer: {
4877       int64_t Imm;
4878       if (getParser().parseAbsoluteExpression(Imm))
4879         return MatchOperand_ParseFail;
4880       Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
4881       return MatchOperand_Success;
4882     }
4883 
4884     case AsmToken::Identifier:
4885       Operands.push_back(AMDGPUOperand::CreateExpr(this,
4886           MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
4887                                   Parser.getTok().getString()), getContext()), S));
4888       Parser.Lex();
4889       return MatchOperand_Success;
4890   }
4891 }
4892 
4893 //===----------------------------------------------------------------------===//
4894 // mubuf
4895 //===----------------------------------------------------------------------===//
4896 
4897 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
4898   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
4899 }
4900 
4901 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
4902   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
4903 }
4904 
4905 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
4906                                const OperandVector &Operands,
4907                                bool IsAtomic,
4908                                bool IsAtomicReturn,
4909                                bool IsLds) {
4910   bool IsLdsOpcode = IsLds;
4911   bool HasLdsModifier = false;
4912   OptionalImmIndexMap OptionalIdx;
4913   assert(IsAtomicReturn ? IsAtomic : true);
4914   unsigned FirstOperandIdx = 1;
4915 
4916   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
4917     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4918 
4919     // Add the register arguments
4920     if (Op.isReg()) {
4921       Op.addRegOperands(Inst, 1);
4922       // Insert a tied src for atomic return dst.
4923       // This cannot be postponed as subsequent calls to
4924       // addImmOperands rely on correct number of MC operands.
4925       if (IsAtomicReturn && i == FirstOperandIdx)
4926         Op.addRegOperands(Inst, 1);
4927       continue;
4928     }
4929 
4930     // Handle the case where soffset is an immediate
4931     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
4932       Op.addImmOperands(Inst, 1);
4933       continue;
4934     }
4935 
4936     HasLdsModifier = Op.isLDS();
4937 
4938     // Handle tokens like 'offen' which are sometimes hard-coded into the
4939     // asm string.  There are no MCInst operands for these.
4940     if (Op.isToken()) {
4941       continue;
4942     }
4943     assert(Op.isImm());
4944 
4945     // Handle optional arguments
4946     OptionalIdx[Op.getImmTy()] = i;
4947   }
4948 
4949   // This is a workaround for an llvm quirk which may result in an
4950   // incorrect instruction selection. Lds and non-lds versions of
4951   // MUBUF instructions are identical except that lds versions
4952   // have mandatory 'lds' modifier. However this modifier follows
4953   // optional modifiers and llvm asm matcher regards this 'lds'
4954   // modifier as an optional one. As a result, an lds version
4955   // of opcode may be selected even if it has no 'lds' modifier.
4956   if (IsLdsOpcode && !HasLdsModifier) {
4957     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
4958     if (NoLdsOpcode != -1) { // Got lds version - correct it.
4959       Inst.setOpcode(NoLdsOpcode);
4960       IsLdsOpcode = false;
4961     }
4962   }
4963 
4964   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
4965   if (!IsAtomic) { // glc is hard-coded.
4966     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
4967   }
4968   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
4969 
4970   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
4971     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
4972   }
4973 }
4974 
4975 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
4976   OptionalImmIndexMap OptionalIdx;
4977 
4978   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4979     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4980 
4981     // Add the register arguments
4982     if (Op.isReg()) {
4983       Op.addRegOperands(Inst, 1);
4984       continue;
4985     }
4986 
4987     // Handle the case where soffset is an immediate
4988     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
4989       Op.addImmOperands(Inst, 1);
4990       continue;
4991     }
4992 
4993     // Handle tokens like 'offen' which are sometimes hard-coded into the
4994     // asm string.  There are no MCInst operands for these.
4995     if (Op.isToken()) {
4996       continue;
4997     }
4998     assert(Op.isImm());
4999 
5000     // Handle optional arguments
5001     OptionalIdx[Op.getImmTy()] = i;
5002   }
5003 
5004   addOptionalImmOperand(Inst, Operands, OptionalIdx,
5005                         AMDGPUOperand::ImmTyOffset);
5006   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
5007   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5008   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5009   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5010 }
5011 
5012 //===----------------------------------------------------------------------===//
5013 // mimg
5014 //===----------------------------------------------------------------------===//
5015 
5016 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
5017                               bool IsAtomic) {
5018   unsigned I = 1;
5019   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5020   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5021     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5022   }
5023 
5024   if (IsAtomic) {
5025     // Add src, same as dst
5026     assert(Desc.getNumDefs() == 1);
5027     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
5028   }
5029 
5030   OptionalImmIndexMap OptionalIdx;
5031 
5032   for (unsigned E = Operands.size(); I != E; ++I) {
5033     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5034 
5035     // Add the register arguments
5036     if (Op.isReg()) {
5037       Op.addRegOperands(Inst, 1);
5038     } else if (Op.isImmModifier()) {
5039       OptionalIdx[Op.getImmTy()] = I;
5040     } else {
5041       llvm_unreachable("unexpected operand type");
5042     }
5043   }
5044 
5045   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
5046   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
5047   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5048   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5049   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
5050   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5051   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
5052   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
5053   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
5054 }
5055 
5056 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
5057   cvtMIMG(Inst, Operands, true);
5058 }
5059 
5060 //===----------------------------------------------------------------------===//
5061 // smrd
5062 //===----------------------------------------------------------------------===//
5063 
5064 bool AMDGPUOperand::isSMRDOffset8() const {
5065   return isImm() && isUInt<8>(getImm());
5066 }
5067 
5068 bool AMDGPUOperand::isSMRDOffset20() const {
5069   return isImm() && isUInt<20>(getImm());
5070 }
5071 
5072 bool AMDGPUOperand::isSMRDLiteralOffset() const {
5073   // 32-bit literals are only supported on CI and we only want to use them
5074   // when the offset is > 8-bits.
5075   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
5076 }
5077 
5078 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
5079   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5080 }
5081 
5082 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
5083   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5084 }
5085 
5086 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
5087   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5088 }
5089 
5090 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
5091   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5092 }
5093 
5094 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
5095   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5096 }
5097 
5098 //===----------------------------------------------------------------------===//
5099 // vop3
5100 //===----------------------------------------------------------------------===//
5101 
5102 static bool ConvertOmodMul(int64_t &Mul) {
5103   if (Mul != 1 && Mul != 2 && Mul != 4)
5104     return false;
5105 
5106   Mul >>= 1;
5107   return true;
5108 }
5109 
5110 static bool ConvertOmodDiv(int64_t &Div) {
5111   if (Div == 1) {
5112     Div = 0;
5113     return true;
5114   }
5115 
5116   if (Div == 2) {
5117     Div = 3;
5118     return true;
5119   }
5120 
5121   return false;
5122 }
5123 
5124 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
5125   if (BoundCtrl == 0) {
5126     BoundCtrl = 1;
5127     return true;
5128   }
5129 
5130   if (BoundCtrl == -1) {
5131     BoundCtrl = 0;
5132     return true;
5133   }
5134 
5135   return false;
5136 }
5137 
5138 // Note: the order in this table matches the order of operands in AsmString.
5139 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
5140   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
5141   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
5142   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
5143   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
5144   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
5145   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
5146   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
5147   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
5148   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
5149   {"dfmt",    AMDGPUOperand::ImmTyFORMAT, false, nullptr},
5150   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
5151   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
5152   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
5153   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
5154   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
5155   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
5156   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
5157   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
5158   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
5159   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
5160   {"a16",     AMDGPUOperand::ImmTyR128A16,  true, nullptr},
5161   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
5162   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
5163   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
5164   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
5165   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
5166   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
5167   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
5168   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
5169   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
5170   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
5171   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
5172   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
5173   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
5174   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
5175   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
5176   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
5177 };
5178 
5179 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
5180   unsigned size = Operands.size();
5181   assert(size > 0);
5182 
5183   OperandMatchResultTy res = parseOptionalOpr(Operands);
5184 
5185   // This is a hack to enable hardcoded mandatory operands which follow
5186   // optional operands.
5187   //
5188   // Current design assumes that all operands after the first optional operand
5189   // are also optional. However implementation of some instructions violates
5190   // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
5191   //
5192   // To alleviate this problem, we have to (implicitly) parse extra operands
5193   // to make sure autogenerated parser of custom operands never hit hardcoded
5194   // mandatory operands.
5195 
5196   if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
5197 
5198     // We have parsed the first optional operand.
5199     // Parse as many operands as necessary to skip all mandatory operands.
5200 
5201     for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
5202       if (res != MatchOperand_Success ||
5203           getLexer().is(AsmToken::EndOfStatement)) break;
5204       if (getLexer().is(AsmToken::Comma)) Parser.Lex();
5205       res = parseOptionalOpr(Operands);
5206     }
5207   }
5208 
5209   return res;
5210 }
5211 
5212 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
5213   OperandMatchResultTy res;
5214   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
5215     // try to parse any optional operand here
5216     if (Op.IsBit) {
5217       res = parseNamedBit(Op.Name, Operands, Op.Type);
5218     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
5219       res = parseOModOperand(Operands);
5220     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
5221                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
5222                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
5223       res = parseSDWASel(Operands, Op.Name, Op.Type);
5224     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
5225       res = parseSDWADstUnused(Operands);
5226     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
5227                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
5228                Op.Type == AMDGPUOperand::ImmTyNegLo ||
5229                Op.Type == AMDGPUOperand::ImmTyNegHi) {
5230       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
5231                                         Op.ConvertResult);
5232     } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT) {
5233       res = parseDfmtNfmt(Operands);
5234     } else {
5235       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
5236     }
5237     if (res != MatchOperand_NoMatch) {
5238       return res;
5239     }
5240   }
5241   return MatchOperand_NoMatch;
5242 }
5243 
5244 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
5245   StringRef Name = Parser.getTok().getString();
5246   if (Name == "mul") {
5247     return parseIntWithPrefix("mul", Operands,
5248                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
5249   }
5250 
5251   if (Name == "div") {
5252     return parseIntWithPrefix("div", Operands,
5253                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
5254   }
5255 
5256   return MatchOperand_NoMatch;
5257 }
5258 
5259 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
5260   cvtVOP3P(Inst, Operands);
5261 
5262   int Opc = Inst.getOpcode();
5263 
5264   int SrcNum;
5265   const int Ops[] = { AMDGPU::OpName::src0,
5266                       AMDGPU::OpName::src1,
5267                       AMDGPU::OpName::src2 };
5268   for (SrcNum = 0;
5269        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
5270        ++SrcNum);
5271   assert(SrcNum > 0);
5272 
5273   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
5274   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
5275 
5276   if ((OpSel & (1 << SrcNum)) != 0) {
5277     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
5278     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
5279     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
5280   }
5281 }
5282 
5283 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
5284       // 1. This operand is input modifiers
5285   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
5286       // 2. This is not last operand
5287       && Desc.NumOperands > (OpNum + 1)
5288       // 3. Next operand is register class
5289       && Desc.OpInfo[OpNum + 1].RegClass != -1
5290       // 4. Next register is not tied to any other operand
5291       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
5292 }
5293 
5294 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
5295 {
5296   OptionalImmIndexMap OptionalIdx;
5297   unsigned Opc = Inst.getOpcode();
5298 
5299   unsigned I = 1;
5300   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5301   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5302     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5303   }
5304 
5305   for (unsigned E = Operands.size(); I != E; ++I) {
5306     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5307     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5308       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
5309     } else if (Op.isInterpSlot() ||
5310                Op.isInterpAttr() ||
5311                Op.isAttrChan()) {
5312       Inst.addOperand(MCOperand::createImm(Op.Imm.Val));
5313     } else if (Op.isImmModifier()) {
5314       OptionalIdx[Op.getImmTy()] = I;
5315     } else {
5316       llvm_unreachable("unhandled operand type");
5317     }
5318   }
5319 
5320   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
5321     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
5322   }
5323 
5324   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
5325     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
5326   }
5327 
5328   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
5329     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
5330   }
5331 }
5332 
5333 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
5334                               OptionalImmIndexMap &OptionalIdx) {
5335   unsigned Opc = Inst.getOpcode();
5336 
5337   unsigned I = 1;
5338   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5339   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5340     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5341   }
5342 
5343   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
5344     // This instruction has src modifiers
5345     for (unsigned E = Operands.size(); I != E; ++I) {
5346       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5347       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5348         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
5349       } else if (Op.isImmModifier()) {
5350         OptionalIdx[Op.getImmTy()] = I;
5351       } else if (Op.isRegOrImm()) {
5352         Op.addRegOrImmOperands(Inst, 1);
5353       } else {
5354         llvm_unreachable("unhandled operand type");
5355       }
5356     }
5357   } else {
5358     // No src modifiers
5359     for (unsigned E = Operands.size(); I != E; ++I) {
5360       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5361       if (Op.isMod()) {
5362         OptionalIdx[Op.getImmTy()] = I;
5363       } else {
5364         Op.addRegOrImmOperands(Inst, 1);
5365       }
5366     }
5367   }
5368 
5369   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
5370     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
5371   }
5372 
5373   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
5374     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
5375   }
5376 
5377   // Special case v_mac_{f16, f32} and v_fmac_f32 (gfx906):
5378   // it has src2 register operand that is tied to dst operand
5379   // we don't allow modifiers for this operand in assembler so src2_modifiers
5380   // should be 0.
5381   if (Opc == AMDGPU::V_MAC_F32_e64_si ||
5382       Opc == AMDGPU::V_MAC_F32_e64_vi ||
5383       Opc == AMDGPU::V_MAC_F16_e64_vi ||
5384       Opc == AMDGPU::V_FMAC_F32_e64_vi) {
5385     auto it = Inst.begin();
5386     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
5387     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
5388     ++it;
5389     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
5390   }
5391 }
5392 
5393 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
5394   OptionalImmIndexMap OptionalIdx;
5395   cvtVOP3(Inst, Operands, OptionalIdx);
5396 }
5397 
5398 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
5399                                const OperandVector &Operands) {
5400   OptionalImmIndexMap OptIdx;
5401   const int Opc = Inst.getOpcode();
5402   const MCInstrDesc &Desc = MII.get(Opc);
5403 
5404   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
5405 
5406   cvtVOP3(Inst, Operands, OptIdx);
5407 
5408   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
5409     assert(!IsPacked);
5410     Inst.addOperand(Inst.getOperand(0));
5411   }
5412 
5413   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
5414   // instruction, and then figure out where to actually put the modifiers
5415 
5416   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
5417 
5418   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
5419   if (OpSelHiIdx != -1) {
5420     int DefaultVal = IsPacked ? -1 : 0;
5421     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
5422                           DefaultVal);
5423   }
5424 
5425   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
5426   if (NegLoIdx != -1) {
5427     assert(IsPacked);
5428     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
5429     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
5430   }
5431 
5432   const int Ops[] = { AMDGPU::OpName::src0,
5433                       AMDGPU::OpName::src1,
5434                       AMDGPU::OpName::src2 };
5435   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
5436                          AMDGPU::OpName::src1_modifiers,
5437                          AMDGPU::OpName::src2_modifiers };
5438 
5439   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
5440 
5441   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
5442   unsigned OpSelHi = 0;
5443   unsigned NegLo = 0;
5444   unsigned NegHi = 0;
5445 
5446   if (OpSelHiIdx != -1) {
5447     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
5448   }
5449 
5450   if (NegLoIdx != -1) {
5451     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
5452     NegLo = Inst.getOperand(NegLoIdx).getImm();
5453     NegHi = Inst.getOperand(NegHiIdx).getImm();
5454   }
5455 
5456   for (int J = 0; J < 3; ++J) {
5457     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
5458     if (OpIdx == -1)
5459       break;
5460 
5461     uint32_t ModVal = 0;
5462 
5463     if ((OpSel & (1 << J)) != 0)
5464       ModVal |= SISrcMods::OP_SEL_0;
5465 
5466     if ((OpSelHi & (1 << J)) != 0)
5467       ModVal |= SISrcMods::OP_SEL_1;
5468 
5469     if ((NegLo & (1 << J)) != 0)
5470       ModVal |= SISrcMods::NEG;
5471 
5472     if ((NegHi & (1 << J)) != 0)
5473       ModVal |= SISrcMods::NEG_HI;
5474 
5475     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
5476 
5477     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
5478   }
5479 }
5480 
5481 //===----------------------------------------------------------------------===//
5482 // dpp
5483 //===----------------------------------------------------------------------===//
5484 
5485 bool AMDGPUOperand::isDPPCtrl() const {
5486   using namespace AMDGPU::DPP;
5487 
5488   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
5489   if (result) {
5490     int64_t Imm = getImm();
5491     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
5492            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
5493            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
5494            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
5495            (Imm == DppCtrl::WAVE_SHL1) ||
5496            (Imm == DppCtrl::WAVE_ROL1) ||
5497            (Imm == DppCtrl::WAVE_SHR1) ||
5498            (Imm == DppCtrl::WAVE_ROR1) ||
5499            (Imm == DppCtrl::ROW_MIRROR) ||
5500            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
5501            (Imm == DppCtrl::BCAST15) ||
5502            (Imm == DppCtrl::BCAST31);
5503   }
5504   return false;
5505 }
5506 
5507 bool AMDGPUOperand::isS16Imm() const {
5508   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
5509 }
5510 
5511 bool AMDGPUOperand::isU16Imm() const {
5512   return isImm() && isUInt<16>(getImm());
5513 }
5514 
5515 OperandMatchResultTy
5516 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
5517   using namespace AMDGPU::DPP;
5518 
5519   SMLoc S = Parser.getTok().getLoc();
5520   StringRef Prefix;
5521   int64_t Int;
5522 
5523   if (getLexer().getKind() == AsmToken::Identifier) {
5524     Prefix = Parser.getTok().getString();
5525   } else {
5526     return MatchOperand_NoMatch;
5527   }
5528 
5529   if (Prefix == "row_mirror") {
5530     Int = DppCtrl::ROW_MIRROR;
5531     Parser.Lex();
5532   } else if (Prefix == "row_half_mirror") {
5533     Int = DppCtrl::ROW_HALF_MIRROR;
5534     Parser.Lex();
5535   } else {
5536     // Check to prevent parseDPPCtrlOps from eating invalid tokens
5537     if (Prefix != "quad_perm"
5538         && Prefix != "row_shl"
5539         && Prefix != "row_shr"
5540         && Prefix != "row_ror"
5541         && Prefix != "wave_shl"
5542         && Prefix != "wave_rol"
5543         && Prefix != "wave_shr"
5544         && Prefix != "wave_ror"
5545         && Prefix != "row_bcast") {
5546       return MatchOperand_NoMatch;
5547     }
5548 
5549     Parser.Lex();
5550     if (getLexer().isNot(AsmToken::Colon))
5551       return MatchOperand_ParseFail;
5552 
5553     if (Prefix == "quad_perm") {
5554       // quad_perm:[%d,%d,%d,%d]
5555       Parser.Lex();
5556       if (getLexer().isNot(AsmToken::LBrac))
5557         return MatchOperand_ParseFail;
5558       Parser.Lex();
5559 
5560       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
5561         return MatchOperand_ParseFail;
5562 
5563       for (int i = 0; i < 3; ++i) {
5564         if (getLexer().isNot(AsmToken::Comma))
5565           return MatchOperand_ParseFail;
5566         Parser.Lex();
5567 
5568         int64_t Temp;
5569         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
5570           return MatchOperand_ParseFail;
5571         const int shift = i*2 + 2;
5572         Int += (Temp << shift);
5573       }
5574 
5575       if (getLexer().isNot(AsmToken::RBrac))
5576         return MatchOperand_ParseFail;
5577       Parser.Lex();
5578     } else {
5579       // sel:%d
5580       Parser.Lex();
5581       if (getParser().parseAbsoluteExpression(Int))
5582         return MatchOperand_ParseFail;
5583 
5584       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
5585         Int |= DppCtrl::ROW_SHL0;
5586       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
5587         Int |= DppCtrl::ROW_SHR0;
5588       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
5589         Int |= DppCtrl::ROW_ROR0;
5590       } else if (Prefix == "wave_shl" && 1 == Int) {
5591         Int = DppCtrl::WAVE_SHL1;
5592       } else if (Prefix == "wave_rol" && 1 == Int) {
5593         Int = DppCtrl::WAVE_ROL1;
5594       } else if (Prefix == "wave_shr" && 1 == Int) {
5595         Int = DppCtrl::WAVE_SHR1;
5596       } else if (Prefix == "wave_ror" && 1 == Int) {
5597         Int = DppCtrl::WAVE_ROR1;
5598       } else if (Prefix == "row_bcast") {
5599         if (Int == 15) {
5600           Int = DppCtrl::BCAST15;
5601         } else if (Int == 31) {
5602           Int = DppCtrl::BCAST31;
5603         } else {
5604           return MatchOperand_ParseFail;
5605         }
5606       } else {
5607         return MatchOperand_ParseFail;
5608       }
5609     }
5610   }
5611 
5612   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
5613   return MatchOperand_Success;
5614 }
5615 
5616 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
5617   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
5618 }
5619 
5620 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
5621   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
5622 }
5623 
5624 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
5625   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
5626 }
5627 
5628 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
5629   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
5630 }
5631 
5632 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
5633   OptionalImmIndexMap OptionalIdx;
5634 
5635   unsigned I = 1;
5636   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5637   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5638     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5639   }
5640 
5641   for (unsigned E = Operands.size(); I != E; ++I) {
5642     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
5643                                             MCOI::TIED_TO);
5644     if (TiedTo != -1) {
5645       assert((unsigned)TiedTo < Inst.getNumOperands());
5646       // handle tied old or src2 for MAC instructions
5647       Inst.addOperand(Inst.getOperand(TiedTo));
5648     }
5649     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5650     // Add the register arguments
5651     if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
5652       // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
5653       // Skip it.
5654       continue;
5655     } if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5656       Op.addRegWithFPInputModsOperands(Inst, 2);
5657     } else if (Op.isDPPCtrl()) {
5658       Op.addImmOperands(Inst, 1);
5659     } else if (Op.isImm()) {
5660       // Handle optional arguments
5661       OptionalIdx[Op.getImmTy()] = I;
5662     } else {
5663       llvm_unreachable("Invalid operand type");
5664     }
5665   }
5666 
5667   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
5668   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
5669   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
5670 }
5671 
5672 //===----------------------------------------------------------------------===//
5673 // sdwa
5674 //===----------------------------------------------------------------------===//
5675 
5676 OperandMatchResultTy
5677 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
5678                               AMDGPUOperand::ImmTy Type) {
5679   using namespace llvm::AMDGPU::SDWA;
5680 
5681   SMLoc S = Parser.getTok().getLoc();
5682   StringRef Value;
5683   OperandMatchResultTy res;
5684 
5685   res = parseStringWithPrefix(Prefix, Value);
5686   if (res != MatchOperand_Success) {
5687     return res;
5688   }
5689 
5690   int64_t Int;
5691   Int = StringSwitch<int64_t>(Value)
5692         .Case("BYTE_0", SdwaSel::BYTE_0)
5693         .Case("BYTE_1", SdwaSel::BYTE_1)
5694         .Case("BYTE_2", SdwaSel::BYTE_2)
5695         .Case("BYTE_3", SdwaSel::BYTE_3)
5696         .Case("WORD_0", SdwaSel::WORD_0)
5697         .Case("WORD_1", SdwaSel::WORD_1)
5698         .Case("DWORD", SdwaSel::DWORD)
5699         .Default(0xffffffff);
5700   Parser.Lex(); // eat last token
5701 
5702   if (Int == 0xffffffff) {
5703     return MatchOperand_ParseFail;
5704   }
5705 
5706   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
5707   return MatchOperand_Success;
5708 }
5709 
5710 OperandMatchResultTy
5711 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
5712   using namespace llvm::AMDGPU::SDWA;
5713 
5714   SMLoc S = Parser.getTok().getLoc();
5715   StringRef Value;
5716   OperandMatchResultTy res;
5717 
5718   res = parseStringWithPrefix("dst_unused", Value);
5719   if (res != MatchOperand_Success) {
5720     return res;
5721   }
5722 
5723   int64_t Int;
5724   Int = StringSwitch<int64_t>(Value)
5725         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
5726         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
5727         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
5728         .Default(0xffffffff);
5729   Parser.Lex(); // eat last token
5730 
5731   if (Int == 0xffffffff) {
5732     return MatchOperand_ParseFail;
5733   }
5734 
5735   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
5736   return MatchOperand_Success;
5737 }
5738 
5739 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
5740   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
5741 }
5742 
5743 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
5744   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
5745 }
5746 
5747 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
5748   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
5749 }
5750 
5751 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
5752   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
5753 }
5754 
5755 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
5756                               uint64_t BasicInstType, bool skipVcc) {
5757   using namespace llvm::AMDGPU::SDWA;
5758 
5759   OptionalImmIndexMap OptionalIdx;
5760   bool skippedVcc = false;
5761 
5762   unsigned I = 1;
5763   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5764   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5765     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5766   }
5767 
5768   for (unsigned E = Operands.size(); I != E; ++I) {
5769     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5770     if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
5771       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
5772       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
5773       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
5774       // Skip VCC only if we didn't skip it on previous iteration.
5775       if (BasicInstType == SIInstrFlags::VOP2 &&
5776           (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
5777         skippedVcc = true;
5778         continue;
5779       } else if (BasicInstType == SIInstrFlags::VOPC &&
5780                  Inst.getNumOperands() == 0) {
5781         skippedVcc = true;
5782         continue;
5783       }
5784     }
5785     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5786       Op.addRegOrImmWithInputModsOperands(Inst, 2);
5787     } else if (Op.isImm()) {
5788       // Handle optional arguments
5789       OptionalIdx[Op.getImmTy()] = I;
5790     } else {
5791       llvm_unreachable("Invalid operand type");
5792     }
5793     skippedVcc = false;
5794   }
5795 
5796   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
5797       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
5798     // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
5799     switch (BasicInstType) {
5800     case SIInstrFlags::VOP1:
5801       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
5802       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
5803         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
5804       }
5805       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
5806       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
5807       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
5808       break;
5809 
5810     case SIInstrFlags::VOP2:
5811       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
5812       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
5813         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
5814       }
5815       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
5816       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
5817       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
5818       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
5819       break;
5820 
5821     case SIInstrFlags::VOPC:
5822       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
5823       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
5824       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
5825       break;
5826 
5827     default:
5828       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
5829     }
5830   }
5831 
5832   // special case v_mac_{f16, f32}:
5833   // it has src2 register operand that is tied to dst operand
5834   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
5835       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
5836     auto it = Inst.begin();
5837     std::advance(
5838       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
5839     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
5840   }
5841 }
5842 
5843 /// Force static initialization.
5844 extern "C" void LLVMInitializeAMDGPUAsmParser() {
5845   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
5846   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
5847 }
5848 
5849 #define GET_REGISTER_MATCHER
5850 #define GET_MATCHER_IMPLEMENTATION
5851 #define GET_MNEMONIC_SPELL_CHECKER
5852 #include "AMDGPUGenAsmMatcher.inc"
5853 
5854 // This fuction should be defined after auto-generated include so that we have
5855 // MatchClassKind enum defined
5856 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
5857                                                      unsigned Kind) {
5858   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
5859   // But MatchInstructionImpl() expects to meet token and fails to validate
5860   // operand. This method checks if we are given immediate operand but expect to
5861   // get corresponding token.
5862   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
5863   switch (Kind) {
5864   case MCK_addr64:
5865     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
5866   case MCK_gds:
5867     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
5868   case MCK_lds:
5869     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
5870   case MCK_glc:
5871     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
5872   case MCK_idxen:
5873     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
5874   case MCK_offen:
5875     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
5876   case MCK_SSrcB32:
5877     // When operands have expression values, they will return true for isToken,
5878     // because it is not possible to distinguish between a token and an
5879     // expression at parse time. MatchInstructionImpl() will always try to
5880     // match an operand as a token, when isToken returns true, and when the
5881     // name of the expression is not a valid token, the match will fail,
5882     // so we need to handle it here.
5883     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
5884   case MCK_SSrcF32:
5885     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
5886   case MCK_SoppBrTarget:
5887     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
5888   case MCK_VReg32OrOff:
5889     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
5890   case MCK_InterpSlot:
5891     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
5892   case MCK_Attr:
5893     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
5894   case MCK_AttrChan:
5895     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
5896   default:
5897     return Match_InvalidOperand;
5898   }
5899 }
5900 
5901 //===----------------------------------------------------------------------===//
5902 // endpgm
5903 //===----------------------------------------------------------------------===//
5904 
5905 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
5906   SMLoc S = Parser.getTok().getLoc();
5907   int64_t Imm = 0;
5908 
5909   if (!parseExpr(Imm)) {
5910     // The operand is optional, if not present default to 0
5911     Imm = 0;
5912   }
5913 
5914   if (!isUInt<16>(Imm)) {
5915     Error(S, "expected a 16-bit value");
5916     return MatchOperand_ParseFail;
5917   }
5918 
5919   Operands.push_back(
5920       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
5921   return MatchOperand_Success;
5922 }
5923 
5924 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
5925