1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "Utils/AMDGPUAsmUtils.h"
16 #include "Utils/AMDGPUBaseInfo.h"
17 #include "Utils/AMDKernelCodeTUtils.h"
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SmallBitVector.h"
23 #include "llvm/ADT/SmallString.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/ADT/StringSwitch.h"
26 #include "llvm/ADT/Twine.h"
27 #include "llvm/BinaryFormat/ELF.h"
28 #include "llvm/MC/MCAsmInfo.h"
29 #include "llvm/MC/MCContext.h"
30 #include "llvm/MC/MCExpr.h"
31 #include "llvm/MC/MCInst.h"
32 #include "llvm/MC/MCInstrDesc.h"
33 #include "llvm/MC/MCInstrInfo.h"
34 #include "llvm/MC/MCParser/MCAsmLexer.h"
35 #include "llvm/MC/MCParser/MCAsmParser.h"
36 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
37 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
38 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
39 #include "llvm/MC/MCRegisterInfo.h"
40 #include "llvm/MC/MCStreamer.h"
41 #include "llvm/MC/MCSubtargetInfo.h"
42 #include "llvm/MC/MCSymbol.h"
43 #include "llvm/Support/AMDGPUMetadata.h"
44 #include "llvm/Support/AMDHSAKernelDescriptor.h"
45 #include "llvm/Support/Casting.h"
46 #include "llvm/Support/Compiler.h"
47 #include "llvm/Support/ErrorHandling.h"
48 #include "llvm/Support/MachineValueType.h"
49 #include "llvm/Support/MathExtras.h"
50 #include "llvm/Support/SMLoc.h"
51 #include "llvm/Support/TargetParser.h"
52 #include "llvm/Support/TargetRegistry.h"
53 #include "llvm/Support/raw_ostream.h"
54 #include <algorithm>
55 #include <cassert>
56 #include <cstdint>
57 #include <cstring>
58 #include <iterator>
59 #include <map>
60 #include <memory>
61 #include <string>
62 
63 using namespace llvm;
64 using namespace llvm::AMDGPU;
65 using namespace llvm::amdhsa;
66 
67 namespace {
68 
69 class AMDGPUAsmParser;
70 
71 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };
72 
73 //===----------------------------------------------------------------------===//
74 // Operand
75 //===----------------------------------------------------------------------===//
76 
/// A single parsed AMDGPU assembly operand. This is a tagged union of a
/// token, an immediate, a register, or an MCExpr; the Kind member selects
/// which union member below is active. The many is*() predicates are the
/// operand-class checks invoked by the TableGen-generated asm matcher.
class AMDGPUOperand : public MCParsedAsmOperand {
  // Discriminator for the anonymous union of TokOp/ImmOp/RegOp/Expr below.
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  // Back-pointer to the parser that created this operand (not owned).
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
    : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  /// Source-operand modifiers. Abs/Neg are floating-point modifiers and
  /// Sext is the integer modifier; getModifiersOperand() asserts the two
  /// groups are never set simultaneously.
  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    // Encode the FP modifiers as a SISrcMods immediate operand value.
    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    // Encode the integer modifiers as a SISrcMods immediate operand value.
    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    // Encode whichever modifier group is active (or 0 if none).
    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  /// Classifies immediate operands. ImmTyNone is a plain literal; every
  /// other value identifies a named instruction modifier (memory offsets,
  /// cache-policy bits, DPP/SDWA controls, export/interp fields, etc.).
  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyEndpgm,
    ImmTyHigh
  };

private:
  // Payload for Kind == Token: a non-owning view of the token text.
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  // Payload for Kind == Immediate. Val holds the bits even for FP
  // immediates (IsFPImm distinguishes how Val was parsed).
  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  // Payload for Kind == Register.
  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  // Active member is selected by Kind; all members are trivially copyable.
  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    if (Kind != Expression || !Expr)
      return false;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  // A "plain" register operand: a register with no abs/neg/sext modifiers.
  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  // True for a register in any VGPR class (32..512-bit tuples).
  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  // 32-bit VGPR or the literal 'off' token (used e.g. by export targets).
  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  // An immediate that carries modifier semantics (anything but a literal).
  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  // One-line predicates for each named-modifier immediate; several also
  // range-check the value (e.g. 16-bit DS offsets, 8-bit offset0/1).
  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
  bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isDLC() const { return isImmTy(ImmTyDLC); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  // SCSrc*: SGPR or inline constant (no literal, no modifiers).
  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  // SSrc*: like SCSrc* but additionally accepts a literal (or expression).
  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    // Presumably kept only to satisfy the generated matcher interface;
    // this operand class is never matched. TODO(review): confirm.
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    // See isSSrcV2B16() above; never expected to be queried.
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  // VCSrc*: VGPR/SGPR or inline constant (no literal).
  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  // VSrc*: like VCSrc* but additionally accepts a literal (or expression).
  // Note the B* variants intentionally delegate to the F* inline-constant
  // check, then distinguish via the integer literal check.
  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcF16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    // See isSSrcV2B16(); never expected to be queried.
    llvm_unreachable("cannot happen");
    return isVSrcB16();
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    // See isSSrcV2F16(); never expected to be queried.
    llvm_unreachable("cannot happen");
    return isVSrcF16();
  }

  // KImm*: FP literal encoded directly in the instruction (e.g. VOP2 madmk).
  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  // AMDGPU assembly has no dedicated memory operand kind.
  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMRDOffset20() const;
  bool isSMRDLiteralOffset() const;
  bool isDPPCtrl() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  // Used when an expression operand is reinterpreted as a token (see
  // isToken()): the token text is the referenced symbol's name.
  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  // Modifiers live on registers and on plain (ImmTyNone) immediates only.
  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  // Emits the modifiers word first, then the register/immediate itself.
  // Modifiers are not re-applied to the immediate (ApplyModifiers=false)
  // because they are already encoded in the separate modifiers operand.
  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  // Debug helper: prints the symbolic name of an ImmTy value.
  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyDLC: OS << "DLC"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyEndpgm:
      OS << "Endpgm";
      break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  // Factory functions: the only sanctioned way to build operands; each
  // fully initializes the active union member and the source locations.
  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  // NOTE(review): HasExplicitEncodingSize is currently unused here.
  // The token keeps a pointer into Str; the caller must keep the
  // underlying text alive for the operand's lifetime.
  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};
790 
791 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
792   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
793   return OS;
794 }
795 
796 //===----------------------------------------------------------------------===//
797 // AsmParser
798 //===----------------------------------------------------------------------===//
799 
800 // Holds info related to the current kernel, e.g. count of SGPRs used.
801 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
802 // .amdgpu_hsa_kernel or at EOF.
803 class KernelScopeInfo {
804   int SgprIndexUnusedMin = -1;
805   int VgprIndexUnusedMin = -1;
806   MCContext *Ctx = nullptr;
807 
808   void usesSgprAt(int i) {
809     if (i >= SgprIndexUnusedMin) {
810       SgprIndexUnusedMin = ++i;
811       if (Ctx) {
812         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
813         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
814       }
815     }
816   }
817 
818   void usesVgprAt(int i) {
819     if (i >= VgprIndexUnusedMin) {
820       VgprIndexUnusedMin = ++i;
821       if (Ctx) {
822         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
823         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
824       }
825     }
826   }
827 
828 public:
829   KernelScopeInfo() = default;
830 
831   void initialize(MCContext &Context) {
832     Ctx = &Context;
833     usesSgprAt(SgprIndexUnusedMin = -1);
834     usesVgprAt(VgprIndexUnusedMin = -1);
835   }
836 
837   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
838     switch (RegKind) {
839       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
840       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
841       default: break;
842     }
843   }
844 };
845 
846 class AMDGPUAsmParser : public MCTargetAsmParser {
847   MCAsmParser &Parser;
848 
849   // Number of extra operands parsed after the first optional operand.
850   // This may be necessary to skip hardcoded mandatory operands.
851   static const unsigned MAX_OPR_LOOKAHEAD = 8;
852 
853   unsigned ForcedEncodingSize = 0;
854   bool ForcedDPP = false;
855   bool ForcedSDWA = false;
856   KernelScopeInfo KernelScope;
857 
858   /// @name Auto-generated Match Functions
859   /// {
860 
861 #define GET_ASSEMBLER_HEADER
862 #include "AMDGPUGenAsmMatcher.inc"
863 
864   /// }
865 
866 private:
867   bool ParseAsAbsoluteExpression(uint32_t &Ret);
868   bool OutOfRangeError(SMRange Range);
869   /// Calculate VGPR/SGPR blocks required for given target, reserved
870   /// registers, and user-specified NextFreeXGPR values.
871   ///
872   /// \param Features [in] Target features, used for bug corrections.
873   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
874   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
875   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
876   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
877   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
878   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
879   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
880   /// \param VGPRBlocks [out] Result VGPR block count.
881   /// \param SGPRBlocks [out] Result SGPR block count.
882   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
883                           bool FlatScrUsed, bool XNACKUsed,
884                           unsigned NextFreeVGPR, SMRange VGPRRange,
885                           unsigned NextFreeSGPR, SMRange SGPRRange,
886                           unsigned &VGPRBlocks, unsigned &SGPRBlocks);
887   bool ParseDirectiveAMDGCNTarget();
888   bool ParseDirectiveAMDHSAKernel();
889   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
890   bool ParseDirectiveHSACodeObjectVersion();
891   bool ParseDirectiveHSACodeObjectISA();
892   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
893   bool ParseDirectiveAMDKernelCodeT();
894   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
895   bool ParseDirectiveAMDGPUHsaKernel();
896 
897   bool ParseDirectiveISAVersion();
898   bool ParseDirectiveHSAMetadata();
899   bool ParseDirectivePALMetadataBegin();
900   bool ParseDirectivePALMetadata();
901 
902   /// Common code to parse out a block of text (typically YAML) between start and
903   /// end directives.
904   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
905                            const char *AssemblerDirectiveEnd,
906                            std::string &CollectString);
907 
908   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
909                              RegisterKind RegKind, unsigned Reg1,
910                              unsigned RegNum);
911   bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
912                            unsigned& RegNum, unsigned& RegWidth,
913                            unsigned *DwordRegIndex);
914   bool isRegister();
915   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
916   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
917   void initializeGprCountSymbol(RegisterKind RegKind);
918   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
919                              unsigned RegWidth);
920   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
921                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
922   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
923                  bool IsGdsHardcoded);
924 
925 public:
926   enum AMDGPUMatchResultTy {
927     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
928   };
929 
930   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
931 
  // Sets up default features when none are given, publishes the ISA
  // version as assembler symbols, and initializes GPR-count tracking.
  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
               const MCInstrInfo &MII,
               const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      // Code-object-v3 targets use the ".amdgcn." symbol names; older ones
      // use the legacy ".option." names.
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      // NOTE(review): this repeats the exact condition tested above; the two
      // branches could share one if/else. Left as-is.
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }
976 
  // --- Subtarget feature predicates (thin wrappers over AMDGPUBaseInfo
  // and the subtarget feature bits) ---

  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  // Generation checks used throughout the parser for syntax gating.
  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  // SGPR102/103 are unavailable on VI/GFX9; SGPR104/105 exist on GFX10 only.
  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const {
    return isGFX10();
  }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }
1028 
  // The streamer attached to the parser is always an AMDGPUTargetStreamer
  // for this target, so the static_cast is safe.
  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  // State for mnemonic suffixes (_e32/_e64/_dpp/_sdwa) that force a
  // particular instruction encoding during matching.
  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
1057 
  // --- MCTargetAsmParser entry points and operand parsers ---

  std::unique_ptr<AMDGPUOperand> parseRegister();
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  // Parsers for "prefix:value" style operands (e.g. offset:16).
  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy parseOperandArrayWithPrefix(
    const char *Prefix,
    OperandVector &Operands,
    AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
    bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(const char *Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value);

  // Immediate / register operand parsers, optionally with SP3 abs (|x|)
  // and input-modifier syntax.
  bool parseAbsoluteExpr(int64_t &Val, bool HasSP3AbsModifier = false);
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);
1113 
private:
  // Helper for sendmsg/hwreg operands: carries the parsed id and whether
  // it was given symbolically (by name) rather than numerically.
  struct OperandInfoTy {
    int64_t Id;
    bool IsSymbolic = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
  bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);

  void errorExpTgt();
  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);

  // Post-match semantic validation of an assembled MCInst.
  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst);
  bool validateEarlyClobberLimitations(const MCInst &Inst);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateLdsDirect(const MCInst &Inst);
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  // Small lexer utilities layered on top of the generic MCAsmLexer.
  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();
1157 
public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);

  // ds_swizzle operand: a macro or raw offset, decomposed per mode.
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  // MUBUF/MTBUF converters: thin wrappers selecting atomic/return/LDS
  // variants of cvtMubufImpl.
  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  // Default values for optional operands omitted in the source.
  AMDGPUOperand::Ptr defaultDLC() const;
  AMDGPUOperand::Ptr defaultGLC() const;
  AMDGPUOperand::Ptr defaultSLC() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMRDOffset20() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultOffsetU12() const;
  AMDGPUOperand::Ptr defaultOffsetS13() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);

  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands);

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                uint64_t BasicInstType, bool skipVcc = false);

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};
1233 
// Table entry describing one optional instruction operand: its textual
// name, the immediate kind it maps to, whether it is a bare bit flag
// (no ":value" part), and an optional value-conversion callback.
struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};
1240 
1241 } // end anonymous namespace
1242 
1243 // May be called with integer type with equivalent bitwidth.
1244 static const fltSemantics *getFltSemantics(unsigned Size) {
1245   switch (Size) {
1246   case 4:
1247     return &APFloat::IEEEsingle();
1248   case 8:
1249     return &APFloat::IEEEdouble();
1250   case 2:
1251     return &APFloat::IEEEhalf();
1252   default:
1253     llvm_unreachable("unsupported fp type");
1254   }
1255 }
1256 
1257 static const fltSemantics *getFltSemantics(MVT VT) {
1258   return getFltSemantics(VT.getSizeInBits() / 8);
1259 }
1260 
1261 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1262   switch (OperandType) {
1263   case AMDGPU::OPERAND_REG_IMM_INT32:
1264   case AMDGPU::OPERAND_REG_IMM_FP32:
1265   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1266   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1267     return &APFloat::IEEEsingle();
1268   case AMDGPU::OPERAND_REG_IMM_INT64:
1269   case AMDGPU::OPERAND_REG_IMM_FP64:
1270   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1271   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1272     return &APFloat::IEEEdouble();
1273   case AMDGPU::OPERAND_REG_IMM_INT16:
1274   case AMDGPU::OPERAND_REG_IMM_FP16:
1275   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1276   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1277   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1278   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1279     return &APFloat::IEEEhalf();
1280   default:
1281     llvm_unreachable("unsupported fp type");
1282   }
1283 }
1284 
1285 //===----------------------------------------------------------------------===//
1286 // Operand
1287 //===----------------------------------------------------------------------===//
1288 
// Returns true if FPLiteral can be converted (in place) to the FP type
// described by VT without overflow or underflow; precision loss alone is
// accepted. Note: FPLiteral is mutated by the conversion.
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to single precision
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision lost but not overflow or underflow
  // NOTE(review): the rejection additionally requires Lost to be set, so an
  // overflow/underflow status without precision loss is accepted — confirm
  // this is intended.
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}
1306 
1307 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1308   return isUIntN(Size, Val) || isIntN(Size, Val);
1309 }
1310 
// Returns true if this operand can be encoded as an inline constant of
// the given type (no separate literal dword needed).
bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    // Narrower operand: the 64-bit literal must first convert to the
    // operand's FP type without overflow/underflow.
    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return AMDGPU::isInlinableLiteral16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  // Narrower operand: value must fit in the operand width at all.
  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return AMDGPU::isInlinableLiteral16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}
1373 
// Returns true if this operand can be encoded as a literal constant
// (an extra dword in the instruction stream) of the given type.
bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    // 64-bit operands still encode only a 32-bit literal dword.
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set low 64-bits of literal to zeroes but we accept this literals
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // Narrower FP operand: acceptable if the value converts without
  // overflow/underflow.
  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, type);
}
1414 
1415 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1416   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1417 }
1418 
1419 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1420   if (AsmParser->isVI())
1421     return isVReg32();
1422   else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1423     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1424   else
1425     return false;
1426 }
1427 
// Type-specific SDWA operand predicates; all delegate to isSDWAOperand.
bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}
1443 
1444 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1445 {
1446   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1447   assert(Size == 2 || Size == 4 || Size == 8);
1448 
1449   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1450 
1451   if (Imm.Mods.Abs) {
1452     Val &= ~FpSignMask;
1453   }
1454   if (Imm.Mods.Neg) {
1455     Val ^= FpSignMask;
1456   }
1457 
1458   return Val;
1459 }
1460 
1461 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1462   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1463                              Inst.getNumOperands())) {
1464     addLiteralImmOperand(Inst, Imm.Val,
1465                          ApplyModifiers &
1466                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1467   } else {
1468     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1469     Inst.addOperand(MCOperand::createImm(Imm.Val));
1470   }
1471 }
1472 
// Encode Val into Inst for a literal-accepting source operand, choosing
// between inline-constant and literal encodings based on the operand
// type. ApplyModifiers folds abs/neg into the literal bits first.
void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    // An FP token is always held as a 64-bit double, regardless of the
    // operand's own width.
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        return;
      }

      // Non-inlineable
      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
          "Can't encode literal as exact 64-bit floating-point operand. "
          "Low 32-bits will be set to zero");
        }

        // Only the high dword is encoded; hardware pads the low 32 bits
        // with zeroes.
        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
        return;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to single precision
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision lost but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      Inst.addOperand(MCOperand::createImm(ImmVal));
      return;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    return;
  }

  // We got int literal token.
  // Only sign extend inline immediates.
  switch (OpTy) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    if (isSafeTruncation(Val, 32) &&
        AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    // Not inlinable: truncate to the literal dword.
    Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    if (isSafeTruncation(Val, 16) &&
        AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
    return;

  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
    // Packed operands accept only inline constants; predicate methods
    // should have rejected anything else already.
    assert(isSafeTruncation(Val, 16));
    assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                        AsmParser->hasInv2PiInlineImm()));

    Inst.addOperand(MCOperand::createImm(Val));
    return;
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}
1603 
// Append a k-imm FP operand of the given bit width: integer tokens are
// truncated to Bitwidth; FP tokens are converted from double to the
// Bitwidth-sized IEEE format (precision loss tolerated).
template <unsigned Bitwidth>
void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
  APInt Literal(64, Imm.Val);

  if (!Imm.IsFPImm) {
    // We got int literal token.
    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
    return;
  }

  bool Lost;
  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
                    APFloat::rmNearestTiesToEven, &Lost);
  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
}
1620 
// Append this register, translated to the subtarget-specific MC register.
void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
}
1624 
1625 static bool isInlineValue(unsigned Reg) {
1626   switch (Reg) {
1627   case AMDGPU::SRC_SHARED_BASE:
1628   case AMDGPU::SRC_SHARED_LIMIT:
1629   case AMDGPU::SRC_PRIVATE_BASE:
1630   case AMDGPU::SRC_PRIVATE_LIMIT:
1631   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1632     return true;
1633   default:
1634     return false;
1635   }
1636 }
1637 
// True if this operand is a register that encodes as an inline value.
bool AMDGPUOperand::isInlineValue() const {
  return isRegKind() && ::isInlineValue(getReg());
}
1641 
1642 //===----------------------------------------------------------------------===//
1643 // AsmParser
1644 //===----------------------------------------------------------------------===//
1645 
1646 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1647   if (Is == IS_VGPR) {
1648     switch (RegWidth) {
1649       default: return -1;
1650       case 1: return AMDGPU::VGPR_32RegClassID;
1651       case 2: return AMDGPU::VReg_64RegClassID;
1652       case 3: return AMDGPU::VReg_96RegClassID;
1653       case 4: return AMDGPU::VReg_128RegClassID;
1654       case 8: return AMDGPU::VReg_256RegClassID;
1655       case 16: return AMDGPU::VReg_512RegClassID;
1656     }
1657   } else if (Is == IS_TTMP) {
1658     switch (RegWidth) {
1659       default: return -1;
1660       case 1: return AMDGPU::TTMP_32RegClassID;
1661       case 2: return AMDGPU::TTMP_64RegClassID;
1662       case 4: return AMDGPU::TTMP_128RegClassID;
1663       case 8: return AMDGPU::TTMP_256RegClassID;
1664       case 16: return AMDGPU::TTMP_512RegClassID;
1665     }
1666   } else if (Is == IS_SGPR) {
1667     switch (RegWidth) {
1668       default: return -1;
1669       case 1: return AMDGPU::SGPR_32RegClassID;
1670       case 2: return AMDGPU::SGPR_64RegClassID;
1671       case 4: return AMDGPU::SGPR_128RegClassID;
1672       case 8: return AMDGPU::SGPR_256RegClassID;
1673       case 16: return AMDGPU::SGPR_512RegClassID;
1674     }
1675   }
1676   return -1;
1677 }
1678 
// Map a special-register name (and its src_* alias where one exists) to
// its MC register number; returns 0 for names that are not special.
static unsigned getSpecialRegForName(StringRef RegName) {
  return StringSwitch<unsigned>(RegName)
    .Case("exec", AMDGPU::EXEC)
    .Case("vcc", AMDGPU::VCC)
    .Case("flat_scratch", AMDGPU::FLAT_SCR)
    .Case("xnack_mask", AMDGPU::XNACK_MASK)
    .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("lds_direct", AMDGPU::LDS_DIRECT)
    .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
    .Case("m0", AMDGPU::M0)
    .Case("scc", AMDGPU::SCC)
    .Case("tba", AMDGPU::TBA)
    .Case("tma", AMDGPU::TMA)
    .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
    .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
    .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
    .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
    .Case("vcc_lo", AMDGPU::VCC_LO)
    .Case("vcc_hi", AMDGPU::VCC_HI)
    .Case("exec_lo", AMDGPU::EXEC_LO)
    .Case("exec_hi", AMDGPU::EXEC_HI)
    .Case("tma_lo", AMDGPU::TMA_LO)
    .Case("tma_hi", AMDGPU::TMA_HI)
    .Case("tba_lo", AMDGPU::TBA_LO)
    .Case("tba_hi", AMDGPU::TBA_HI)
    .Case("null", AMDGPU::SGPR_NULL)
    .Default(0);
}
1716 
1717 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1718                                     SMLoc &EndLoc) {
1719   auto R = parseRegister();
1720   if (!R) return true;
1721   assert(R->isReg());
1722   RegNo = R->getReg();
1723   StartLoc = R->getStartLoc();
1724   EndLoc = R->getEndLoc();
1725   return false;
1726 }
1727 
1728 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1729                                             RegisterKind RegKind, unsigned Reg1,
1730                                             unsigned RegNum) {
1731   switch (RegKind) {
1732   case IS_SPECIAL:
1733     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1734       Reg = AMDGPU::EXEC;
1735       RegWidth = 2;
1736       return true;
1737     }
1738     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1739       Reg = AMDGPU::FLAT_SCR;
1740       RegWidth = 2;
1741       return true;
1742     }
1743     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
1744       Reg = AMDGPU::XNACK_MASK;
1745       RegWidth = 2;
1746       return true;
1747     }
1748     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1749       Reg = AMDGPU::VCC;
1750       RegWidth = 2;
1751       return true;
1752     }
1753     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1754       Reg = AMDGPU::TBA;
1755       RegWidth = 2;
1756       return true;
1757     }
1758     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
1759       Reg = AMDGPU::TMA;
1760       RegWidth = 2;
1761       return true;
1762     }
1763     return false;
1764   case IS_VGPR:
1765   case IS_SGPR:
1766   case IS_TTMP:
1767     if (Reg1 != Reg + RegWidth) {
1768       return false;
1769     }
1770     RegWidth++;
1771     return true;
1772   default:
1773     llvm_unreachable("unexpected register kind");
1774   }
1775 }
1776 
1777 static const StringRef Registers[] = {
1778   { "v" },
1779   { "s" },
1780   { "ttmp" },
1781 };
1782 
1783 bool
1784 AMDGPUAsmParser::isRegister(const AsmToken &Token,
1785                             const AsmToken &NextToken) const {
1786 
1787   // A list of consecutive registers: [s0,s1,s2,s3]
1788   if (Token.is(AsmToken::LBrac))
1789     return true;
1790 
1791   if (!Token.is(AsmToken::Identifier))
1792     return false;
1793 
1794   // A single register like s0 or a range of registers like s[0:1]
1795 
1796   StringRef RegName = Token.getString();
1797 
1798   for (StringRef Reg : Registers) {
1799     if (RegName.startswith(Reg)) {
1800       if (Reg.size() < RegName.size()) {
1801         unsigned RegNum;
1802         // A single register with an index: rXX
1803         if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum))
1804           return true;
1805       } else {
1806         // A range of registers: r[XX:YY].
1807         if (NextToken.is(AsmToken::LBrac))
1808           return true;
1809       }
1810     }
1811   }
1812 
1813   return getSpecialRegForName(RegName);
1814 }
1815 
1816 bool
1817 AMDGPUAsmParser::isRegister()
1818 {
1819   return isRegister(getToken(), peekToken());
1820 }
1821 
// Parse a register reference in any supported form and classify it:
// a special register by name, a single register (vN/sN/ttmpN), a range
// (v[X:Y] or v[X]), or a list of consecutive registers ([s0,s1,...],
// handled recursively). On success returns true and fills RegKind, Reg
// (the MC register), RegNum, RegWidth (in dwords) and, if requested,
// DwordRegIndex (the dword index before register-class scaling).
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          unsigned *DwordRegIndex) {
  if (DwordRegIndex) { *DwordRegIndex = 0; }
  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (getLexer().is(AsmToken::Identifier)) {
    StringRef RegName = Parser.getTok().getString();
    // Special registers (vcc, exec, m0, ...) are matched by full name.
    if ((Reg = getSpecialRegForName(RegName))) {
      Parser.Lex();
      RegKind = IS_SPECIAL;
    } else {
      // Classify by prefix; RegNumIndex is where the numeric index starts.
      unsigned RegNumIndex = 0;
      if (RegName[0] == 'v') {
        RegNumIndex = 1;
        RegKind = IS_VGPR;
      } else if (RegName[0] == 's') {
        RegNumIndex = 1;
        RegKind = IS_SGPR;
      } else if (RegName.startswith("ttmp")) {
        RegNumIndex = strlen("ttmp");
        RegKind = IS_TTMP;
      } else {
        return false;
      }
      if (RegName.size() > RegNumIndex) {
        // Single 32-bit register: vXX.
        if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
          return false;
        Parser.Lex();
        RegWidth = 1;
      } else {
        // Range of registers: v[XX:YY]. ":YY" is optional.
        Parser.Lex();
        int64_t RegLo, RegHi;
        if (getLexer().isNot(AsmToken::LBrac))
          return false;
        Parser.Lex();

        if (getParser().parseAbsoluteExpression(RegLo))
          return false;

        // Either "]" (single-element range) or ":" must follow the
        // first index.
        const bool isRBrace = getLexer().is(AsmToken::RBrac);
        if (!isRBrace && getLexer().isNot(AsmToken::Colon))
          return false;
        Parser.Lex();

        if (isRBrace) {
          // v[XX] denotes a one-register range.
          RegHi = RegLo;
        } else {
          if (getParser().parseAbsoluteExpression(RegHi))
            return false;

          if (getLexer().isNot(AsmToken::RBrac))
            return false;
          Parser.Lex();
        }
        RegNum = (unsigned) RegLo;
        RegWidth = (RegHi - RegLo) + 1;
      }
    }
  } else if (getLexer().is(AsmToken::LBrac)) {
    // List of consecutive registers: [s0,s1,s2,s3]
    Parser.Lex();
    // The first element seeds Reg/RegNum/RegWidth; it must be a single
    // register.
    if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
      return false;
    if (RegWidth != 1)
      return false;
    RegisterKind RegKind1;
    unsigned Reg1, RegNum1, RegWidth1;
    do {
      if (getLexer().is(AsmToken::Comma)) {
        Parser.Lex();
      } else if (getLexer().is(AsmToken::RBrac)) {
        Parser.Lex();
        break;
      } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
        // Each subsequent element must be a single register of the same
        // kind, consecutive with the group built so far.
        if (RegWidth1 != 1) {
          return false;
        }
        if (RegKind1 != RegKind) {
          return false;
        }
        if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
          return false;
        }
      } else {
        return false;
      }
    } while (true);
  } else {
    return false;
  }
  // Map (kind, index, width) onto an actual MC register.
  switch (RegKind) {
  case IS_SPECIAL:
    RegNum = 0;
    RegWidth = 1;
    break;
  case IS_VGPR:
  case IS_SGPR:
  case IS_TTMP:
  {
    unsigned Size = 1;
    if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
      // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
      Size = std::min(RegWidth, 4u);
    }
    if (RegNum % Size != 0)
      return false;
    if (DwordRegIndex) { *DwordRegIndex = RegNum; }
    // Scale the dword index down to an index into the register class.
    RegNum = RegNum / Size;
    int RCID = getRegClass(RegKind, RegWidth);
    if (RCID == -1)
      return false;
    const MCRegisterClass RC = TRI->getRegClass(RCID);
    if (RegNum >= RC.getNumRegs())
      return false;
    Reg = RC.getRegister(RegNum);
    break;
  }

  default:
    llvm_unreachable("unexpected register kind");
  }

  // Reject registers that do not exist on the current subtarget.
  if (!subtargetHasRegister(*TRI, Reg))
    return false;
  return true;
}
1950 
1951 Optional<StringRef>
1952 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
1953   switch (RegKind) {
1954   case IS_VGPR:
1955     return StringRef(".amdgcn.next_free_vgpr");
1956   case IS_SGPR:
1957     return StringRef(".amdgcn.next_free_sgpr");
1958   default:
1959     return None;
1960   }
1961 }
1962 
1963 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
1964   auto SymbolName = getGprCountSymbolName(RegKind);
1965   assert(SymbolName && "initializing invalid register kind");
1966   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
1967   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
1968 }
1969 
// Raise the ".amdgcn.next_free_{v,s}gpr" symbol so it covers the register
// range [DwordRegIndex, DwordRegIndex + RegWidth). Returns true on
// success; false means a diagnostic has already been emitted (Error()
// returns true, so "!Error(...)" evaluates to false).
bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
                                            unsigned DwordRegIndex,
                                            unsigned RegWidth) {
  // Symbols are only defined for GCN targets
  if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
    return true;

  // Kinds without a tracking symbol (e.g. TTMP, special) are ignored.
  auto SymbolName = getGprCountSymbolName(RegKind);
  if (!SymbolName)
    return true;
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);

  // Highest dword register index touched by this reference.
  int64_t NewMax = DwordRegIndex + RegWidth - 1;
  int64_t OldCount;

  if (!Sym->isVariable())
    return !Error(getParser().getTok().getLoc(),
                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
    return !Error(
        getParser().getTok().getLoc(),
        ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");

  // Only ever grow the count; never shrink it.
  if (OldCount <= NewMax)
    Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));

  return true;
}
1998 
1999 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
2000   const auto &Tok = Parser.getTok();
2001   SMLoc StartLoc = Tok.getLoc();
2002   SMLoc EndLoc = Tok.getEndLoc();
2003   RegisterKind RegKind;
2004   unsigned Reg, RegNum, RegWidth, DwordRegIndex;
2005 
2006   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
2007     //FIXME: improve error messages (bug 41303).
2008     Error(StartLoc, "not a valid operand.");
2009     return nullptr;
2010   }
2011   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2012     if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
2013       return nullptr;
2014   } else
2015     KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
2016   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2017 }
2018 
2019 bool
2020 AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool HasSP3AbsModifier) {
2021   if (HasSP3AbsModifier) {
2022     // This is a workaround for handling expressions
2023     // as arguments of SP3 'abs' modifier, for example:
2024     //     |1.0|
2025     //     |-1|
2026     //     |1+x|
2027     // This syntax is not compatible with syntax of standard
2028     // MC expressions (due to the trailing '|').
2029 
2030     SMLoc EndLoc;
2031     const MCExpr *Expr;
2032     SMLoc StartLoc = getLoc();
2033 
2034     if (getParser().parsePrimaryExpr(Expr, EndLoc)) {
2035       return true;
2036     }
2037 
2038     if (!Expr->evaluateAsAbsolute(Val))
2039       return Error(StartLoc, "expected absolute expression");
2040 
2041     return false;
2042   }
2043 
2044   return getParser().parseAbsoluteExpression(Val);
2045 }
2046 
// Parse an immediate operand: a floating-point literal (with an optional
// leading '-') or an integer absolute expression. The raw bits of FP
// literals are stored in the operand (IsFPImm is set via the 'true' flag).
OperandMatchResultTy
AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
  // TODO: add syntactic sugar for 1/(2*PI)

  const auto& Tok = getToken();
  const auto& NextTok = peekToken();
  bool IsReal = Tok.is(AsmToken::Real);
  SMLoc S = Tok.getLoc();
  bool Negate = false;

  // Consume a '-' only when it directly precedes an FP literal; integer
  // negation is handled by the expression parser below.
  if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
    lex();
    IsReal = true;
    Negate = true;
  }

  if (IsReal) {
    // Floating-point expressions are not supported.
    // Can only allow floating-point literals with an
    // optional sign.

    StringRef Num = getTokenStr();
    lex();

    APFloat RealVal(APFloat::IEEEdouble());
    auto roundMode = APFloat::rmNearestTiesToEven;
    if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) {
      return MatchOperand_ParseFail;
    }
    if (Negate)
      RealVal.changeSign();

    // Store the raw IEEE double bits; the 'true' flag marks an FP imm.
    Operands.push_back(
      AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
                               AMDGPUOperand::ImmTyNone, true));

    return MatchOperand_Success;

    // FIXME: Should enable arbitrary expressions here
  } else if (Tok.is(AsmToken::Integer) ||
             (Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Integer))){

    int64_t IntVal;
    if (parseAbsoluteExpr(IntVal, HasSP3AbsModifier))
      return MatchOperand_ParseFail;

    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
    return MatchOperand_Success;
  }

  return MatchOperand_NoMatch;
}
2099 
2100 OperandMatchResultTy
2101 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2102   if (!isRegister())
2103     return MatchOperand_NoMatch;
2104 
2105   if (auto R = parseRegister()) {
2106     assert(R->isReg());
2107     Operands.push_back(std::move(R));
2108     return MatchOperand_Success;
2109   }
2110   return MatchOperand_ParseFail;
2111 }
2112 
2113 OperandMatchResultTy
2114 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2115   auto res = parseReg(Operands);
2116   return (res == MatchOperand_NoMatch)?
2117          parseImm(Operands, HasSP3AbsMod) :
2118          res;
2119 }
2120 
2121 // Check if the current token is an SP3 'neg' modifier.
2122 // Currently this modifier is allowed in the following context:
2123 //
2124 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2125 // 2. Before an 'abs' modifier: -abs(...)
2126 // 3. Before an SP3 'abs' modifier: -|...|
2127 //
2128 // In all other cases "-" is handled as a part
2129 // of an expression that follows the sign.
2130 //
2131 // Note: When "-" is followed by an integer literal,
2132 // this is interpreted as integer negation rather
2133 // than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of floating-point
2135 // NEG modifier would have resulted in different meaning
2136 // of integer literals used with VOP1/2/C and VOP3,
2137 // for example:
2138 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2139 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2140 // Negative fp literals with preceding "-" are
// handled likewise for uniformity
2142 //
2143 bool
2144 AMDGPUAsmParser::parseSP3NegModifier() {
2145 
2146   AsmToken NextToken[2];
2147   peekTokens(NextToken);
2148 
2149   if (isToken(AsmToken::Minus) &&
2150       (isRegister(NextToken[0], NextToken[1]) ||
2151        NextToken[0].is(AsmToken::Pipe) ||
2152        isId(NextToken[0], "abs"))) {
2153     lex();
2154     return true;
2155   }
2156 
2157   return false;
2158 }
2159 
// Parse a register or immediate wrapped in optional floating-point input
// modifiers: 'neg(...)' / SP3 '-', and 'abs(...)' / SP3 '|...|'. The two
// syntaxes for the same modifier are mutually exclusive. Modifiers are
// attached to the operand that was just parsed.
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
    return MatchOperand_ParseFail;
  }

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId("neg");
  // Cannot combine SP3 '-' with 'neg(...)'.
  if (Neg && SP3Neg) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return MatchOperand_ParseFail;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return MatchOperand_ParseFail;

  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  // Cannot combine 'abs(...)' with SP3 '|...|'.
  if (Abs && SP3Abs) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    // SP3Abs is forwarded so '|expr|' immediates parse correctly.
    Res = parseRegOrImm(Operands, SP3Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    // Once a modifier was consumed, failure to parse the operand is fatal.
    return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
  }

  // Consume the closing delimiters in the reverse nesting order.
  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
    return MatchOperand_ParseFail;
  if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;
  if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;

  AMDGPUOperand::Modifiers Mods;
  Mods.Abs = Abs || SP3Abs;
  Mods.Neg = Neg || SP3Neg;

  if (Mods.hasFPModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    Op.setModifiers(Mods);
  }
  return MatchOperand_Success;
}
2222 
2223 OperandMatchResultTy
2224 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2225                                                bool AllowImm) {
2226   bool Sext = trySkipId("sext");
2227   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2228     return MatchOperand_ParseFail;
2229 
2230   OperandMatchResultTy Res;
2231   if (AllowImm) {
2232     Res = parseRegOrImm(Operands);
2233   } else {
2234     Res = parseReg(Operands);
2235   }
2236   if (Res != MatchOperand_Success) {
2237     return Sext? MatchOperand_ParseFail : Res;
2238   }
2239 
2240   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2241     return MatchOperand_ParseFail;
2242 
2243   AMDGPUOperand::Modifiers Mods;
2244   Mods.Sext = Sext;
2245 
2246   if (Mods.hasIntModifiers()) {
2247     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2248     Op.setModifiers(Mods);
2249   }
2250 
2251   return MatchOperand_Success;
2252 }
2253 
2254 OperandMatchResultTy
2255 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2256   return parseRegOrImmWithFPInputMods(Operands, false);
2257 }
2258 
2259 OperandMatchResultTy
2260 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2261   return parseRegOrImmWithIntInputMods(Operands, false);
2262 }
2263 
2264 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2265   auto Loc = getLoc();
2266   if (trySkipId("off")) {
2267     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2268                                                 AMDGPUOperand::ImmTyOff, false));
2269     return MatchOperand_Success;
2270   }
2271 
2272   if (!isRegister())
2273     return MatchOperand_NoMatch;
2274 
2275   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2276   if (Reg) {
2277     Operands.push_back(std::move(Reg));
2278     return MatchOperand_Success;
2279   }
2280 
2281   return MatchOperand_ParseFail;
2282 
2283 }
2284 
// Target-specific post-match filter: rejects instruction encodings that
// conflict with a forced encoding (_e32/_e64/DPP/SDWA suffix) or with
// target-specific operand constraints.
unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;

  // Reject matches that conflict with a user-forced encoding.
  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
      (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
      (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
      (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
    return Match_InvalidOperand;

  // Some VOP3 opcodes prefer the 32-bit encoding unless _e64 was forced.
  if ((TSFlags & SIInstrFlags::VOP3) &&
      (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
      getForcedEncodingSize() != 64)
    return Match_PreferE32;

  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    // v_mac_f32/16 allow only dst_sel == DWORD;
    auto OpNum =
        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
    const auto &Op = Inst.getOperand(OpNum);
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;
    }
  }

  if (TSFlags & SIInstrFlags::FLAT) {
    // FIXME: Produces error without correct column reported.
    auto Opcode = Inst.getOpcode();
    // NOTE(review): OpNum is used without an explicit -1 check; presumably
    // every FLAT instruction has an offset operand — confirm.
    auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);

    const auto &Op = Inst.getOperand(OpNum);
    if (!hasFlatOffsets() && Op.getImm() != 0)
      return Match_InvalidOperand;

    // GFX10: Address offset is 12-bit signed byte offset. Must be positive for
    // FLAT segment. For FLAT segment MSB is ignored and forced to zero.
    if (isGFX10()) {
      if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
        if (!isInt<12>(Op.getImm()))
          return Match_InvalidOperand;
      } else {
        if (!isUInt<11>(Op.getImm()))
          return Match_InvalidOperand;
      }
    }
  }

  return Match_Success;
}
2334 
2335 // What asm variants we should check
2336 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2337   if (getForcedEncodingSize() == 32) {
2338     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2339     return makeArrayRef(Variants);
2340   }
2341 
2342   if (isForcedVOP3()) {
2343     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2344     return makeArrayRef(Variants);
2345   }
2346 
2347   if (isForcedSDWA()) {
2348     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2349                                         AMDGPUAsmVariants::SDWA9};
2350     return makeArrayRef(Variants);
2351   }
2352 
2353   if (isForcedDPP()) {
2354     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2355     return makeArrayRef(Variants);
2356   }
2357 
2358   static const unsigned Variants[] = {
2359     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2360     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2361   };
2362 
2363   return makeArrayRef(Variants);
2364 }
2365 
2366 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2367   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2368   const unsigned Num = Desc.getNumImplicitUses();
2369   for (unsigned i = 0; i < Num; ++i) {
2370     unsigned Reg = Desc.ImplicitUses[i];
2371     switch (Reg) {
2372     case AMDGPU::FLAT_SCR:
2373     case AMDGPU::VCC:
2374     case AMDGPU::VCC_LO:
2375     case AMDGPU::VCC_HI:
2376     case AMDGPU::M0:
2377     case AMDGPU::SGPR_NULL:
2378       return Reg;
2379     default:
2380       break;
2381     }
2382   }
2383   return AMDGPU::NoRegister;
2384 }
2385 
2386 // NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
2388 // Note that there are no cases when a GFX7 opcode violates
2389 // constant bus limitations due to the use of an f16 constant.
2390 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2391                                        unsigned OpIdx) const {
2392   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2393 
2394   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2395     return false;
2396   }
2397 
2398   const MCOperand &MO = Inst.getOperand(OpIdx);
2399 
2400   int64_t Val = MO.getImm();
2401   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2402 
2403   switch (OpSize) { // expected operand size
2404   case 8:
2405     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2406   case 4:
2407     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2408   case 2: {
2409     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2410     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2411         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
2412       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2413     } else {
2414       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2415     }
2416   }
2417   default:
2418     llvm_unreachable("invalid operand size");
2419   }
2420 }
2421 
2422 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2423   const MCOperand &MO = Inst.getOperand(OpIdx);
2424   if (MO.isImm()) {
2425     return !isInlineConstant(Inst, OpIdx);
2426   }
2427   return !MO.isReg() ||
2428          isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2429 }
2430 
// Verify that the instruction reads the constant bus at most once.
// Counts implicit SGPR reads, special imm operands, non-inline literals
// and SGPR source operands, de-duplicating repeated uses of the same SGPR.
bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  unsigned ConstantBusUseCount = 0;

  // Only VALU encodings are subject to the constant bus limit.
  if (Desc.TSFlags &
      (SIInstrFlags::VOPC |
       SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
       SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
       SIInstrFlags::SDWA)) {
    // Check special imm operands (used by madmk, etc)
    if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
      ++ConstantBusUseCount;
    }

    // An implicitly read SGPR (e.g. VCC as carry-in) also uses the bus.
    unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
    if (SGPRUsed != AMDGPU::NoRegister) {
      ++ConstantBusUseCount;
    }

    const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
    const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    for (int OpIdx : OpIndices) {
      if (OpIdx == -1) break;

      const MCOperand &MO = Inst.getOperand(OpIdx);
      if (usesConstantBus(Inst, OpIdx)) {
        if (MO.isReg()) {
          const unsigned Reg = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersections like these
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but they are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction
          // Re-reading the same SGPR counts only once.
          if (Reg != SGPRUsed) {
            ++ConstantBusUseCount;
          }
          SGPRUsed = Reg;
        } else { // Expression or a literal
          ++ConstantBusUseCount;
        }
      }
    }
  }

  return ConstantBusUseCount <= 1;
}
2483 
2484 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2485   const unsigned Opcode = Inst.getOpcode();
2486   const MCInstrDesc &Desc = MII.get(Opcode);
2487 
2488   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2489   if (DstIdx == -1 ||
2490       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2491     return true;
2492   }
2493 
2494   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2495 
2496   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2497   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2498   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2499 
2500   assert(DstIdx != -1);
2501   const MCOperand &Dst = Inst.getOperand(DstIdx);
2502   assert(Dst.isReg());
2503   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2504 
2505   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2506 
2507   for (int SrcIdx : SrcIndices) {
2508     if (SrcIdx == -1) break;
2509     const MCOperand &Src = Inst.getOperand(SrcIdx);
2510     if (Src.isReg()) {
2511       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2512       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2513         return false;
2514       }
2515     }
2516   }
2517 
2518   return true;
2519 }
2520 
2521 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2522 
2523   const unsigned Opc = Inst.getOpcode();
2524   const MCInstrDesc &Desc = MII.get(Opc);
2525 
2526   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2527     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2528     assert(ClampIdx != -1);
2529     return Inst.getOperand(ClampIdx).getImm() == 0;
2530   }
2531 
2532   return true;
2533 }
2534 
// Check that the MIMG vdata register width matches the number of dwords
// implied by dmask (or 4 for gather4), plus one for tfe, halved (rounded
// up) when packed d16 is in effect.
bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
    return true;

  int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);

  assert(VDataIdx != -1);
  assert(DMaskIdx != -1);
  assert(TFEIdx != -1);

  unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
  // tfe adds one extra dword for the error flag.
  unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
  // dmask == 0 is treated as a single enabled channel.
  if (DMask == 0)
    DMask = 1;

  // Gather4 always returns 4 channels regardless of dmask.
  unsigned DataSize =
    (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
  if (hasPackedD16()) {
    int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
    // Packed d16 stores two channels per dword.
    if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
      DataSize = (DataSize + 1) / 2;
  }

  return (VDataSize / 4) == DataSize + TFESize;
}
2567 
2568 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
2569 
2570   const unsigned Opc = Inst.getOpcode();
2571   const MCInstrDesc &Desc = MII.get(Opc);
2572 
2573   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2574     return true;
2575   if (!Desc.mayLoad() || !Desc.mayStore())
2576     return true; // Not atomic
2577 
2578   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2579   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2580 
2581   // This is an incomplete check because image_atomic_cmpswap
2582   // may only use 0x3 and 0xf while other atomic operations
2583   // may use 0x1 and 0x3. However these limitations are
2584   // verified when we check that dmask matches dst size.
2585   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
2586 }
2587 
2588 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
2589 
2590   const unsigned Opc = Inst.getOpcode();
2591   const MCInstrDesc &Desc = MII.get(Opc);
2592 
2593   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
2594     return true;
2595 
2596   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2597   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2598 
2599   // GATHER4 instructions use dmask in a different fashion compared to
2600   // other MIMG instructions. The only useful DMASK values are
2601   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
2602   // (red,red,red,red) etc.) The ISA document doesn't mention
2603   // this.
2604   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
2605 }
2606 
2607 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
2608 
2609   const unsigned Opc = Inst.getOpcode();
2610   const MCInstrDesc &Desc = MII.get(Opc);
2611 
2612   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2613     return true;
2614 
2615   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2616   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
2617     if (isCI() || isSI())
2618       return false;
2619   }
2620 
2621   return true;
2622 }
2623 
// Returns true for "reversed" VALU opcodes (V_*REV_*), whose src0 and src1
// are swapped relative to the corresponding non-REV opcode. Used by
// validateLdsDirect: lds_direct may not appear as src0 of such opcodes,
// since that position is effectively the second source.
static bool IsRevOpcode(const unsigned Opcode)
{
  switch (Opcode) {
  case AMDGPU::V_SUBREV_F32_e32:
  case AMDGPU::V_SUBREV_F32_e64:
  case AMDGPU::V_SUBREV_F32_e32_gfx10:
  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e32_vi:
  case AMDGPU::V_SUBREV_F32_e64_gfx10:
  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e64_vi:

  case AMDGPU::V_SUBREV_I32_e32:
  case AMDGPU::V_SUBREV_I32_e64:
  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:

  case AMDGPU::V_SUBBREV_U32_e32:
  case AMDGPU::V_SUBBREV_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e32_vi:
  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_U32_e32_vi:
  case AMDGPU::V_SUBREV_U32_e64_gfx9:
  case AMDGPU::V_SUBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32_gfx10:
  case AMDGPU::V_SUBREV_F16_e32_vi:
  case AMDGPU::V_SUBREV_F16_e64_gfx10:
  case AMDGPU::V_SUBREV_F16_e64_vi:

  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32_vi:
  case AMDGPU::V_SUBREV_U16_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:

  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:

  case AMDGPU::V_LSHRREV_B32_e32:
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e32_vi:
  case AMDGPU::V_LSHRREV_B32_e64_vi:
  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
  case AMDGPU::V_LSHRREV_B32_e64_gfx10:

  case AMDGPU::V_ASHRREV_I32_e32:
  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e32_vi:
  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e64_vi:

  case AMDGPU::V_LSHLREV_B32_e32:
  case AMDGPU::V_LSHLREV_B32_e64:
  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e32_vi:
  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e64_vi:

  case AMDGPU::V_LSHLREV_B16_e32:
  case AMDGPU::V_LSHLREV_B16_e64:
  case AMDGPU::V_LSHLREV_B16_e32_vi:
  case AMDGPU::V_LSHLREV_B16_e64_vi:
  case AMDGPU::V_LSHLREV_B16_gfx10:

  case AMDGPU::V_LSHRREV_B16_e32:
  case AMDGPU::V_LSHRREV_B16_e64:
  case AMDGPU::V_LSHRREV_B16_e32_vi:
  case AMDGPU::V_LSHRREV_B16_e64_vi:
  case AMDGPU::V_LSHRREV_B16_gfx10:

  case AMDGPU::V_ASHRREV_I16_e32:
  case AMDGPU::V_ASHRREV_I16_e64:
  case AMDGPU::V_ASHRREV_I16_e32_vi:
  case AMDGPU::V_ASHRREV_I16_e64_vi:
  case AMDGPU::V_ASHRREV_I16_gfx10:

  case AMDGPU::V_LSHLREV_B64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_vi:

  case AMDGPU::V_LSHRREV_B64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_vi:

  case AMDGPU::V_ASHRREV_I64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_vi:

  case AMDGPU::V_PK_LSHLREV_B16:
  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
  case AMDGPU::V_PK_LSHLREV_B16_vi:

  case AMDGPU::V_PK_LSHRREV_B16:
  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
  case AMDGPU::V_PK_LSHRREV_B16_vi:
  case AMDGPU::V_PK_ASHRREV_I16:
  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
  case AMDGPU::V_PK_ASHRREV_I16_vi:
    return true;
  default:
    return false;
  }
}
2752 
2753 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
2754 
2755   using namespace SIInstrFlags;
2756   const unsigned Opcode = Inst.getOpcode();
2757   const MCInstrDesc &Desc = MII.get(Opcode);
2758 
2759   // lds_direct register is defined so that it can be used
2760   // with 9-bit operands only. Ignore encodings which do not accept these.
2761   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
2762     return true;
2763 
2764   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2765   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2766   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2767 
2768   const int SrcIndices[] = { Src1Idx, Src2Idx };
2769 
2770   // lds_direct cannot be specified as either src1 or src2.
2771   for (int SrcIdx : SrcIndices) {
2772     if (SrcIdx == -1) break;
2773     const MCOperand &Src = Inst.getOperand(SrcIdx);
2774     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
2775       return false;
2776     }
2777   }
2778 
2779   if (Src0Idx == -1)
2780     return true;
2781 
2782   const MCOperand &Src = Inst.getOperand(Src0Idx);
2783   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
2784     return true;
2785 
2786   // lds_direct is specified as src0. Check additional limitations.
2787   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
2788 }
2789 
2790 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
2791   unsigned Opcode = Inst.getOpcode();
2792   const MCInstrDesc &Desc = MII.get(Opcode);
2793   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
2794     return true;
2795 
2796   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2797   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2798 
2799   const int OpIndices[] = { Src0Idx, Src1Idx };
2800 
2801   unsigned NumLiterals = 0;
2802   uint32_t LiteralValue;
2803 
2804   for (int OpIdx : OpIndices) {
2805     if (OpIdx == -1) break;
2806 
2807     const MCOperand &MO = Inst.getOperand(OpIdx);
2808     if (MO.isImm() &&
2809         // Exclude special imm operands (like that used by s_set_gpr_idx_on)
2810         AMDGPU::isSISrcOperand(Desc, OpIdx) &&
2811         !isInlineConstant(Inst, OpIdx)) {
2812       uint32_t Value = static_cast<uint32_t>(MO.getImm());
2813       if (NumLiterals == 0 || LiteralValue != Value) {
2814         LiteralValue = Value;
2815         ++NumLiterals;
2816       }
2817     }
2818   }
2819 
2820   return NumLiterals <= 1;
2821 }
2822 
// Run all target-specific validity checks on a successfully matched
// instruction. On failure, emits a diagnostic at \p IDLoc and returns false;
// the caller must not emit the instruction in that case.
bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
                                          const SMLoc &IDLoc) {
  if (!validateLdsDirect(Inst)) {
    Error(IDLoc,
      "invalid use of lds_direct");
    return false;
  }
  if (!validateSOPLiteral(Inst)) {
    Error(IDLoc,
      "only one literal operand is allowed");
    return false;
  }
  if (!validateConstantBusLimitations(Inst)) {
    Error(IDLoc,
      "invalid operand (violates constant bus restrictions)");
    return false;
  }
  if (!validateEarlyClobberLimitations(Inst)) {
    Error(IDLoc,
      "destination must be different than all sources");
    return false;
  }
  if (!validateIntClampSupported(Inst)) {
    Error(IDLoc,
      "integer clamping is not supported on this GPU");
    return false;
  }
  // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
  if (!validateMIMGD16(Inst)) {
    Error(IDLoc,
      "d16 modifier is not supported on this GPU");
    return false;
  }
  if (!validateMIMGDataSize(Inst)) {
    Error(IDLoc,
      "image data size does not match dmask and tfe");
    return false;
  }
  if (!validateMIMGAtomicDMask(Inst)) {
    Error(IDLoc,
      "invalid atomic image dmask");
    return false;
  }
  if (!validateMIMGGatherDMask(Inst)) {
    Error(IDLoc,
      "invalid image_gather dmask: only one bit must be set");
    return false;
  }

  // All checks passed.
  return true;
}
2874 
// Suggests a near-miss mnemonic for "invalid instruction" diagnostics
// (used below for Match_MnemonicFail).
static std::string AMDGPUMnemonicSpellCheck(StringRef S,
                                            const FeatureBitset &FBS,
                                            unsigned VariantID = 0);

// Try to match the parsed operands against every applicable assembler
// variant, keeping the most specific failure status for diagnostics.
// On a successful match the instruction is validated and emitted.
bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  MCInst Inst;
  unsigned Result = Match_Success;
  for (auto Variant : getMatchedVariants()) {
    uint64_t EI;
    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
                                  Variant);
    // We order match statuses from least to most specific. We use most specific
    // status as resulting
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
    if ((R == Match_Success) ||
        (R == Match_PreferE32) ||
        (R == Match_MissingFeature && Result != Match_PreferE32) ||
        (R == Match_InvalidOperand && Result != Match_MissingFeature
                                   && Result != Match_PreferE32) ||
        (R == Match_MnemonicFail   && Result != Match_InvalidOperand
                                   && Result != Match_MissingFeature
                                   && Result != Match_PreferE32)) {
      Result = R;
      ErrorInfo = EI;
    }
    if (R == Match_Success)
      break;
  }

  // Emit on success; otherwise turn the retained status into a diagnostic.
  switch (Result) {
  default: break;
  case Match_Success:
    if (!validateInstruction(Inst, IDLoc)) {
      // validateInstruction has already emitted a diagnostic.
      return true;
    }
    Inst.setLoc(IDLoc);
    Out.EmitInstruction(Inst, getSTI());
    return false;

  case Match_MissingFeature:
    return Error(IDLoc, "instruction not supported on this GPU");

  case Match_MnemonicFail: {
    // Unknown mnemonic: try to suggest a close match.
    FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
    std::string Suggestion = AMDGPUMnemonicSpellCheck(
        ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
    return Error(IDLoc, "invalid instruction" + Suggestion,
                 ((AMDGPUOperand &)*Operands[0]).getLocRange());
  }

  case Match_InvalidOperand: {
    // Point the diagnostic at the offending operand when its index is known.
    SMLoc ErrorLoc = IDLoc;
    if (ErrorInfo != ~0ULL) {
      if (ErrorInfo >= Operands.size()) {
        return Error(IDLoc, "too few operands for instruction");
      }
      ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
      if (ErrorLoc == SMLoc())
        ErrorLoc = IDLoc;
    }
    return Error(ErrorLoc, "invalid operand for instruction");
  }

  case Match_PreferE32:
    return Error(IDLoc, "internal error: instruction without _e64 suffix "
                        "should be encoded as e32");
  }
  llvm_unreachable("Implement any new match types added!");
}
2948 
2949 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
2950   int64_t Tmp = -1;
2951   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
2952     return true;
2953   }
2954   if (getParser().parseAbsoluteExpression(Tmp)) {
2955     return true;
2956   }
2957   Ret = static_cast<uint32_t>(Tmp);
2958   return false;
2959 }
2960 
2961 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
2962                                                uint32_t &Minor) {
2963   if (ParseAsAbsoluteExpression(Major))
2964     return TokError("invalid major version");
2965 
2966   if (getLexer().isNot(AsmToken::Comma))
2967     return TokError("minor version number required, comma expected");
2968   Lex();
2969 
2970   if (ParseAsAbsoluteExpression(Minor))
2971     return TokError("invalid minor version");
2972 
2973   return false;
2974 }
2975 
2976 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
2977   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
2978     return TokError("directive only supported for amdgcn architecture");
2979 
2980   std::string Target;
2981 
2982   SMLoc TargetStart = getTok().getLoc();
2983   if (getParser().parseEscapedString(Target))
2984     return true;
2985   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
2986 
2987   std::string ExpectedTarget;
2988   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
2989   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
2990 
2991   if (Target != ExpectedTargetOS.str())
2992     return getParser().Error(TargetRange.Start, "target must match options",
2993                              TargetRange);
2994 
2995   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
2996   return false;
2997 }
2998 
// Emit a generic "value out of range" diagnostic covering \p Range.
// Always returns true, matching the parser's error convention.
bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
  return getParser().Error(Range.Start, "value out of range", Range);
}
3002 
// Convert the raw next-free VGPR/SGPR counts from .amdhsa_* directives into
// the granulated "block" counts encoded in compute_pgm_rsrc1. Returns true
// (after emitting an out-of-range diagnostic at the relevant source range)
// when a register count exceeds what the target can address.
bool AMDGPUAsmParser::calculateGPRBlocks(
    const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
    bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange,
    unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks,
    unsigned &SGPRBlocks) {
  // TODO(scott.linder): These calculations are duplicated from
  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
  IsaVersion Version = getIsaVersion(getSTI().getCPU());

  unsigned NumVGPRs = NextFreeVGPR;
  unsigned NumSGPRs = NextFreeSGPR;

  if (Version.Major >= 10)
    // gfx10+ encodes a zero SGPR block count; no range checking is done.
    // NOTE(review): presumably the granulated SGPR field is unused on these
    // targets - confirm against the kernel descriptor spec.
    NumSGPRs = 0;
  else {
    unsigned MaxAddressableNumSGPRs =
        IsaInfo::getAddressableNumSGPRs(&getSTI());

    // On gfx8+ without the SGPR-init bug, the user-visible count alone must
    // fit; the extra SGPRs for VCC/flat_scratch/XNACK are appended below.
    if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
        NumSGPRs > MaxAddressableNumSGPRs)
      return OutOfRangeError(SGPRRange);

    NumSGPRs +=
        IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);

    // On older targets (or with the init bug) the total including the extra
    // SGPRs must fit within the addressable range.
    if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
        NumSGPRs > MaxAddressableNumSGPRs)
      return OutOfRangeError(SGPRRange);

    // The SGPR-init bug requires a fixed SGPR allocation regardless of use.
    if (Features.test(FeatureSGPRInitBug))
      NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
  }

  VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs);
  SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);

  return false;
}
3041 
// Parse a .amdhsa_kernel directive: a kernel name followed by a sequence of
// .amdhsa_* sub-directives terminated by .end_amdhsa_kernel. Each
// sub-directive fills a field of the kernel descriptor, which is emitted via
// the target streamer once the terminator is reached.
bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
    return TokError("directive only supported for amdgcn architecture");

  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
    return TokError("directive only supported for amdhsa OS");

  StringRef KernelName;
  if (getParser().parseIdentifier(KernelName))
    return true;

  kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());

  // Track which sub-directives have appeared so duplicates can be rejected
  // and mandatory ones can be checked for afterwards.
  StringSet<> Seen;

  IsaVersion IVersion = getIsaVersion(getSTI().getCPU());

  SMRange VGPRRange;
  uint64_t NextFreeVGPR = 0;
  SMRange SGPRRange;
  uint64_t NextFreeSGPR = 0;
  // Running count of user SGPRs implied by the enabled kernel properties.
  unsigned UserSGPRCount = 0;
  bool ReserveVCC = true;
  bool ReserveFlatScr = true;
  bool ReserveXNACK = hasXNACK();

  // Parse sub-directives of the form ".amdhsa_* <expr>" until the terminator.
  while (true) {
    while (getLexer().is(AsmToken::EndOfStatement))
      Lex();

    if (getLexer().isNot(AsmToken::Identifier))
      return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");

    StringRef ID = getTok().getIdentifier();
    SMRange IDRange = getTok().getLocRange();
    Lex();

    if (ID == ".end_amdhsa_kernel")
      break;

    if (Seen.find(ID) != Seen.end())
      return TokError(".amdhsa_ directives cannot be repeated");
    Seen.insert(ID);

    // Every sub-directive takes a non-negative absolute expression.
    SMLoc ValStart = getTok().getLoc();
    int64_t IVal;
    if (getParser().parseAbsoluteExpression(IVal))
      return true;
    SMLoc ValEnd = getTok().getLoc();
    SMRange ValRange = SMRange(ValStart, ValEnd);

    if (IVal < 0)
      return OutOfRangeError(ValRange);

    uint64_t Val = IVal;

// Range-check VALUE against ENTRY's bit width, then store it into the ENTRY
// bit-field of FIELD.
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
  if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
    return OutOfRangeError(RANGE);                                             \
  AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);

    if (ID == ".amdhsa_group_segment_fixed_size") {
      if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.group_segment_fixed_size = Val;
    } else if (ID == ".amdhsa_private_segment_fixed_size") {
      if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.private_segment_fixed_size = Val;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
                       Val, ValRange);
      // This property consumes 4 user SGPRs.
      UserSGPRCount += 4;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
                       ValRange);
      UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
                       ValRange);
      UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
                       Val, ValRange);
      UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
                       ValRange);
      UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
                       ValRange);
      UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
                       Val, ValRange);
      UserSGPRCount += 1;
    } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
          ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
                       ValRange);
    } else if (ID == ".amdhsa_next_free_vgpr") {
      // Register counts are range-checked later, in calculateGPRBlocks.
      VGPRRange = ValRange;
      NextFreeVGPR = Val;
    } else if (ID == ".amdhsa_next_free_sgpr") {
      SGPRRange = ValRange;
      NextFreeSGPR = Val;
    } else if (ID == ".amdhsa_reserve_vcc") {
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveVCC = Val;
    } else if (ID == ".amdhsa_reserve_flat_scratch") {
      if (IVersion.Major < 7)
        return getParser().Error(IDRange.Start, "directive requires gfx7+",
                                 IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveFlatScr = Val;
    } else if (ID == ".amdhsa_reserve_xnack_mask") {
      if (IVersion.Major < 8)
        return getParser().Error(IDRange.Start, "directive requires gfx8+",
                                 IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveXNACK = Val;
    } else if (ID == ".amdhsa_float_round_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_round_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
                       ValRange);
    } else if (ID == ".amdhsa_dx10_clamp") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
    } else if (ID == ".amdhsa_ieee_mode") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
                       Val, ValRange);
    } else if (ID == ".amdhsa_fp16_overflow") {
      if (IVersion.Major < 9)
        return getParser().Error(IDRange.Start, "directive requires gfx9+",
                                 IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
                       ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
          ValRange);
    } else if (ID == ".amdhsa_exception_fp_denorm_src") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
          ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_int_div_zero") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
                       Val, ValRange);
    } else {
      return getParser().Error(IDRange.Start,
                               "unknown .amdhsa_kernel directive", IDRange);
    }

#undef PARSE_BITS_ENTRY
  }

  // Register counts are mandatory; they feed the granulated block fields.
  if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
    return TokError(".amdhsa_next_free_vgpr directive is required");

  if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
    return TokError(".amdhsa_next_free_sgpr directive is required");

  unsigned VGPRBlocks;
  unsigned SGPRBlocks;
  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
                         ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR,
                         SGPRRange, VGPRBlocks, SGPRBlocks))
    return true;

  // Encode the granulated register block counts and the user SGPR count.
  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
          VGPRBlocks))
    return OutOfRangeError(VGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
          SGPRBlocks))
    return OutOfRangeError(SGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
                  SGPRBlocks);

  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
    return TokError("too many user SGPRs enabled");
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
                  UserSGPRCount);

  // All fields populated; emit the finished kernel descriptor.
  getTargetStreamer().EmitAmdhsaKernelDescriptor(
      getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
      ReserveFlatScr, ReserveXNACK);
  return false;
}
3294 
3295 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
3296   uint32_t Major;
3297   uint32_t Minor;
3298 
3299   if (ParseDirectiveMajorMinor(Major, Minor))
3300     return true;
3301 
3302   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
3303   return false;
3304 }
3305 
3306 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
3307   uint32_t Major;
3308   uint32_t Minor;
3309   uint32_t Stepping;
3310   StringRef VendorName;
3311   StringRef ArchName;
3312 
3313   // If this directive has no arguments, then use the ISA version for the
3314   // targeted GPU.
3315   if (getLexer().is(AsmToken::EndOfStatement)) {
3316     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3317     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
3318                                                       ISA.Stepping,
3319                                                       "AMD", "AMDGPU");
3320     return false;
3321   }
3322 
3323   if (ParseDirectiveMajorMinor(Major, Minor))
3324     return true;
3325 
3326   if (getLexer().isNot(AsmToken::Comma))
3327     return TokError("stepping version number required, comma expected");
3328   Lex();
3329 
3330   if (ParseAsAbsoluteExpression(Stepping))
3331     return TokError("invalid stepping version");
3332 
3333   if (getLexer().isNot(AsmToken::Comma))
3334     return TokError("vendor name required, comma expected");
3335   Lex();
3336 
3337   if (getLexer().isNot(AsmToken::String))
3338     return TokError("invalid vendor name");
3339 
3340   VendorName = getLexer().getTok().getStringContents();
3341   Lex();
3342 
3343   if (getLexer().isNot(AsmToken::Comma))
3344     return TokError("arch name required, comma expected");
3345   Lex();
3346 
3347   if (getLexer().isNot(AsmToken::String))
3348     return TokError("invalid arch name");
3349 
3350   ArchName = getLexer().getTok().getStringContents();
3351   Lex();
3352 
3353   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
3354                                                     VendorName, ArchName);
3355   return false;
3356 }
3357 
3358 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
3359                                                amd_kernel_code_t &Header) {
3360   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
3361   // assembly for backwards compatibility.
3362   if (ID == "max_scratch_backing_memory_byte_size") {
3363     Parser.eatToEndOfStatement();
3364     return false;
3365   }
3366 
3367   SmallString<40> ErrStr;
3368   raw_svector_ostream Err(ErrStr);
3369   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
3370     return TokError(Err.str());
3371   }
3372   Lex();
3373   return false;
3374 }
3375 
3376 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
3377   amd_kernel_code_t Header;
3378   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
3379 
3380   while (true) {
3381     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
3382     // will set the current token to EndOfStatement.
3383     while(getLexer().is(AsmToken::EndOfStatement))
3384       Lex();
3385 
3386     if (getLexer().isNot(AsmToken::Identifier))
3387       return TokError("expected value identifier or .end_amd_kernel_code_t");
3388 
3389     StringRef ID = getLexer().getTok().getIdentifier();
3390     Lex();
3391 
3392     if (ID == ".end_amd_kernel_code_t")
3393       break;
3394 
3395     if (ParseAMDKernelCodeTValue(ID, Header))
3396       return true;
3397   }
3398 
3399   getTargetStreamer().EmitAMDKernelCodeT(Header);
3400 
3401   return false;
3402 }
3403 
3404 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
3405   if (getLexer().isNot(AsmToken::Identifier))
3406     return TokError("expected symbol name");
3407 
3408   StringRef KernelName = Parser.getTok().getString();
3409 
3410   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
3411                                            ELF::STT_AMDGPU_HSA_KERNEL);
3412   Lex();
3413   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
3414     KernelScope.initialize(getContext());
3415   return false;
3416 }
3417 
// Handles the .amd_amdgpu_isa directive: the string operand must match the
// ISA version string derived from the current subtarget (triple + mcpu).
// Returns true (error) on mismatch or on a non-amdgcn target.
bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
    return Error(getParser().getTok().getLoc(),
                 ".amd_amdgpu_isa directive is not available on non-amdgcn "
                 "architectures");
  }

  // NOTE(review): the current token is assumed to be a string literal;
  // getStringContents() is called without checking the token kind first —
  // confirm the lexer guarantees this, or add a kind check.
  auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();

  // Build the expected ISA string from the subtarget for comparison.
  std::string ISAVersionStringFromSTI;
  raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
  IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);

  if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
    return Error(getParser().getTok().getLoc(),
                 ".amd_amdgpu_isa directive does not match triple and/or mcpu "
                 "arguments specified through the command line");
  }

  getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
  Lex();

  return false;
}
3442 
// Parses the HSA metadata block delimited by the begin/end assembler
// directives and forwards the collected text to the target streamer.
// The directive names and the metadata format (V2 vs V3) are selected by
// the code-object version of the subtarget.
bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
  const char *AssemblerDirectiveBegin;
  const char *AssemblerDirectiveEnd;
  std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
      AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
          ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
                            HSAMD::V3::AssemblerDirectiveEnd)
          : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
                            HSAMD::AssemblerDirectiveEnd);

  // HSA metadata only makes sense when targeting the AMDHSA OS.
  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
    return Error(getParser().getTok().getLoc(),
                 (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
                 "not available on non-amdhsa OSes")).str());
  }

  // Collect the raw text between the begin and end directives.
  std::string HSAMetadataString;
  if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
                          HSAMetadataString))
    return true;

  // Hand off to the version-appropriate emitter; both report malformed
  // metadata via a false return.
  if (IsaInfo::hasCodeObjectV3(&getSTI())) {
    if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
      return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
  } else {
    if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
      return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
  }

  return false;
}
3474 
/// Common code to parse out a block of text (typically YAML) between start and
/// end directives.
///
/// \param AssemblerDirectiveBegin  name of the opening directive (used only
///        in documentation/diagnostics; the caller has already consumed it).
/// \param AssemblerDirectiveEnd    identifier that terminates the block.
/// \param CollectString            receives the collected text, one source
///        statement per separator-delimited line.
/// \returns true on error (EOF reached before the end directive).
bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
                                          const char *AssemblerDirectiveEnd,
                                          std::string &CollectString) {

  raw_string_ostream CollectStream(CollectString);

  // Whitespace is significant in the collected text (e.g. YAML), so stop
  // the lexer from discarding it while we scan.
  getLexer().setSkipSpace(false);

  bool FoundEnd = false;
  while (!getLexer().is(AsmToken::Eof)) {
    // Preserve leading whitespace on each line.
    while (getLexer().is(AsmToken::Space)) {
      CollectStream << getLexer().getTok().getString();
      Lex();
    }

    // Stop (without collecting) once the end directive is seen.
    if (getLexer().is(AsmToken::Identifier)) {
      StringRef ID = getLexer().getTok().getIdentifier();
      if (ID == AssemblerDirectiveEnd) {
        Lex();
        FoundEnd = true;
        break;
      }
    }

    // Append the rest of the statement plus a statement separator so the
    // collected text keeps its line structure.
    CollectStream << Parser.parseStringToEndOfStatement()
                  << getContext().getAsmInfo()->getSeparatorString();

    Parser.eatToEndOfStatement();
  }

  // Restore normal lexing behavior before returning.
  getLexer().setSkipSpace(true);

  if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
    return TokError(Twine("expected directive ") +
                    Twine(AssemblerDirectiveEnd) + Twine(" not found"));
  }

  CollectStream.flush();
  return false;
}
3517 
3518 /// Parse the assembler directive for new MsgPack-format PAL metadata.
3519 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
3520   std::string String;
3521   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
3522                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
3523     return true;
3524 
3525   auto PALMetadata = getTargetStreamer().getPALMetadata();
3526   if (!PALMetadata->setFromString(String))
3527     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
3528   return false;
3529 }
3530 
/// Parse the assembler directive for old linear-format PAL metadata.
/// The operand is a comma-separated list of key,value pairs (register
/// address followed by register value); an odd-length list is an error.
bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
  if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
    return Error(getParser().getTok().getLoc(),
                 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
                 "not available on non-amdpal OSes")).str());
  }

  auto PALMetadata = getTargetStreamer().getPALMetadata();
  // Mark the metadata as using the legacy (linear register list) format.
  PALMetadata->setLegacy();
  for (;;) {
    uint32_t Key, Value;
    if (ParseAsAbsoluteExpression(Key)) {
      return TokError(Twine("invalid value in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    // Keys must always be paired with a value.
    if (getLexer().isNot(AsmToken::Comma)) {
      return TokError(Twine("expected an even number of values in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    Lex();
    if (ParseAsAbsoluteExpression(Value)) {
      return TokError(Twine("invalid value in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    PALMetadata->setRegister(Key, Value);
    // A trailing comma means more pairs follow; otherwise we are done.
    if (getLexer().isNot(AsmToken::Comma))
      break;
    Lex();
  }
  return false;
}
3563 
// Dispatch target-specific assembler directives to their handlers.
// Code-object-v3 and pre-v3 subtargets accept disjoint directive sets;
// the PAL metadata directives are accepted on both. Returning true here
// signals the directive was not handled by this target parser.
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
  StringRef IDVal = DirectiveID.getString();

  if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
    if (IDVal == ".amdgcn_target")
      return ParseDirectiveAMDGCNTarget();

    if (IDVal == ".amdhsa_kernel")
      return ParseDirectiveAMDHSAKernel();

    // TODO: Restructure/combine with PAL metadata directive.
    if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  } else {
    if (IDVal == ".hsa_code_object_version")
      return ParseDirectiveHSACodeObjectVersion();

    if (IDVal == ".hsa_code_object_isa")
      return ParseDirectiveHSACodeObjectISA();

    if (IDVal == ".amd_kernel_code_t")
      return ParseDirectiveAMDKernelCodeT();

    if (IDVal == ".amdgpu_hsa_kernel")
      return ParseDirectiveAMDGPUHsaKernel();

    if (IDVal == ".amd_amdgpu_isa")
      return ParseDirectiveISAVersion();

    if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  }

  // PAL metadata directives are independent of the code-object version.
  if (IDVal == PALMD::AssemblerDirectiveBegin)
    return ParseDirectivePALMetadataBegin();

  if (IDVal == PALMD::AssemblerDirective)
    return ParseDirectivePALMetadata();

  return true;
}
3605 
// Returns true if RegNo (or any register aliasing it) is a valid register
// operand on the current subtarget. Encodes per-generation availability of
// TTMP12-15, SGPR102-105, TBA/TMA, XNACK_MASK, SGPR_NULL, inline values,
// and flat scratch.
bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
                                           unsigned RegNo) const {

  // TTMP12-TTMP15 (and any register aliasing them) exist only on GFX9+.
  for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return isGFX9() || isGFX10();
  }

  // GFX10 has 2 more SGPRs 104 and 105.
  for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return hasSGPR104_SGPR105();
  }

  switch (RegNo) {
  case AMDGPU::TBA:
  case AMDGPU::TBA_LO:
  case AMDGPU::TBA_HI:
  case AMDGPU::TMA:
  case AMDGPU::TMA_LO:
  case AMDGPU::TMA_HI:
    // Trap base/memory address registers were removed in GFX9.
    return !isGFX9() && !isGFX10();
  case AMDGPU::XNACK_MASK:
  case AMDGPU::XNACK_MASK_LO:
  case AMDGPU::XNACK_MASK_HI:
    // XNACK_MASK requires the XNACK feature and a VI/GFX9-era target.
    return !isCI() && !isSI() && !isGFX10() && hasXNACK();
  case AMDGPU::SGPR_NULL:
    return isGFX10();
  default:
    break;
  }

  // Inline-constant registers (e.g. shared_base) appeared after VI.
  if (isInlineValue(RegNo))
    return !isCI() && !isSI() && !isVI();

  if (isCI())
    return true;

  if (isSI() || isGFX10()) {
    // No flat_scr on SI.
    // On GFX10 flat scratch is not a valid register operand and can only be
    // accessed with s_setreg/s_getreg.
    switch (RegNo) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::FLAT_SCR_LO:
    case AMDGPU::FLAT_SCR_HI:
      return false;
    default:
      return true;
    }
  }

  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
  // SI/CI have.
  for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return hasSGPR102_SGPR103();
  }

  return true;
}
3670 
// Parse a single instruction operand: first via the tablegen-generated
// custom parsers, then as a register/immediate, then as a generic
// expression, and finally as a bare token flag (e.g. 'gds').
OperandMatchResultTy
AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
  // Try to parse with a custom parser
  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);

  // If we successfully parsed the operand or if there as an error parsing,
  // we are done.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list.  This is only done
  // by custom parser, so we shouldn't continue on to the generic parsing.
  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
      getLexer().is(AsmToken::EndOfStatement))
    return ResTy;

  ResTy = parseRegOrImm(Operands);

  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail)
    return ResTy;

  const auto &Tok = Parser.getTok();
  SMLoc S = Tok.getLoc();

  // Fall back to a generic expression (symbols, arithmetic, etc.).
  const MCExpr *Expr = nullptr;
  if (!Parser.parseExpression(Expr)) {
    Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
    return MatchOperand_Success;
  }

  // Possibly this is an instruction flag like 'gds'.
  if (Tok.getKind() == AsmToken::Identifier) {
    Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S));
    Parser.Lex();
    return MatchOperand_Success;
  }

  return MatchOperand_NoMatch;
}
3709 
3710 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
3711   // Clear any forced encodings from the previous instruction.
3712   setForcedEncodingSize(0);
3713   setForcedDPP(false);
3714   setForcedSDWA(false);
3715 
3716   if (Name.endswith("_e64")) {
3717     setForcedEncodingSize(64);
3718     return Name.substr(0, Name.size() - 4);
3719   } else if (Name.endswith("_e32")) {
3720     setForcedEncodingSize(32);
3721     return Name.substr(0, Name.size() - 4);
3722   } else if (Name.endswith("_dpp")) {
3723     setForcedDPP(true);
3724     return Name.substr(0, Name.size() - 4);
3725   } else if (Name.endswith("_sdwa")) {
3726     setForcedSDWA(true);
3727     return Name.substr(0, Name.size() - 5);
3728   }
3729   return Name;
3730 }
3731 
// Top-level instruction parse: push the (suffix-stripped) mnemonic as the
// first operand, then parse operands until end of statement. On any operand
// error the rest of the statement is consumed so the parser can resync.
bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
                                       StringRef Name,
                                       SMLoc NameLoc, OperandVector &Operands) {
  // Add the instruction mnemonic
  Name = parseMnemonicSuffix(Name);
  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));

  while (!getLexer().is(AsmToken::EndOfStatement)) {
    OperandMatchResultTy Res = parseOperand(Operands, Name);

    // Eat the comma or space if there is one.
    if (getLexer().is(AsmToken::Comma))
      Parser.Lex();

    switch (Res) {
      case MatchOperand_Success: break;
      case MatchOperand_ParseFail:
        Error(getLexer().getLoc(), "failed parsing operand.");
        // Skip to the end of the statement to recover.
        while (!getLexer().is(AsmToken::EndOfStatement)) {
          Parser.Lex();
        }
        return true;
      case MatchOperand_NoMatch:
        Error(getLexer().getLoc(), "not a valid operand.");
        // Skip to the end of the statement to recover.
        while (!getLexer().is(AsmToken::EndOfStatement)) {
          Parser.Lex();
        }
        return true;
    }
  }

  return false;
}
3765 
3766 //===----------------------------------------------------------------------===//
3767 // Utility functions
3768 //===----------------------------------------------------------------------===//
3769 
// Parse "<Prefix>:<integer>" (with optional leading '-') into Int.
// Returns NoMatch if the current token is not the expected prefix,
// ParseFail on malformed input after the prefix was recognized.
OperandMatchResultTy
AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) {
  switch(getLexer().getKind()) {
    default: return MatchOperand_NoMatch;
    case AsmToken::Identifier: {
      StringRef Name = Parser.getTok().getString();
      if (!Name.equals(Prefix)) {
        return MatchOperand_NoMatch;
      }

      Parser.Lex();
      if (getLexer().isNot(AsmToken::Colon))
        return MatchOperand_ParseFail;

      Parser.Lex();

      // The lexer does not fold a leading minus into the integer token, so
      // handle the sign explicitly.
      bool IsMinus = false;
      if (getLexer().getKind() == AsmToken::Minus) {
        Parser.Lex();
        IsMinus = true;
      }

      if (getLexer().isNot(AsmToken::Integer))
        return MatchOperand_ParseFail;

      if (getParser().parseAbsoluteExpression(Int))
        return MatchOperand_ParseFail;

      if (IsMinus)
        Int = -Int;
      break;
    }
  }
  return MatchOperand_Success;
}
3805 
3806 OperandMatchResultTy
3807 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
3808                                     AMDGPUOperand::ImmTy ImmTy,
3809                                     bool (*ConvertResult)(int64_t&)) {
3810   SMLoc S = Parser.getTok().getLoc();
3811   int64_t Value = 0;
3812 
3813   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
3814   if (Res != MatchOperand_Success)
3815     return Res;
3816 
3817   if (ConvertResult && !ConvertResult(Value)) {
3818     return MatchOperand_ParseFail;
3819   }
3820 
3821   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
3822   return MatchOperand_Success;
3823 }
3824 
3825 OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix(
3826   const char *Prefix,
3827   OperandVector &Operands,
3828   AMDGPUOperand::ImmTy ImmTy,
3829   bool (*ConvertResult)(int64_t&)) {
3830   StringRef Name = Parser.getTok().getString();
3831   if (!Name.equals(Prefix))
3832     return MatchOperand_NoMatch;
3833 
3834   Parser.Lex();
3835   if (getLexer().isNot(AsmToken::Colon))
3836     return MatchOperand_ParseFail;
3837 
3838   Parser.Lex();
3839   if (getLexer().isNot(AsmToken::LBrac))
3840     return MatchOperand_ParseFail;
3841   Parser.Lex();
3842 
3843   unsigned Val = 0;
3844   SMLoc S = Parser.getTok().getLoc();
3845 
3846   // FIXME: How to verify the number of elements matches the number of src
3847   // operands?
3848   for (int I = 0; I < 4; ++I) {
3849     if (I != 0) {
3850       if (getLexer().is(AsmToken::RBrac))
3851         break;
3852 
3853       if (getLexer().isNot(AsmToken::Comma))
3854         return MatchOperand_ParseFail;
3855       Parser.Lex();
3856     }
3857 
3858     if (getLexer().isNot(AsmToken::Integer))
3859       return MatchOperand_ParseFail;
3860 
3861     int64_t Op;
3862     if (getParser().parseAbsoluteExpression(Op))
3863       return MatchOperand_ParseFail;
3864 
3865     if (Op != 0 && Op != 1)
3866       return MatchOperand_ParseFail;
3867     Val |= (Op << I);
3868   }
3869 
3870   Parser.Lex();
3871   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
3872   return MatchOperand_Success;
3873 }
3874 
// Parse a named single-bit modifier: the identifier Name sets the bit to 1,
// "no<Name>" sets it to 0, and end-of-statement yields the default (0).
// Emits diagnostics for r128/a16 on subtargets that lack them.
OperandMatchResultTy
AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
                               AMDGPUOperand::ImmTy ImmTy) {
  int64_t Bit = 0;
  SMLoc S = Parser.getTok().getLoc();

  // We are at the end of the statement, and this is a default argument, so
  // use a default value.
  if (getLexer().isNot(AsmToken::EndOfStatement)) {
    switch(getLexer().getKind()) {
      case AsmToken::Identifier: {
        StringRef Tok = Parser.getTok().getString();
        if (Tok == Name) {
          // r128 and a16 are mutually exclusive per generation: GFX9 uses
          // a16, earlier targets use r128.
          if (Tok == "r128" && isGFX9())
            Error(S, "r128 modifier is not supported on this GPU");
          if (Tok == "a16" && !isGFX9())
            Error(S, "a16 modifier is not supported on this GPU");
          Bit = 1;
          Parser.Lex();
        } else if (Tok.startswith("no") && Tok.endswith(Name)) {
          Bit = 0;
          Parser.Lex();
        } else {
          return MatchOperand_NoMatch;
        }
        break;
      }
      default:
        return MatchOperand_NoMatch;
    }
  }

  // The DLC bit only exists on GFX10.
  if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
    return MatchOperand_ParseFail;

  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
  return MatchOperand_Success;
}
3913 
3914 static void addOptionalImmOperand(
3915   MCInst& Inst, const OperandVector& Operands,
3916   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
3917   AMDGPUOperand::ImmTy ImmT,
3918   int64_t Default = 0) {
3919   auto i = OptionalIdx.find(ImmT);
3920   if (i != OptionalIdx.end()) {
3921     unsigned Idx = i->second;
3922     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
3923   } else {
3924     Inst.addOperand(MCOperand::createImm(Default));
3925   }
3926 }
3927 
3928 OperandMatchResultTy
3929 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
3930   if (getLexer().isNot(AsmToken::Identifier)) {
3931     return MatchOperand_NoMatch;
3932   }
3933   StringRef Tok = Parser.getTok().getString();
3934   if (Tok != Prefix) {
3935     return MatchOperand_NoMatch;
3936   }
3937 
3938   Parser.Lex();
3939   if (getLexer().isNot(AsmToken::Colon)) {
3940     return MatchOperand_ParseFail;
3941   }
3942 
3943   Parser.Lex();
3944   if (getLexer().isNot(AsmToken::Identifier)) {
3945     return MatchOperand_ParseFail;
3946   }
3947 
3948   Value = Parser.getTok().getString();
3949   return MatchOperand_Success;
3950 }
3951 
// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
// values to live in a joint format operand in the MCInst encoding.
// Returns NoMatch only if neither was present; unspecified components
// default to 0. The joint value is dfmt | (nfmt << 4).
OperandMatchResultTy
AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Dfmt = 0, Nfmt = 0;
  // dfmt and nfmt can appear in either order, and each is optional.
  bool GotDfmt = false, GotNfmt = false;
  while (!GotDfmt || !GotNfmt) {
    if (!GotDfmt) {
      auto Res = parseIntWithPrefix("dfmt", Dfmt);
      if (Res != MatchOperand_NoMatch) {
        if (Res != MatchOperand_Success)
          return Res;
        // dfmt is a 4-bit field.
        if (Dfmt >= 16) {
          Error(Parser.getTok().getLoc(), "out of range dfmt");
          return MatchOperand_ParseFail;
        }
        GotDfmt = true;
        Parser.Lex();
        continue;
      }
    }
    if (!GotNfmt) {
      auto Res = parseIntWithPrefix("nfmt", Nfmt);
      if (Res != MatchOperand_NoMatch) {
        if (Res != MatchOperand_Success)
          return Res;
        // nfmt is a 3-bit field.
        if (Nfmt >= 8) {
          Error(Parser.getTok().getLoc(), "out of range nfmt");
          return MatchOperand_ParseFail;
        }
        GotNfmt = true;
        Parser.Lex();
        continue;
      }
    }
    // Neither keyword matched on this iteration; stop scanning.
    break;
  }
  if (!GotDfmt && !GotNfmt)
    return MatchOperand_NoMatch;
  auto Format = Dfmt | Nfmt << 4;
  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
  return MatchOperand_Success;
}
3998 
3999 //===----------------------------------------------------------------------===//
4000 // ds
4001 //===----------------------------------------------------------------------===//
4002 
4003 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
4004                                     const OperandVector &Operands) {
4005   OptionalImmIndexMap OptionalIdx;
4006 
4007   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4008     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4009 
4010     // Add the register arguments
4011     if (Op.isReg()) {
4012       Op.addRegOperands(Inst, 1);
4013       continue;
4014     }
4015 
4016     // Handle optional arguments
4017     OptionalIdx[Op.getImmTy()] = i;
4018   }
4019 
4020   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
4021   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
4022   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4023 
4024   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4025 }
4026 
// Convert parsed operands of a DS instruction into the MCInst encoding:
// registers in parse order, then the offset (a swizzle immediate for
// ds_swizzle_b32), then gds unless it is hardcoded in the opcode, then the
// implicit m0 read.
void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                                bool IsGdsHardcoded) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // A bare 'gds' token means the bit is implied by the source, not an
    // encodable operand.
    if (Op.isToken() && Op.getToken() == "gds") {
      IsGdsHardcoded = true;
      continue;
    }

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // ds_swizzle_b32 encodes its offset field as a swizzle pattern.
  AMDGPUOperand::ImmTy OffsetType =
    (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si ||
     Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
                                                      AMDGPUOperand::ImmTyOffset;

  addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);

  if (!IsGdsHardcoded) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
  }
  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
}
4061 
// Convert parsed operands of an EXP instruction: records the MCInst index of
// each of the four source slots ('off' becomes NoRegister), rewrites the
// sources for compressed exports, computes the enable mask from which slots
// are live, and appends vm/compr/en immediates.
void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  // OperandIdx[i] is the index within Inst of source slot i.
  unsigned OperandIdx[4];
  unsigned EnMask = 0;
  int SrcIdx = 0;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Op.addRegOperands(Inst, 1);
      ++SrcIdx;
      continue;
    }

    // 'off' placeholders occupy a source slot with NoRegister.
    if (Op.isOff()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
      ++SrcIdx;
      continue;
    }

    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // 'done' is folded into the opcode during matching; skip the token here.
    if (Op.isToken() && Op.getToken() == "done")
      continue;

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  assert(SrcIdx == 4);

  bool Compr = false;
  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
    // Compressed exports use src0/src1 only: move the second register pair
    // into slot 1 and clear the upper slots.
    Compr = true;
    Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
    Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
    Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
  }

  // Enable two mask bits per live slot when compressed, one otherwise.
  for (auto i = 0; i < SrcIdx; ++i) {
    if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
      EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);

  Inst.addOperand(MCOperand::createImm(EnMask));
}
4122 
4123 //===----------------------------------------------------------------------===//
4124 // s_waitcnt
4125 //===----------------------------------------------------------------------===//
4126 
4127 static bool
4128 encodeCnt(
4129   const AMDGPU::IsaVersion ISA,
4130   int64_t &IntVal,
4131   int64_t CntVal,
4132   bool Saturate,
4133   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
4134   unsigned (*decode)(const IsaVersion &Version, unsigned))
4135 {
4136   bool Failed = false;
4137 
4138   IntVal = encode(ISA, IntVal, CntVal);
4139   if (CntVal != decode(ISA, IntVal)) {
4140     if (Saturate) {
4141       IntVal = encode(ISA, IntVal, -1);
4142     } else {
4143       Failed = true;
4144     }
4145   }
4146   return Failed;
4147 }
4148 
// Parse one "<name>(<value>)" component of an s_waitcnt operand, where name
// is vmcnt/expcnt/lgkmcnt (optionally with a _sat suffix for saturating
// encoding), and fold the value into IntVal. Returns true on error.
bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
  StringRef CntName = Parser.getTok().getString();
  int64_t CntVal;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::LParen))
    return true;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Integer))
    return true;

  SMLoc ValLoc = Parser.getTok().getLoc();
  if (getParser().parseAbsoluteExpression(CntVal))
    return true;

  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());

  bool Failed = true;
  bool Sat = CntName.endswith("_sat");

  // Field widths differ per ISA version; encodeCnt validates the range.
  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
  }

  if (Failed) {
    Error(ValLoc, "too large value for " + CntName);
    return true;
  }

  if (getLexer().isNot(AsmToken::RParen)) {
    return true;
  }

  Parser.Lex();
  // Consume an '&' or ',' separator, but only when another counter name
  // follows, so a trailing separator is left for the caller to diagnose.
  if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
    const AsmToken NextToken = getLexer().peekTok();
    if (NextToken.is(AsmToken::Identifier)) {
      Parser.Lex();
    }
  }

  return false;
}
4197 
// Parse the operand of s_waitcnt: either a raw integer, or a sequence of
// named counters like "vmcnt(0) expcnt(0)". Unnamed counters stay at their
// no-wait maximum (the full bit mask).
OperandMatchResultTy
AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
  // Start from the all-ones mask so unmentioned counters mean "don't wait".
  int64_t Waitcnt = getWaitcntBitMask(ISA);
  SMLoc S = Parser.getTok().getLoc();

  switch(getLexer().getKind()) {
    default: return MatchOperand_ParseFail;
    case AsmToken::Integer:
      // The operand can be an integer value.
      if (getParser().parseAbsoluteExpression(Waitcnt))
        return MatchOperand_ParseFail;
      break;

    case AsmToken::Identifier:
      // Fold each named counter into the mask until end of statement.
      do {
        if (parseCnt(Waitcnt))
          return MatchOperand_ParseFail;
      } while(getLexer().isNot(AsmToken::EndOfStatement));
      break;
  }
  Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
  return MatchOperand_Success;
}
4222 
// Parse a "hwreg(<id>[, <offset>, <width>])" construct, where <id> is either
// a symbolic register name (restricted per subtarget generation) or a raw
// integer. Offset and Width keep their defaults when the optional params are
// absent. Returns true on syntax error; range checking is the caller's job.
bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
                                          int64_t &Width) {
  using namespace llvm::AMDGPU::Hwreg;

  if (Parser.getTok().getString() != "hwreg")
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::LParen))
    return true;
  Parser.Lex();

  if (getLexer().is(AsmToken::Identifier)) {
    HwReg.IsSymbolic = true;
    HwReg.Id = ID_UNKNOWN_;
    const StringRef tok = Parser.getTok().getString();
    // Limit the symbolic name table to the entries this generation defines.
    int Last = ID_SYMBOLIC_LAST_;
    if (isSI() || isCI() || isVI())
      Last = ID_SYMBOLIC_FIRST_GFX9_;
    else if (isGFX9())
      Last = ID_SYMBOLIC_FIRST_GFX10_;
    for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) {
      if (tok == IdSymbolic[i]) {
        HwReg.Id = i;
        break;
      }
    }
    // An unrecognized name leaves Id as ID_UNKNOWN_ for the caller to
    // diagnose; consume the token either way.
    Parser.Lex();
  } else {
    HwReg.IsSymbolic = false;
    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(HwReg.Id))
      return true;
  }

  // hwreg(<id>) without the optional offset/width.
  if (getLexer().is(AsmToken::RParen)) {
    Parser.Lex();
    return false;
  }

  // optional params
  if (getLexer().isNot(AsmToken::Comma))
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::Integer))
    return true;
  if (getParser().parseAbsoluteExpression(Offset))
    return true;

  if (getLexer().isNot(AsmToken::Comma))
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::Integer))
    return true;
  if (getParser().parseAbsoluteExpression(Width))
    return true;

  if (getLexer().isNot(AsmToken::RParen))
    return true;
  Parser.Lex();

  return false;
}
4289 
// Parse the s_setreg/s_getreg operand: either a raw 16-bit immediate or a
// hwreg(...) construct, packed as id | offset<<shift | (width-1)<<shift.
// Range errors are reported but parsing continues with the (invalid) value
// so that subsequent operands still get diagnosed.
OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Hwreg;

  int64_t Imm16Val = 0;
  SMLoc S = Parser.getTok().getLoc();

  switch(getLexer().getKind()) {
    default: return MatchOperand_NoMatch;
    case AsmToken::Integer:
      // The operand can be an integer value.
      if (getParser().parseAbsoluteExpression(Imm16Val))
        return MatchOperand_NoMatch;
      if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
        Error(S, "invalid immediate: only 16-bit values are legal");
        // Do not return error code, but create an imm operand anyway and proceed
        // to the next operand, if any. That avoids unnecessary error messages.
      }
      break;

    case AsmToken::Identifier: {
        OperandInfoTy HwReg(ID_UNKNOWN_);
        int64_t Offset = OFFSET_DEFAULT_;
        int64_t Width = WIDTH_M1_DEFAULT_ + 1;
        if (parseHwregConstruct(HwReg, Offset, Width))
          return MatchOperand_ParseFail;
        // Validate each field; errors are reported but non-fatal (see above).
        if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) {
          if (HwReg.IsSymbolic)
            Error(S, "invalid symbolic name of hardware register");
          else
            Error(S, "invalid code of hardware register: only 6-bit values are legal");
        }
        if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset))
          Error(S, "invalid bit offset: only 5-bit values are legal");
        if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1))
          Error(S, "invalid bitfield width: only values from 1 to 32 are legal");
        // Pack the fields; width is encoded biased by one.
        Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_);
      }
      break;
  }
  Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
  return MatchOperand_Success;
}
4332 
// Any immediate can serve as an s_waitcnt operand; range checks happen
// during encoding.
bool AMDGPUOperand::isSWaitCnt() const {
  return isImm();
}
4336 
// True only for immediates produced by parseHwreg (packed hwreg encoding).
bool AMDGPUOperand::isHwreg() const {
  return isImmTy(ImmTyHwreg);
}
4340 
4341 bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) {
4342   using namespace llvm::AMDGPU::SendMsg;
4343 
4344   if (Parser.getTok().getString() != "sendmsg")
4345     return true;
4346   Parser.Lex();
4347 
4348   if (getLexer().isNot(AsmToken::LParen))
4349     return true;
4350   Parser.Lex();
4351 
4352   if (getLexer().is(AsmToken::Identifier)) {
4353     Msg.IsSymbolic = true;
4354     Msg.Id = ID_UNKNOWN_;
4355     const std::string tok = Parser.getTok().getString();
4356     for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
4357       switch(i) {
4358         default: continue; // Omit gaps.
4359         case ID_GS_ALLOC_REQ:
4360           if (isSI() || isCI() || isVI())
4361             continue;
4362           break;
4363         case ID_INTERRUPT: case ID_GS: case ID_GS_DONE:
4364         case ID_SYSMSG: break;
4365       }
4366       if (tok == IdSymbolic[i]) {
4367         Msg.Id = i;
4368         break;
4369       }
4370     }
4371     Parser.Lex();
4372   } else {
4373     Msg.IsSymbolic = false;
4374     if (getLexer().isNot(AsmToken::Integer))
4375       return true;
4376     if (getParser().parseAbsoluteExpression(Msg.Id))
4377       return true;
4378     if (getLexer().is(AsmToken::Integer))
4379       if (getParser().parseAbsoluteExpression(Msg.Id))
4380         Msg.Id = ID_UNKNOWN_;
4381   }
4382   if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest.
4383     return false;
4384 
4385   if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) {
4386     if (getLexer().isNot(AsmToken::RParen))
4387       return true;
4388     Parser.Lex();
4389     return false;
4390   }
4391 
4392   if (getLexer().isNot(AsmToken::Comma))
4393     return true;
4394   Parser.Lex();
4395 
4396   assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG);
4397   Operation.Id = ID_UNKNOWN_;
4398   if (getLexer().is(AsmToken::Identifier)) {
4399     Operation.IsSymbolic = true;
4400     const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
4401     const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
4402     const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
4403     const StringRef Tok = Parser.getTok().getString();
4404     for (int i = F; i < L; ++i) {
4405       if (Tok == S[i]) {
4406         Operation.Id = i;
4407         break;
4408       }
4409     }
4410     Parser.Lex();
4411   } else {
4412     Operation.IsSymbolic = false;
4413     if (getLexer().isNot(AsmToken::Integer))
4414       return true;
4415     if (getParser().parseAbsoluteExpression(Operation.Id))
4416       return true;
4417   }
4418 
4419   if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
4420     // Stream id is optional.
4421     if (getLexer().is(AsmToken::RParen)) {
4422       Parser.Lex();
4423       return false;
4424     }
4425 
4426     if (getLexer().isNot(AsmToken::Comma))
4427       return true;
4428     Parser.Lex();
4429 
4430     if (getLexer().isNot(AsmToken::Integer))
4431       return true;
4432     if (getParser().parseAbsoluteExpression(StreamId))
4433       return true;
4434   }
4435 
4436   if (getLexer().isNot(AsmToken::RParen))
4437     return true;
4438   Parser.Lex();
4439   return false;
4440 }
4441 
4442 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
4443   if (getLexer().getKind() != AsmToken::Identifier)
4444     return MatchOperand_NoMatch;
4445 
4446   StringRef Str = Parser.getTok().getString();
4447   int Slot = StringSwitch<int>(Str)
4448     .Case("p10", 0)
4449     .Case("p20", 1)
4450     .Case("p0", 2)
4451     .Default(-1);
4452 
4453   SMLoc S = Parser.getTok().getLoc();
4454   if (Slot == -1)
4455     return MatchOperand_ParseFail;
4456 
4457   Parser.Lex();
4458   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
4459                                               AMDGPUOperand::ImmTyInterpSlot));
4460   return MatchOperand_Success;
4461 }
4462 
// Parse an interpolation attribute of the form "attr<N>.<chan>", e.g.
// "attr3.x". On success two immediates are pushed: the attribute number
// and the channel index. If the attribute number exceeds 63, an error is
// reported, the token is consumed, no operands are pushed, and
// MatchOperand_Success is still returned.
OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
  if (getLexer().getKind() != AsmToken::Identifier)
    return MatchOperand_NoMatch;

  StringRef Str = Parser.getTok().getString();
  if (!Str.startswith("attr"))
    return MatchOperand_NoMatch;

  // The final two characters name the channel: .x/.y/.z/.w.
  StringRef Chan = Str.take_back(2);
  int AttrChan = StringSwitch<int>(Chan)
    .Case(".x", 0)
    .Case(".y", 1)
    .Case(".z", 2)
    .Case(".w", 3)
    .Default(-1);
  if (AttrChan == -1)
    return MatchOperand_ParseFail;

  // Strip the ".<chan>" suffix and the "attr" prefix, leaving the digits.
  Str = Str.drop_back(2).drop_front(4);

  uint8_t Attr;
  if (Str.getAsInteger(10, Attr))
    return MatchOperand_ParseFail;

  SMLoc S = Parser.getTok().getLoc();
  Parser.Lex();
  if (Attr > 63) {
    Error(S, "out of bounds attr");
    return MatchOperand_Success;
  }

  // Location of the channel suffix inside the original token text.
  SMLoc SChan = SMLoc::getFromPointer(Chan.data());

  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
                                              AMDGPUOperand::ImmTyInterpAttr));
  Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
                                              AMDGPUOperand::ImmTyAttrChan));
  return MatchOperand_Success;
}
4502 
4503 void AMDGPUAsmParser::errorExpTgt() {
4504   Error(Parser.getTok().getLoc(), "invalid exp target");
4505 }
4506 
4507 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
4508                                                       uint8_t &Val) {
4509   if (Str == "null") {
4510     Val = 9;
4511     return MatchOperand_Success;
4512   }
4513 
4514   if (Str.startswith("mrt")) {
4515     Str = Str.drop_front(3);
4516     if (Str == "z") { // == mrtz
4517       Val = 8;
4518       return MatchOperand_Success;
4519     }
4520 
4521     if (Str.getAsInteger(10, Val))
4522       return MatchOperand_ParseFail;
4523 
4524     if (Val > 7)
4525       errorExpTgt();
4526 
4527     return MatchOperand_Success;
4528   }
4529 
4530   if (Str.startswith("pos")) {
4531     Str = Str.drop_front(3);
4532     if (Str.getAsInteger(10, Val))
4533       return MatchOperand_ParseFail;
4534 
4535     if (Val > 3)
4536       errorExpTgt();
4537 
4538     Val += 12;
4539     return MatchOperand_Success;
4540   }
4541 
4542   if (Str.startswith("param")) {
4543     Str = Str.drop_front(5);
4544     if (Str.getAsInteger(10, Val))
4545       return MatchOperand_ParseFail;
4546 
4547     if (Val >= 32)
4548       errorExpTgt();
4549 
4550     Val += 32;
4551     return MatchOperand_Success;
4552   }
4553 
4554   if (Str.startswith("invalid_target_")) {
4555     Str = Str.drop_front(15);
4556     if (Str.getAsInteger(10, Val))
4557       return MatchOperand_ParseFail;
4558 
4559     errorExpTgt();
4560     return MatchOperand_Success;
4561   }
4562 
4563   return MatchOperand_NoMatch;
4564 }
4565 
4566 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
4567   uint8_t Val;
4568   StringRef Str = Parser.getTok().getString();
4569 
4570   auto Res = parseExpTgtImpl(Str, Val);
4571   if (Res != MatchOperand_Success)
4572     return Res;
4573 
4574   SMLoc S = Parser.getTok().getLoc();
4575   Parser.Lex();
4576 
4577   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
4578                                               AMDGPUOperand::ImmTyExpTgt));
4579   return MatchOperand_Success;
4580 }
4581 
// Parse a sendmsg operand: either a raw 16-bit immediate or a symbolic
// sendmsg(...) construct. Validation errors are reported here, but an
// immediate operand is pushed even on failure so that parsing of the
// remaining operands continues without cascading diagnostics.
OperandMatchResultTy
AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SendMsg;

  int64_t Imm16Val = 0;
  SMLoc S = Parser.getTok().getLoc();

  switch(getLexer().getKind()) {
  default:
    return MatchOperand_NoMatch;
  case AsmToken::Integer:
    // The operand can be an integer value.
    if (getParser().parseAbsoluteExpression(Imm16Val))
      return MatchOperand_NoMatch;
    if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
      Error(S, "invalid immediate: only 16-bit values are legal");
      // Do not return error code, but create an imm operand anyway and proceed
      // to the next operand, if any. That avoids unneccessary error messages.
    }
    break;
  case AsmToken::Identifier: {
      OperandInfoTy Msg(ID_UNKNOWN_);
      OperandInfoTy Operation(OP_UNKNOWN_);
      int64_t StreamId = STREAM_ID_DEFAULT_;
      if (parseSendMsgConstruct(Msg, Operation, StreamId))
        return MatchOperand_ParseFail;
      // do/while(false) lets validation 'break' out early while still
      // falling through to push the operand below.
      do {
        // Validate and encode message ID.
        if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE)
                || (Msg.Id == ID_GS_ALLOC_REQ && !isSI() && !isCI() && !isVI())
                || Msg.Id == ID_SYSMSG)) {
          if (Msg.IsSymbolic)
            Error(S, "invalid/unsupported symbolic name of message");
          else
            Error(S, "invalid/unsupported code of message");
          break;
        }
        Imm16Val = (Msg.Id << ID_SHIFT_);
        // Validate and encode operation ID.
        if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) {
          if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) {
            if (Operation.IsSymbolic)
              Error(S, "invalid symbolic name of GS_OP");
            else
              Error(S, "invalid code of GS_OP: only 2-bit values are legal");
            break;
          }
          if (Operation.Id == OP_GS_NOP
              && Msg.Id != ID_GS_DONE) {
            Error(S, "invalid GS_OP: NOP is for GS_DONE only");
            break;
          }
          Imm16Val |= (Operation.Id << OP_SHIFT_);
        }
        if (Msg.Id == ID_SYSMSG) {
          if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) {
            if (Operation.IsSymbolic)
              Error(S, "invalid/unsupported symbolic name of SYSMSG_OP");
            else
              Error(S, "invalid/unsupported code of SYSMSG_OP");
            break;
          }
          Imm16Val |= (Operation.Id << OP_SHIFT_);
        }
        // Validate and encode stream ID.
        if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
          if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) {
            Error(S, "invalid stream id: only 2-bit values are legal");
            break;
          }
          Imm16Val |= (StreamId << STREAM_ID_SHIFT_);
        }
      } while (false);
    }
    break;
  }
  Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
  return MatchOperand_Success;
}
4661 
4662 bool AMDGPUOperand::isSendMsg() const {
4663   return isImmTy(ImmTySendMsg);
4664 }
4665 
4666 //===----------------------------------------------------------------------===//
4667 // parser helpers
4668 //===----------------------------------------------------------------------===//
4669 
4670 bool
4671 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
4672   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
4673 }
4674 
4675 bool
4676 AMDGPUAsmParser::isId(const StringRef Id) const {
4677   return isId(getToken(), Id);
4678 }
4679 
4680 bool
4681 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
4682   return getTokenKind() == Kind;
4683 }
4684 
4685 bool
4686 AMDGPUAsmParser::trySkipId(const StringRef Id) {
4687   if (isId(Id)) {
4688     lex();
4689     return true;
4690   }
4691   return false;
4692 }
4693 
4694 bool
4695 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
4696   if (isToken(Kind)) {
4697     lex();
4698     return true;
4699   }
4700   return false;
4701 }
4702 
4703 bool
4704 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
4705                            const StringRef ErrMsg) {
4706   if (!trySkipToken(Kind)) {
4707     Error(getLoc(), ErrMsg);
4708     return false;
4709   }
4710   return true;
4711 }
4712 
4713 bool
4714 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
4715   return !getParser().parseAbsoluteExpression(Imm);
4716 }
4717 
4718 bool
4719 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
4720   if (isToken(AsmToken::String)) {
4721     Val = getToken().getStringContents();
4722     lex();
4723     return true;
4724   } else {
4725     Error(getLoc(), ErrMsg);
4726     return false;
4727   }
4728 }
4729 
4730 AsmToken
4731 AMDGPUAsmParser::getToken() const {
4732   return Parser.getTok();
4733 }
4734 
4735 AsmToken
4736 AMDGPUAsmParser::peekToken() {
4737   return getLexer().peekTok();
4738 }
4739 
4740 void
4741 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
4742   auto TokCount = getLexer().peekTokens(Tokens);
4743 
4744   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
4745     Tokens[Idx] = AsmToken(AsmToken::Error, "");
4746 }
4747 
4748 AsmToken::TokenKind
4749 AMDGPUAsmParser::getTokenKind() const {
4750   return getLexer().getKind();
4751 }
4752 
4753 SMLoc
4754 AMDGPUAsmParser::getLoc() const {
4755   return getToken().getLoc();
4756 }
4757 
4758 StringRef
4759 AMDGPUAsmParser::getTokenStr() const {
4760   return getToken().getString();
4761 }
4762 
4763 void
4764 AMDGPUAsmParser::lex() {
4765   Parser.Lex();
4766 }
4767 
4768 //===----------------------------------------------------------------------===//
4769 // swizzle
4770 //===----------------------------------------------------------------------===//
4771 
4772 LLVM_READNONE
4773 static unsigned
4774 encodeBitmaskPerm(const unsigned AndMask,
4775                   const unsigned OrMask,
4776                   const unsigned XorMask) {
4777   using namespace llvm::AMDGPU::Swizzle;
4778 
4779   return BITMASK_PERM_ENC |
4780          (AndMask << BITMASK_AND_SHIFT) |
4781          (OrMask  << BITMASK_OR_SHIFT)  |
4782          (XorMask << BITMASK_XOR_SHIFT);
4783 }
4784 
4785 bool
4786 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
4787                                       const unsigned MinVal,
4788                                       const unsigned MaxVal,
4789                                       const StringRef ErrMsg) {
4790   for (unsigned i = 0; i < OpNum; ++i) {
4791     if (!skipToken(AsmToken::Comma, "expected a comma")){
4792       return false;
4793     }
4794     SMLoc ExprLoc = Parser.getTok().getLoc();
4795     if (!parseExpr(Op[i])) {
4796       return false;
4797     }
4798     if (Op[i] < MinVal || Op[i] > MaxVal) {
4799       Error(ExprLoc, ErrMsg);
4800       return false;
4801     }
4802   }
4803 
4804   return true;
4805 }
4806 
4807 bool
4808 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
4809   using namespace llvm::AMDGPU::Swizzle;
4810 
4811   int64_t Lane[LANE_NUM];
4812   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
4813                            "expected a 2-bit lane id")) {
4814     Imm = QUAD_PERM_ENC;
4815     for (unsigned I = 0; I < LANE_NUM; ++I) {
4816       Imm |= Lane[I] << (LANE_SHIFT * I);
4817     }
4818     return true;
4819   }
4820   return false;
4821 }
4822 
// Parse a BROADCAST swizzle macro: a power-of-two group size in [2,32]
// and a lane index within the group. Encoded as a BITMASK_PERM: with
// GroupSize a power of two, the AND mask (BITMASK_MAX - GroupSize + 1)
// clears the low-order lane-id bits, and OR-ing LaneIdx then selects
// the broadcast source lane within each group.
bool
AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc S = Parser.getTok().getLoc();
  int64_t GroupSize;
  int64_t LaneIdx;

  if (!parseSwizzleOperands(1, &GroupSize,
                            2, 32,
                            "group size must be in the interval [2,32]")) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(S, "group size must be a power of two");
    return false;
  }
  if (parseSwizzleOperands(1, &LaneIdx,
                           0, GroupSize - 1,
                           "lane id must be in the interval [0,group size - 1]")) {
    Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
    return true;
  }
  return false;
}
4848 
4849 bool
4850 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
4851   using namespace llvm::AMDGPU::Swizzle;
4852 
4853   SMLoc S = Parser.getTok().getLoc();
4854   int64_t GroupSize;
4855 
4856   if (!parseSwizzleOperands(1, &GroupSize,
4857       2, 32, "group size must be in the interval [2,32]")) {
4858     return false;
4859   }
4860   if (!isPowerOf2_64(GroupSize)) {
4861     Error(S, "group size must be a power of two");
4862     return false;
4863   }
4864 
4865   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
4866   return true;
4867 }
4868 
4869 bool
4870 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
4871   using namespace llvm::AMDGPU::Swizzle;
4872 
4873   SMLoc S = Parser.getTok().getLoc();
4874   int64_t GroupSize;
4875 
4876   if (!parseSwizzleOperands(1, &GroupSize,
4877       1, 16, "group size must be in the interval [1,16]")) {
4878     return false;
4879   }
4880   if (!isPowerOf2_64(GroupSize)) {
4881     Error(S, "group size must be a power of two");
4882     return false;
4883   }
4884 
4885   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
4886   return true;
4887 }
4888 
4889 bool
4890 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
4891   using namespace llvm::AMDGPU::Swizzle;
4892 
4893   if (!skipToken(AsmToken::Comma, "expected a comma")) {
4894     return false;
4895   }
4896 
4897   StringRef Ctl;
4898   SMLoc StrLoc = Parser.getTok().getLoc();
4899   if (!parseString(Ctl)) {
4900     return false;
4901   }
4902   if (Ctl.size() != BITMASK_WIDTH) {
4903     Error(StrLoc, "expected a 5-character mask");
4904     return false;
4905   }
4906 
4907   unsigned AndMask = 0;
4908   unsigned OrMask = 0;
4909   unsigned XorMask = 0;
4910 
4911   for (size_t i = 0; i < Ctl.size(); ++i) {
4912     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
4913     switch(Ctl[i]) {
4914     default:
4915       Error(StrLoc, "invalid mask");
4916       return false;
4917     case '0':
4918       break;
4919     case '1':
4920       OrMask |= Mask;
4921       break;
4922     case 'p':
4923       AndMask |= Mask;
4924       break;
4925     case 'i':
4926       AndMask |= Mask;
4927       XorMask |= Mask;
4928       break;
4929     }
4930   }
4931 
4932   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
4933   return true;
4934 }
4935 
4936 bool
4937 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
4938 
4939   SMLoc OffsetLoc = Parser.getTok().getLoc();
4940 
4941   if (!parseExpr(Imm)) {
4942     return false;
4943   }
4944   if (!isUInt<16>(Imm)) {
4945     Error(OffsetLoc, "expected a 16-bit offset");
4946     return false;
4947   }
4948   return true;
4949 }
4950 
4951 bool
4952 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
4953   using namespace llvm::AMDGPU::Swizzle;
4954 
4955   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
4956 
4957     SMLoc ModeLoc = Parser.getTok().getLoc();
4958     bool Ok = false;
4959 
4960     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
4961       Ok = parseSwizzleQuadPerm(Imm);
4962     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
4963       Ok = parseSwizzleBitmaskPerm(Imm);
4964     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
4965       Ok = parseSwizzleBroadcast(Imm);
4966     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
4967       Ok = parseSwizzleSwap(Imm);
4968     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
4969       Ok = parseSwizzleReverse(Imm);
4970     } else {
4971       Error(ModeLoc, "expected a swizzle mode");
4972     }
4973 
4974     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
4975   }
4976 
4977   return false;
4978 }
4979 
4980 OperandMatchResultTy
4981 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
4982   SMLoc S = Parser.getTok().getLoc();
4983   int64_t Imm = 0;
4984 
4985   if (trySkipId("offset")) {
4986 
4987     bool Ok = false;
4988     if (skipToken(AsmToken::Colon, "expected a colon")) {
4989       if (trySkipId("swizzle")) {
4990         Ok = parseSwizzleMacro(Imm);
4991       } else {
4992         Ok = parseSwizzleOffset(Imm);
4993       }
4994     }
4995 
4996     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
4997 
4998     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
4999   } else {
5000     // Swizzle "offset" operand is optional.
5001     // If it is omitted, try parsing other optional operands.
5002     return parseOptionalOpr(Operands);
5003   }
5004 }
5005 
5006 bool
5007 AMDGPUOperand::isSwizzle() const {
5008   return isImmTy(ImmTySwizzle);
5009 }
5010 
5011 //===----------------------------------------------------------------------===//
5012 // VGPR Index Mode
5013 //===----------------------------------------------------------------------===//
5014 
// Parse the interior of a gpr_idx(...) construct: a comma-separated
// list of VGPR index mode names terminated by ')'. Returns the OR of
// the selected mode bits; an empty list "gpr_idx()" yields OFF. Errors
// are reported in place and the bits accumulated so far are returned.
int64_t AMDGPUAsmParser::parseGPRIdxMacro() {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  // "gpr_idx()" -- no modes selected.
  if (trySkipToken(AsmToken::RParen)) {
    return OFF;
  }

  int64_t Imm = 0;

  while (true) {
    unsigned Mode = 0;
    SMLoc S = Parser.getTok().getLoc();

    // Try each known mode name; Mode stays 0 when nothing matched.
    for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
      if (trySkipId(IdSymbolic[ModeId])) {
        Mode = 1 << ModeId;
        break;
      }
    }

    if (Mode == 0) {
      Error(S, (Imm == 0)?
               "expected a VGPR index mode or a closing parenthesis" :
               "expected a VGPR index mode");
      break;
    }

    // Each mode may appear at most once.
    if (Imm & Mode) {
      Error(S, "duplicate VGPR index mode");
      break;
    }
    Imm |= Mode;

    if (trySkipToken(AsmToken::RParen))
      break;
    if (!skipToken(AsmToken::Comma,
                   "expected a comma or a closing parenthesis"))
      break;
  }

  return Imm;
}
5058 
5059 OperandMatchResultTy
5060 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
5061 
5062   int64_t Imm = 0;
5063   SMLoc S = Parser.getTok().getLoc();
5064 
5065   if (getLexer().getKind() == AsmToken::Identifier &&
5066       Parser.getTok().getString() == "gpr_idx" &&
5067       getLexer().peekTok().is(AsmToken::LParen)) {
5068 
5069     Parser.Lex();
5070     Parser.Lex();
5071 
5072     // If parse failed, trigger an error but do not return error code
5073     // to avoid excessive error messages.
5074     Imm = parseGPRIdxMacro();
5075 
5076   } else {
5077     if (getParser().parseAbsoluteExpression(Imm))
5078       return MatchOperand_NoMatch;
5079     if (Imm < 0 || !isUInt<4>(Imm)) {
5080       Error(S, "invalid immediate: only 4-bit values are legal");
5081     }
5082   }
5083 
5084   Operands.push_back(
5085       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
5086   return MatchOperand_Success;
5087 }
5088 
5089 bool AMDGPUOperand::isGPRIdxMode() const {
5090   return isImmTy(ImmTyGprIdxMode);
5091 }
5092 
5093 //===----------------------------------------------------------------------===//
5094 // sopp branch targets
5095 //===----------------------------------------------------------------------===//
5096 
5097 OperandMatchResultTy
5098 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
5099   SMLoc S = Parser.getTok().getLoc();
5100 
5101   switch (getLexer().getKind()) {
5102     default: return MatchOperand_ParseFail;
5103     case AsmToken::Integer: {
5104       int64_t Imm;
5105       if (getParser().parseAbsoluteExpression(Imm))
5106         return MatchOperand_ParseFail;
5107       Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
5108       return MatchOperand_Success;
5109     }
5110 
5111     case AsmToken::Identifier:
5112       Operands.push_back(AMDGPUOperand::CreateExpr(this,
5113           MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
5114                                   Parser.getTok().getString()), getContext()), S));
5115       Parser.Lex();
5116       return MatchOperand_Success;
5117   }
5118 }
5119 
5120 //===----------------------------------------------------------------------===//
5121 // mubuf
5122 //===----------------------------------------------------------------------===//
5123 
5124 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
5125   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
5126 }
5127 
5128 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
5129   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
5130 }
5131 
5132 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
5133   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
5134 }
5135 
5136 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
5137                                const OperandVector &Operands,
5138                                bool IsAtomic,
5139                                bool IsAtomicReturn,
5140                                bool IsLds) {
5141   bool IsLdsOpcode = IsLds;
5142   bool HasLdsModifier = false;
5143   OptionalImmIndexMap OptionalIdx;
5144   assert(IsAtomicReturn ? IsAtomic : true);
5145   unsigned FirstOperandIdx = 1;
5146 
5147   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
5148     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5149 
5150     // Add the register arguments
5151     if (Op.isReg()) {
5152       Op.addRegOperands(Inst, 1);
5153       // Insert a tied src for atomic return dst.
5154       // This cannot be postponed as subsequent calls to
5155       // addImmOperands rely on correct number of MC operands.
5156       if (IsAtomicReturn && i == FirstOperandIdx)
5157         Op.addRegOperands(Inst, 1);
5158       continue;
5159     }
5160 
5161     // Handle the case where soffset is an immediate
5162     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5163       Op.addImmOperands(Inst, 1);
5164       continue;
5165     }
5166 
5167     HasLdsModifier = Op.isLDS();
5168 
5169     // Handle tokens like 'offen' which are sometimes hard-coded into the
5170     // asm string.  There are no MCInst operands for these.
5171     if (Op.isToken()) {
5172       continue;
5173     }
5174     assert(Op.isImm());
5175 
5176     // Handle optional arguments
5177     OptionalIdx[Op.getImmTy()] = i;
5178   }
5179 
5180   // This is a workaround for an llvm quirk which may result in an
5181   // incorrect instruction selection. Lds and non-lds versions of
5182   // MUBUF instructions are identical except that lds versions
5183   // have mandatory 'lds' modifier. However this modifier follows
5184   // optional modifiers and llvm asm matcher regards this 'lds'
5185   // modifier as an optional one. As a result, an lds version
5186   // of opcode may be selected even if it has no 'lds' modifier.
5187   if (IsLdsOpcode && !HasLdsModifier) {
5188     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
5189     if (NoLdsOpcode != -1) { // Got lds version - correct it.
5190       Inst.setOpcode(NoLdsOpcode);
5191       IsLdsOpcode = false;
5192     }
5193   }
5194 
5195   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
5196   if (!IsAtomic) { // glc is hard-coded.
5197     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5198   }
5199   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5200 
5201   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
5202     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5203   }
5204 
5205   if (isGFX10())
5206     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5207 }
5208 
// Convert parsed MTBUF operands into MCInst operands: registers and the
// immediate soffset are appended in order; optional immediate modifiers
// are collected first and then appended in the fixed order below.
void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  // Operand 0 is the mnemonic token; start at 1.
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string.  There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // The order below must match the operand order of the instruction
  // definitions; dlc exists only on GFX10.
  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);

  if (isGFX10())
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
}
5248 
5249 //===----------------------------------------------------------------------===//
5250 // mimg
5251 //===----------------------------------------------------------------------===//
5252 
// Convert parsed MIMG operands into MCInst operands. Operand 0 is the
// mnemonic token, so conversion starts at index 1. For atomics the
// destination register is also appended as the tied data source.
void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
                              bool IsAtomic) {
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (IsAtomic) {
    // Add src, same as dst
    assert(Desc.getNumDefs() == 1);
    ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
  }

  OptionalImmIndexMap OptionalIdx;

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isImmModifier()) {
      // Optional immediate modifiers are appended later in fixed order.
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unexpected operand type");
    }
  }

  bool IsGFX10 = isGFX10();

  // The order below must match the operand order of the instruction
  // definitions; dlc exists only on GFX10.
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
  if (IsGFX10)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
}
5296 
5297 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
5298   cvtMIMG(Inst, Operands, true);
5299 }
5300 
5301 //===----------------------------------------------------------------------===//
5302 // smrd
5303 //===----------------------------------------------------------------------===//
5304 
5305 bool AMDGPUOperand::isSMRDOffset8() const {
5306   return isImm() && isUInt<8>(getImm());
5307 }
5308 
5309 bool AMDGPUOperand::isSMRDOffset20() const {
5310   return isImm() && isUInt<20>(getImm());
5311 }
5312 
5313 bool AMDGPUOperand::isSMRDLiteralOffset() const {
5314   // 32-bit literals are only supported on CI and we only want to use them
5315   // when the offset is > 8-bits.
5316   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
5317 }
5318 
5319 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
5320   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5321 }
5322 
// Default value (0) for an omitted 20-bit SMRD offset operand.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
5326 
// Default value (0) for an omitted SMRD 32-bit literal offset operand.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
5330 
// Default value (0) for an omitted unsigned 12-bit offset operand.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
5334 
// Default value (0) for an omitted signed 13-bit offset operand.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
5338 
5339 //===----------------------------------------------------------------------===//
5340 // vop3
5341 //===----------------------------------------------------------------------===//
5342 
// Convert an omod "mul:N" factor (1, 2 or 4) into its encoded value
// (0, 1 or 2 respectively). Returns false for any other multiplier,
// leaving Mul unchanged.
static bool ConvertOmodMul(int64_t &Mul) {
  switch (Mul) {
  case 1:
  case 2:
  case 4:
    Mul >>= 1;
    return true;
  default:
    return false;
  }
}
5350 
// Convert an omod "div:N" factor into its encoded value: div:1 encodes as 0,
// div:2 encodes as 3. Returns false for any other divisor, leaving Div
// unchanged.
static bool ConvertOmodDiv(int64_t &Div) {
  switch (Div) {
  case 1:
    Div = 0;
    return true;
  case 2:
    Div = 3;
    return true;
  default:
    return false;
  }
}
5364 
// Convert a DPP bound_ctrl value from its assembly form to its encoding:
// bound_ctrl:0 encodes as 1, bound_ctrl:-1 encodes as 0. Any other value is
// rejected and BoundCtrl is left unchanged.
static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
  switch (BoundCtrl) {
  case 0:
    BoundCtrl = 1;
    return true;
  case -1:
    BoundCtrl = 0;
    return true;
  default:
    return false;
  }
}
5378 
// Table of all optional operands recognized by parseOptionalOpr. Each entry
// gives the operand's assembly name, its immediate type, whether it is a
// bare bit (present/absent, no ":value" suffix), and an optional conversion
// callback applied to the parsed value.
// Note: the order in this table matches the order of operands in AsmString.
static const OptionalOperand AMDGPUOptionalOperandTable[] = {
  {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
  {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
  {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
  {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
  {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
  {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
  {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
  {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
  {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
  {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
  {"dfmt",    AMDGPUOperand::ImmTyFORMAT, false, nullptr},
  {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
  {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
  {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
  {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
  {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
  {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
  {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
  {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
  {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
  // "r128" and "a16" are alternative spellings for the same bit.
  {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
  {"a16",     AMDGPUOperand::ImmTyR128A16,  true, nullptr},
  {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
  // NOTE(review): "d16" also appears earlier in this table; presumably the
  // duplicate is harmless since both map to ImmTyD16 — confirm before
  // removing, as entry order matches AsmString operand order.
  {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
  {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
  {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
  {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
  {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
  {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
  {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
  {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
  {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
  {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
  {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
  {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
  {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
  {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
  {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
};
5420 
// Entry point for parsing optional operands. Parses one optional operand,
// and, when it looks like we are at the start of the optional-operand tail
// of the statement, keeps consuming further operands (up to
// MAX_OPR_LOOKAHEAD) so hardcoded mandatory operands are not left for the
// autogenerated matcher. Returns the result of the last parse attempt.
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
  unsigned size = Operands.size();
  assert(size > 0);

  OperandMatchResultTy res = parseOptionalOpr(Operands);

  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // Current design assumes that all operands after the first optional operand
  // are also optional. However implementation of some instructions violates
  // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure autogenerated parser of custom operands never hit hardcoded
  // mandatory operands.

  // Heuristic: only look ahead when this is the first operand after the
  // mnemonic or the previous operand was a register.
  if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {

    // We have parsed the first optional operand.
    // Parse as many operands as necessary to skip all mandatory operands.

    for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
      if (res != MatchOperand_Success ||
          getLexer().is(AsmToken::EndOfStatement)) break;
      if (getLexer().is(AsmToken::Comma)) Parser.Lex();
      res = parseOptionalOpr(Operands);
    }
  }

  return res;
}
5453 
5454 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
5455   OperandMatchResultTy res;
5456   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
5457     // try to parse any optional operand here
5458     if (Op.IsBit) {
5459       res = parseNamedBit(Op.Name, Operands, Op.Type);
5460     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
5461       res = parseOModOperand(Operands);
5462     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
5463                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
5464                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
5465       res = parseSDWASel(Operands, Op.Name, Op.Type);
5466     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
5467       res = parseSDWADstUnused(Operands);
5468     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
5469                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
5470                Op.Type == AMDGPUOperand::ImmTyNegLo ||
5471                Op.Type == AMDGPUOperand::ImmTyNegHi) {
5472       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
5473                                         Op.ConvertResult);
5474     } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT) {
5475       res = parseDfmtNfmt(Operands);
5476     } else {
5477       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
5478     }
5479     if (res != MatchOperand_NoMatch) {
5480       return res;
5481     }
5482   }
5483   return MatchOperand_NoMatch;
5484 }
5485 
5486 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
5487   StringRef Name = Parser.getTok().getString();
5488   if (Name == "mul") {
5489     return parseIntWithPrefix("mul", Operands,
5490                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
5491   }
5492 
5493   if (Name == "div") {
5494     return parseIntWithPrefix("div", Operands,
5495                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
5496   }
5497 
5498   return MatchOperand_NoMatch;
5499 }
5500 
// Conversion for VOP3 instructions with a destination op_sel bit: after the
// generic VOP3P conversion, the op_sel bit one past the last source operand
// selects the destination half, which is encoded as DST_OP_SEL inside
// src0_modifiers.
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
  cvtVOP3P(Inst, Operands);

  int Opc = Inst.getOpcode();

  // Count how many of src0..src2 this opcode actually has.
  int SrcNum;
  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  for (SrcNum = 0;
       SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
       ++SrcNum);
  assert(SrcNum > 0);

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

  // Bit SrcNum of op_sel is the destination bit.
  if ((OpSel & (1 << SrcNum)) != 0) {
    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
    uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
    Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
  }
}
5524 
5525 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
5526       // 1. This operand is input modifiers
5527   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
5528       // 2. This is not last operand
5529       && Desc.NumOperands > (OpNum + 1)
5530       // 3. Next operand is register class
5531       && Desc.OpInfo[OpNum + 1].RegClass != -1
5532       // 4. Next register is not tied to any other operand
5533       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
5534 }
5535 
// Operand conversion for VOP3 interpolation instructions. Interp
// slot/attr/chan operands are emitted as plain immediates; other immediate
// modifiers are collected and appended (high/clamp/omod) only when the
// opcode defines the corresponding named operand.
void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
{
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  // Operands[0] is the mnemonic token; copy the destination register(s).
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      // Source with FP input modifiers: emits modifier imm + value.
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() ||
               Op.isInterpAttr() ||
               Op.isAttrChan()) {
      Inst.addOperand(MCOperand::createImm(Op.getImm()));
    } else if (Op.isImmModifier()) {
      // Remember where each optional modifier was parsed.
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }
}
5574 
// Generic operand conversion for VOP3 instructions. Copies defs, then
// sources (with FP input modifiers when the opcode has src0_modifiers),
// records optional immediate modifiers in OptionalIdx, and appends
// clamp/omod when the opcode defines them. Also fixes up v_mac/v_fmac,
// whose src2 is tied to the destination.
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  unsigned Opc = Inst.getOpcode();

  // Operands[0] is the mnemonic token; copy the destination register(s).
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
    // This instruction has src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        // Emits the modifier immediate followed by the source value.
        Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
      } else if (Op.isImmModifier()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else if (Op.isRegOrImm()) {
        Op.addRegOrImmOperands(Inst, 1);
      } else {
        llvm_unreachable("unhandled operand type");
      }
    }
  } else {
    // No src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (Op.isMod()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        Op.addRegOrImmOperands(Inst, 1);
      }
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }

  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // it has src2 register operand that is tied to dst operand
  // we don't allow modifiers for this operand in assembler so src2_modifiers
  // should be 0.
  if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
      Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_MAC_F32_e64_vi ||
      Opc == AMDGPU::V_MAC_F16_e64_vi ||
      Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_FMAC_F32_e64_vi ||
      Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
    // Insert a zero src2_modifiers immediate, then duplicate the dst
    // operand right after it as the tied src2.
    auto it = Inst.begin();
    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
    ++it;
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
5637 
// Convenience overload: VOP3 conversion with a fresh (local) optional-operand
// index map.
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  cvtVOP3(Inst, Operands, OptionalIdx);
}
5642 
// Operand conversion for VOP3P (packed) instructions: runs the generic VOP3
// conversion, appends op_sel / op_sel_hi / neg_lo / neg_hi immediates, then
// folds those per-source bits into the per-source *_modifiers operands.
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
                               const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  cvtVOP3(Inst, Operands, OptIdx);

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
    assert(!IsPacked);
    // vdst_in is tied to the destination operand.
    Inst.addOperand(Inst.getOperand(0));
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers

  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    // Packed instructions default op_sel_hi to all-ones (-1).
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    assert(IsPacked);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelHiIdx != -1) {
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
  }

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

  // Distribute bit J of each mask into srcJ_modifiers.
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}
5725 
5726 //===----------------------------------------------------------------------===//
5727 // dpp
5728 //===----------------------------------------------------------------------===//
5729 
5730 bool AMDGPUOperand::isDPPCtrl() const {
5731   using namespace AMDGPU::DPP;
5732 
5733   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
5734   if (result) {
5735     int64_t Imm = getImm();
5736     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
5737            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
5738            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
5739            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
5740            (Imm == DppCtrl::WAVE_SHL1) ||
5741            (Imm == DppCtrl::WAVE_ROL1) ||
5742            (Imm == DppCtrl::WAVE_SHR1) ||
5743            (Imm == DppCtrl::WAVE_ROR1) ||
5744            (Imm == DppCtrl::ROW_MIRROR) ||
5745            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
5746            (Imm == DppCtrl::BCAST15) ||
5747            (Imm == DppCtrl::BCAST31);
5748   }
5749   return false;
5750 }
5751 
5752 bool AMDGPUOperand::isS16Imm() const {
5753   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
5754 }
5755 
5756 bool AMDGPUOperand::isU16Imm() const {
5757   return isImm() && isUInt<16>(getImm());
5758 }
5759 
// Parse a dpp_ctrl operand. Accepts the bare identifiers "row_mirror" and
// "row_half_mirror", the list form "quad_perm:[a,b,c,d]" (each lane selector
// 0..3), and the prefixed forms "row_shl:N", "row_shr:N", "row_ror:N"
// (N = 1..15), "wave_shl:1", "wave_rol:1", "wave_shr:1", "wave_ror:1" and
// "row_bcast:15|31". On success pushes an ImmTyDppCtrl immediate operand.
OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;
  int64_t Int;

  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix == "row_mirror") {
    Int = DppCtrl::ROW_MIRROR;
    Parser.Lex();
  } else if (Prefix == "row_half_mirror") {
    Int = DppCtrl::ROW_HALF_MIRROR;
    Parser.Lex();
  } else {
    // Check to prevent parseDPPCtrlOps from eating invalid tokens
    if (Prefix != "quad_perm"
        && Prefix != "row_shl"
        && Prefix != "row_shr"
        && Prefix != "row_ror"
        && Prefix != "wave_shl"
        && Prefix != "wave_rol"
        && Prefix != "wave_shr"
        && Prefix != "wave_ror"
        && Prefix != "row_bcast") {
      return MatchOperand_NoMatch;
    }

    Parser.Lex();
    if (getLexer().isNot(AsmToken::Colon))
      return MatchOperand_ParseFail;

    if (Prefix == "quad_perm") {
      // quad_perm:[%d,%d,%d,%d]
      Parser.Lex();
      if (getLexer().isNot(AsmToken::LBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();

      // First selector occupies bits [1:0].
      if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
        return MatchOperand_ParseFail;

      // Remaining three selectors occupy 2 bits each, packed upwards.
      for (int i = 0; i < 3; ++i) {
        if (getLexer().isNot(AsmToken::Comma))
          return MatchOperand_ParseFail;
        Parser.Lex();

        int64_t Temp;
        if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
          return MatchOperand_ParseFail;
        const int shift = i*2 + 2;
        Int += (Temp << shift);
      }

      if (getLexer().isNot(AsmToken::RBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();
    } else {
      // sel:%d
      Parser.Lex();
      if (getParser().parseAbsoluteExpression(Int))
        return MatchOperand_ParseFail;

      // Map the prefixed value to its dpp_ctrl encoding, validating range.
      if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHL0;
      } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHR0;
      } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_ROR0;
      } else if (Prefix == "wave_shl" && 1 == Int) {
        Int = DppCtrl::WAVE_SHL1;
      } else if (Prefix == "wave_rol" && 1 == Int) {
        Int = DppCtrl::WAVE_ROL1;
      } else if (Prefix == "wave_shr" && 1 == Int) {
        Int = DppCtrl::WAVE_SHR1;
      } else if (Prefix == "wave_ror" && 1 == Int) {
        Int = DppCtrl::WAVE_ROR1;
      } else if (Prefix == "row_bcast") {
        if (Int == 15) {
          Int = DppCtrl::BCAST15;
        } else if (Int == 31) {
          Int = DppCtrl::BCAST31;
        } else {
          return MatchOperand_ParseFail;
        }
      } else {
        return MatchOperand_ParseFail;
      }
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}
5860 
// Default DPP row_mask: 0xf (all rows enabled).
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}
5864 
// Default value (0) for an omitted s_endpgm immediate operand.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
}
5868 
// Default DPP bank_mask: 0xf (all banks enabled).
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}
5872 
// Default DPP bound_ctrl encoding: 0.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}
5876 
// Operand conversion for DPP instructions: copies defs and sources (with FP
// input modifiers), duplicates tied operands (old/src2 for MAC), skips the
// textual "vcc" operand of VOP2b forms, and appends row_mask / bank_mask /
// bound_ctrl with their defaults.
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  // Operands[0] is the mnemonic token; copy the destination register(s).
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && Op.getReg() == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
      // Skip it.
      continue;
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegWithFPInputModsOperands(Inst, 2);
    } else if (Op.isDPPCtrl()) {
      Op.addImmOperands(Inst, 1);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
  }

  // row_mask and bank_mask default to "all enabled" (0xf).
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
}
5917 
5918 //===----------------------------------------------------------------------===//
5919 // sdwa
5920 //===----------------------------------------------------------------------===//
5921 
// Parse an SDWA select operand "Prefix:VALUE", where VALUE is one of
// BYTE_0..BYTE_3, WORD_0, WORD_1 or DWORD. Type distinguishes
// dst_sel/src0_sel/src1_sel. Pushes an immediate operand on success, fails
// the parse on an unrecognized value.
OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix(Prefix, Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  // 0xffffffff is used as a "no match" sentinel below.
  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}
5955 
// Parse an SDWA "dst_unused:VALUE" operand, where VALUE is UNUSED_PAD,
// UNUSED_SEXT or UNUSED_PRESERVE. Pushes an immediate operand on success,
// fails the parse on an unrecognized value.
OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix("dst_unused", Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  // 0xffffffff is used as a "no match" sentinel below.
  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}
5984 
// SDWA conversion for VOP1-encoded instructions.
void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}
5988 
// SDWA conversion for VOP2-encoded instructions.
void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}
5992 
// SDWA conversion for VOP2b-encoded instructions; skipVcc is set so the
// textual "vcc" operand is dropped.
void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
}
5996 
// SDWA conversion for VOPC-encoded instructions; the "vcc" operand is only
// skipped on VI.
void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}
6000 
// Common operand conversion for SDWA instructions. Copies defs and sources
// (optionally skipping a textual "vcc" operand), records immediate
// modifiers, then appends the per-encoding optional SDWA operands
// (clamp/omod/dst_sel/dst_unused/src*_sel) with their defaults. Finally
// fixes up v_mac, whose src2 is tied to the destination.
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType, bool skipVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool skippedVcc = false;

  // Operands[0] is the mnemonic token; copy the destination register(s).
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (skipVcc && !skippedVcc && Op.isReg() && Op.getReg() == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
        skippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        skippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    skippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
    auto it = Inst.begin();
    std::advance(
      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
6090 
6091 /// Force static initialization.
/// Force static initialization: register the asm parser with both the
/// R600 (AMDGPU) and GCN targets.
extern "C" void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}
6096 
6097 #define GET_REGISTER_MATCHER
6098 #define GET_MATCHER_IMPLEMENTATION
6099 #define GET_MNEMONIC_SPELL_CHECKER
6100 #include "AMDGPUGenAsmMatcher.inc"
6101 
6102 // This fuction should be defined after auto-generated include so that we have
6103 // MatchClassKind enum defined
6104 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
6105                                                      unsigned Kind) {
6106   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
6107   // But MatchInstructionImpl() expects to meet token and fails to validate
6108   // operand. This method checks if we are given immediate operand but expect to
6109   // get corresponding token.
6110   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
6111   switch (Kind) {
6112   case MCK_addr64:
6113     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
6114   case MCK_gds:
6115     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
6116   case MCK_lds:
6117     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
6118   case MCK_glc:
6119     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
6120   case MCK_idxen:
6121     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
6122   case MCK_offen:
6123     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
6124   case MCK_SSrcB32:
6125     // When operands have expression values, they will return true for isToken,
6126     // because it is not possible to distinguish between a token and an
6127     // expression at parse time. MatchInstructionImpl() will always try to
6128     // match an operand as a token, when isToken returns true, and when the
6129     // name of the expression is not a valid token, the match will fail,
6130     // so we need to handle it here.
6131     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
6132   case MCK_SSrcF32:
6133     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
6134   case MCK_SoppBrTarget:
6135     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
6136   case MCK_VReg32OrOff:
6137     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
6138   case MCK_InterpSlot:
6139     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
6140   case MCK_Attr:
6141     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
6142   case MCK_AttrChan:
6143     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
6144   default:
6145     return Match_InvalidOperand;
6146   }
6147 }
6148 
6149 //===----------------------------------------------------------------------===//
6150 // endpgm
6151 //===----------------------------------------------------------------------===//
6152 
6153 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
6154   SMLoc S = Parser.getTok().getLoc();
6155   int64_t Imm = 0;
6156 
6157   if (!parseExpr(Imm)) {
6158     // The operand is optional, if not present default to 0
6159     Imm = 0;
6160   }
6161 
6162   if (!isUInt<16>(Imm)) {
6163     Error(S, "expected a 16-bit value");
6164     return MatchOperand_ParseFail;
6165   }
6166 
6167   Operands.push_back(
6168       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
6169   return MatchOperand_Success;
6170 }
6171 
6172 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
6173