1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "AMDGPU.h"
11 #include "AMDKernelCodeT.h"
12 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
13 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
14 #include "SIDefines.h"
15 #include "SIInstrInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/CodeGen/MachineValueType.h"
30 #include "llvm/MC/MCAsmInfo.h"
31 #include "llvm/MC/MCContext.h"
32 #include "llvm/MC/MCExpr.h"
33 #include "llvm/MC/MCInst.h"
34 #include "llvm/MC/MCInstrDesc.h"
35 #include "llvm/MC/MCInstrInfo.h"
36 #include "llvm/MC/MCParser/MCAsmLexer.h"
37 #include "llvm/MC/MCParser/MCAsmParser.h"
38 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
39 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
40 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
41 #include "llvm/MC/MCRegisterInfo.h"
42 #include "llvm/MC/MCStreamer.h"
43 #include "llvm/MC/MCSubtargetInfo.h"
44 #include "llvm/MC/MCSymbol.h"
45 #include "llvm/Support/AMDGPUMetadata.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/ErrorHandling.h"
49 #include "llvm/Support/MathExtras.h"
50 #include "llvm/Support/SMLoc.h"
51 #include "llvm/Support/TargetRegistry.h"
52 #include "llvm/Support/raw_ostream.h"
53 #include <algorithm>
54 #include <cassert>
55 #include <cstdint>
56 #include <cstring>
57 #include <iterator>
58 #include <map>
59 #include <memory>
60 #include <string>
61 
62 using namespace llvm;
63 using namespace llvm::AMDGPU;
64 
65 namespace {
66 
67 class AMDGPUAsmParser;
68 
69 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };
70 
71 //===----------------------------------------------------------------------===//
72 // Operand
73 //===----------------------------------------------------------------------===//
74 
75 class AMDGPUOperand : public MCParsedAsmOperand {
76   enum KindTy {
77     Token,
78     Immediate,
79     Register,
80     Expression
81   } Kind;
82 
83   SMLoc StartLoc, EndLoc;
84   const AMDGPUAsmParser *AsmParser;
85 
86 public:
87   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
88     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
89 
90   using Ptr = std::unique_ptr<AMDGPUOperand>;
91 
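  // Source modifiers that may be attached to a register or immediate operand:
  // abs/neg for floating-point sources, sext for integer sources.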
92   struct Modifiers {
93     bool Abs = false;
94     bool Neg = false;
95     bool Sext = false;
96 
97     bool hasFPModifiers() const { return Abs || Neg; }
98     bool hasIntModifiers() const { return Sext; }
99     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
100 
101     int64_t getFPModifiersOperand() const {
102       int64_t Operand = 0;
103       Operand |= Abs ? SISrcMods::ABS : 0;
104       Operand |= Neg ? SISrcMods::NEG : 0;
105       return Operand;
106     }
107 
108     int64_t getIntModifiersOperand() const {
109       int64_t Operand = 0;
110       Operand |= Sext ? SISrcMods::SEXT : 0;
111       return Operand;
112     }
113 
114     int64_t getModifiersOperand() const {
115       assert(!(hasFPModifiers() && hasIntModifiers())
116            && "fp and int modifiers should not be used simultaneously");
117       if (hasFPModifiers()) {
118         return getFPModifiersOperand();
119       } else if (hasIntModifiers()) {
120         return getIntModifiersOperand();
121       } else {
122         return 0;
123       }
124     }
125 
126     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
127   };
128 
129   enum ImmTy {
130     ImmTyNone,
131     ImmTyGDS,
132     ImmTyLDS,
133     ImmTyOffen,
134     ImmTyIdxen,
135     ImmTyAddr64,
136     ImmTyOffset,
137     ImmTyInstOffset,
138     ImmTyOffset0,
139     ImmTyOffset1,
140     ImmTyGLC,
141     ImmTySLC,
142     ImmTyTFE,
143     ImmTyD16,
144     ImmTyClampSI,
145     ImmTyOModSI,
146     ImmTyDppCtrl,
147     ImmTyDppRowMask,
148     ImmTyDppBankMask,
149     ImmTyDppBoundCtrl,
150     ImmTySdwaDstSel,
151     ImmTySdwaSrc0Sel,
152     ImmTySdwaSrc1Sel,
153     ImmTySdwaDstUnused,
154     ImmTyDMask,
155     ImmTyUNorm,
156     ImmTyDA,
157     ImmTyR128,
158     ImmTyLWE,
159     ImmTyExpTgt,
160     ImmTyExpCompr,
161     ImmTyExpVM,
162     ImmTyDFMT,
163     ImmTyNFMT,
164     ImmTyHwreg,
165     ImmTyOff,
166     ImmTySendMsg,
167     ImmTyInterpSlot,
168     ImmTyInterpAttr,
169     ImmTyAttrChan,
170     ImmTyOpSel,
171     ImmTyOpSelHi,
172     ImmTyNegLo,
173     ImmTyNegHi,
174     ImmTySwizzle,
175     ImmTyHigh
176   };
177 
178   struct TokOp {
179     const char *Data;
180     unsigned Length;
181   };
182 
183   struct ImmOp {
184     int64_t Val;
185     ImmTy Type;
186     bool IsFPImm;
187     Modifiers Mods;
188   };
189 
190   struct RegOp {
191     unsigned RegNo;
192     bool IsForcedVOP3;
193     Modifiers Mods;
194   };
195 
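  // Operand value storage; the active member is determined by Kind.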
196   union {
197     TokOp Tok;
198     ImmOp Imm;
199     RegOp Reg;
200     const MCExpr *Expr;
201   };
202 
203   bool isToken() const override {
204     if (Kind == Token)
205       return true;
206 
207     if (Kind != Expression || !Expr)
208       return false;
209 
210     // When parsing operands, we can't always tell if something was meant to be
211     // a token, like 'gds', or an expression that references a global variable.
212     // In this case, we assume the string is an expression, and if we need to
213   // interpret it as a token, then we treat the symbol name as the token.
214     return isa<MCSymbolRefExpr>(Expr);
215   }
216 
217   bool isImm() const override {
218     return Kind == Immediate;
219   }
220 
221   bool isInlinableImm(MVT type) const;
222   bool isLiteralImm(MVT type) const;
223 
224   bool isRegKind() const {
225     return Kind == Register;
226   }
227 
228   bool isReg() const override {
229     return isRegKind() && !hasModifiers();
230   }
231 
232   bool isRegOrImmWithInputMods(MVT type) const {
233     return isRegKind() || isInlinableImm(type);
234   }
235 
236   bool isRegOrImmWithInt16InputMods() const {
237     return isRegOrImmWithInputMods(MVT::i16);
238   }
239 
240   bool isRegOrImmWithInt32InputMods() const {
241     return isRegOrImmWithInputMods(MVT::i32);
242   }
243 
244   bool isRegOrImmWithInt64InputMods() const {
245     return isRegOrImmWithInputMods(MVT::i64);
246   }
247 
248   bool isRegOrImmWithFP16InputMods() const {
249     return isRegOrImmWithInputMods(MVT::f16);
250   }
251 
252   bool isRegOrImmWithFP32InputMods() const {
253     return isRegOrImmWithInputMods(MVT::f32);
254   }
255 
256   bool isRegOrImmWithFP64InputMods() const {
257     return isRegOrImmWithInputMods(MVT::f64);
258   }
259 
260   bool isVReg() const {
261     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
262            isRegClass(AMDGPU::VReg_64RegClassID) ||
263            isRegClass(AMDGPU::VReg_96RegClassID) ||
264            isRegClass(AMDGPU::VReg_128RegClassID) ||
265            isRegClass(AMDGPU::VReg_256RegClassID) ||
266            isRegClass(AMDGPU::VReg_512RegClassID);
267   }
268 
269   bool isVReg32OrOff() const {
270     return isOff() || isRegClass(AMDGPU::VGPR_32RegClassID);
271   }
272 
273   bool isSDWAOperand(MVT type) const;
274   bool isSDWAFP16Operand() const;
275   bool isSDWAFP32Operand() const;
276   bool isSDWAInt16Operand() const;
277   bool isSDWAInt32Operand() const;
278 
279   bool isImmTy(ImmTy ImmT) const {
280     return isImm() && Imm.Type == ImmT;
281   }
282 
283   bool isImmModifier() const {
284     return isImm() && Imm.Type != ImmTyNone;
285   }
286 
287   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
288   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
289   bool isDMask() const { return isImmTy(ImmTyDMask); }
290   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
291   bool isDA() const { return isImmTy(ImmTyDA); }
292   bool isR128() const { return isImmTy(ImmTyR128); }
293   bool isLWE() const { return isImmTy(ImmTyLWE); }
294   bool isOff() const { return isImmTy(ImmTyOff); }
295   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
296   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
297   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
298   bool isOffen() const { return isImmTy(ImmTyOffen); }
299   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
300   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
301   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
302   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<16>(getImm()); }
303   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
304 
305   bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
306   bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
307   bool isGDS() const { return isImmTy(ImmTyGDS); }
308   bool isLDS() const { return isImmTy(ImmTyLDS); }
309   bool isGLC() const { return isImmTy(ImmTyGLC); }
310   bool isSLC() const { return isImmTy(ImmTySLC); }
311   bool isTFE() const { return isImmTy(ImmTyTFE); }
312   bool isD16() const { return isImmTy(ImmTyD16); }
313   bool isDFMT() const { return isImmTy(ImmTyDFMT) && isUInt<8>(getImm()); }
314   bool isNFMT() const { return isImmTy(ImmTyNFMT) && isUInt<8>(getImm()); }
315   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
316   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
317   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
318   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
319   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
320   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
321   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
322   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
323   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
324   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
325   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
326   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
327   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
328   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
329   bool isHigh() const { return isImmTy(ImmTyHigh); }
330 
331   bool isMod() const {
332     return isClampSI() || isOModSI();
333   }
334 
335   bool isRegOrImm() const {
336     return isReg() || isImm();
337   }
338 
339   bool isRegClass(unsigned RCID) const;
340 
341   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
342     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
343   }
344 
345   bool isSCSrcB16() const {
346     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
347   }
348 
349   bool isSCSrcV2B16() const {
350     return isSCSrcB16();
351   }
352 
353   bool isSCSrcB32() const {
354     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
355   }
356 
357   bool isSCSrcB64() const {
358     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
359   }
360 
361   bool isSCSrcF16() const {
362     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
363   }
364 
365   bool isSCSrcV2F16() const {
366     return isSCSrcF16();
367   }
368 
369   bool isSCSrcF32() const {
370     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
371   }
372 
373   bool isSCSrcF64() const {
374     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
375   }
376 
377   bool isSSrcB32() const {
378     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
379   }
380 
381   bool isSSrcB16() const {
382     return isSCSrcB16() || isLiteralImm(MVT::i16);
383   }
384 
385   bool isSSrcV2B16() const {
386     llvm_unreachable("cannot happen");
387     return isSSrcB16();
388   }
389 
390   bool isSSrcB64() const {
391     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
392     // See isVSrc64().
393     return isSCSrcB64() || isLiteralImm(MVT::i64);
394   }
395 
396   bool isSSrcF32() const {
397     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
398   }
399 
400   bool isSSrcF64() const {
401     return isSCSrcB64() || isLiteralImm(MVT::f64);
402   }
403 
404   bool isSSrcF16() const {
405     return isSCSrcB16() || isLiteralImm(MVT::f16);
406   }
407 
408   bool isSSrcV2F16() const {
409     llvm_unreachable("cannot happen");
410     return isSSrcF16();
411   }
412 
413   bool isVCSrcB32() const {
414     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
415   }
416 
417   bool isVCSrcB64() const {
418     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
419   }
420 
421   bool isVCSrcB16() const {
422     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
423   }
424 
425   bool isVCSrcV2B16() const {
426     return isVCSrcB16();
427   }
428 
429   bool isVCSrcF32() const {
430     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
431   }
432 
433   bool isVCSrcF64() const {
434     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
435   }
436 
437   bool isVCSrcF16() const {
438     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
439   }
440 
441   bool isVCSrcV2F16() const {
442     return isVCSrcF16();
443   }
444 
445   bool isVSrcB32() const {
446     return isVCSrcF32() || isLiteralImm(MVT::i32);
447   }
448 
449   bool isVSrcB64() const {
450     return isVCSrcF64() || isLiteralImm(MVT::i64);
451   }
452 
453   bool isVSrcB16() const {
454     return isVCSrcF16() || isLiteralImm(MVT::i16);
455   }
456 
457   bool isVSrcV2B16() const {
458     llvm_unreachable("cannot happen");
459     return isVSrcB16();
460   }
461 
462   bool isVSrcF32() const {
463     return isVCSrcF32() || isLiteralImm(MVT::f32);
464   }
465 
466   bool isVSrcF64() const {
467     return isVCSrcF64() || isLiteralImm(MVT::f64);
468   }
469 
470   bool isVSrcF16() const {
471     return isVCSrcF16() || isLiteralImm(MVT::f16);
472   }
473 
474   bool isVSrcV2F16() const {
475     llvm_unreachable("cannot happen");
476     return isVSrcF16();
477   }
478 
479   bool isKImmFP32() const {
480     return isLiteralImm(MVT::f32);
481   }
482 
483   bool isKImmFP16() const {
484     return isLiteralImm(MVT::f16);
485   }
486 
487   bool isMem() const override {
488     return false;
489   }
490 
491   bool isExpr() const {
492     return Kind == Expression;
493   }
494 
495   bool isSoppBrTarget() const {
496     return isExpr() || isImm();
497   }
498 
499   bool isSWaitCnt() const;
500   bool isHwreg() const;
501   bool isSendMsg() const;
502   bool isSwizzle() const;
503   bool isSMRDOffset8() const;
504   bool isSMRDOffset20() const;
505   bool isSMRDLiteralOffset() const;
506   bool isDPPCtrl() const;
507   bool isGPRIdxMode() const;
508   bool isS16Imm() const;
509   bool isU16Imm() const;
510 
511   StringRef getExpressionAsToken() const {
512     assert(isExpr());
513     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
514     return S->getSymbol().getName();
515   }
516 
517   StringRef getToken() const {
518     assert(isToken());
519 
520     if (Kind == Expression)
521       return getExpressionAsToken();
522 
523     return StringRef(Tok.Data, Tok.Length);
524   }
525 
526   int64_t getImm() const {
527     assert(isImm());
528     return Imm.Val;
529   }
530 
531   ImmTy getImmTy() const {
532     assert(isImm());
533     return Imm.Type;
534   }
535 
536   unsigned getReg() const override {
537     return Reg.RegNo;
538   }
539 
540   SMLoc getStartLoc() const override {
541     return StartLoc;
542   }
543 
544   SMLoc getEndLoc() const override {
545     return EndLoc;
546   }
547 
548   SMRange getLocRange() const {
549     return SMRange(StartLoc, EndLoc);
550   }
551 
552   Modifiers getModifiers() const {
553     assert(isRegKind() || isImmTy(ImmTyNone));
554     return isRegKind() ? Reg.Mods : Imm.Mods;
555   }
556 
557   void setModifiers(Modifiers Mods) {
558     assert(isRegKind() || isImmTy(ImmTyNone));
559     if (isRegKind())
560       Reg.Mods = Mods;
561     else
562       Imm.Mods = Mods;
563   }
564 
565   bool hasModifiers() const {
566     return getModifiers().hasModifiers();
567   }
568 
569   bool hasFPModifiers() const {
570     return getModifiers().hasFPModifiers();
571   }
572 
573   bool hasIntModifiers() const {
574     return getModifiers().hasIntModifiers();
575   }
576 
577   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
578 
579   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
580 
581   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
582 
583   template <unsigned Bitwidth>
584   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
585 
586   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
587     addKImmFPOperands<16>(Inst, N);
588   }
589 
590   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
591     addKImmFPOperands<32>(Inst, N);
592   }
593 
594   void addRegOperands(MCInst &Inst, unsigned N) const;
595 
596   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
597     if (isRegKind())
598       addRegOperands(Inst, N);
599     else if (isExpr())
600       Inst.addOperand(MCOperand::createExpr(Expr));
601     else
602       addImmOperands(Inst, N);
603   }
604 
605   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
606     Modifiers Mods = getModifiers();
607     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
608     if (isRegKind()) {
609       addRegOperands(Inst, N);
610     } else {
611       addImmOperands(Inst, N, false);
612     }
613   }
614 
615   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
616     assert(!hasIntModifiers());
617     addRegOrImmWithInputModsOperands(Inst, N);
618   }
619 
620   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
621     assert(!hasFPModifiers());
622     addRegOrImmWithInputModsOperands(Inst, N);
623   }
624 
625   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
626     Modifiers Mods = getModifiers();
627     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
628     assert(isRegKind());
629     addRegOperands(Inst, N);
630   }
631 
632   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
633     assert(!hasIntModifiers());
634     addRegWithInputModsOperands(Inst, N);
635   }
636 
637   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
638     assert(!hasFPModifiers());
639     addRegWithInputModsOperands(Inst, N);
640   }
641 
642   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
643     if (isImm())
644       addImmOperands(Inst, N);
645     else {
646       assert(isExpr());
647       Inst.addOperand(MCOperand::createExpr(Expr));
648     }
649   }
650 
651   static void printImmTy(raw_ostream& OS, ImmTy Type) {
652     switch (Type) {
653     case ImmTyNone: OS << "None"; break;
654     case ImmTyGDS: OS << "GDS"; break;
655     case ImmTyLDS: OS << "LDS"; break;
656     case ImmTyOffen: OS << "Offen"; break;
657     case ImmTyIdxen: OS << "Idxen"; break;
658     case ImmTyAddr64: OS << "Addr64"; break;
659     case ImmTyOffset: OS << "Offset"; break;
660     case ImmTyInstOffset: OS << "InstOffset"; break;
661     case ImmTyOffset0: OS << "Offset0"; break;
662     case ImmTyOffset1: OS << "Offset1"; break;
663     case ImmTyGLC: OS << "GLC"; break;
664     case ImmTySLC: OS << "SLC"; break;
665     case ImmTyTFE: OS << "TFE"; break;
666     case ImmTyD16: OS << "D16"; break;
667     case ImmTyDFMT: OS << "DFMT"; break;
668     case ImmTyNFMT: OS << "NFMT"; break;
669     case ImmTyClampSI: OS << "ClampSI"; break;
670     case ImmTyOModSI: OS << "OModSI"; break;
671     case ImmTyDppCtrl: OS << "DppCtrl"; break;
672     case ImmTyDppRowMask: OS << "DppRowMask"; break;
673     case ImmTyDppBankMask: OS << "DppBankMask"; break;
674     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
675     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
676     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
677     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
678     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
679     case ImmTyDMask: OS << "DMask"; break;
680     case ImmTyUNorm: OS << "UNorm"; break;
681     case ImmTyDA: OS << "DA"; break;
682     case ImmTyR128: OS << "R128"; break;
683     case ImmTyLWE: OS << "LWE"; break;
684     case ImmTyOff: OS << "Off"; break;
685     case ImmTyExpTgt: OS << "ExpTgt"; break;
686     case ImmTyExpCompr: OS << "ExpCompr"; break;
687     case ImmTyExpVM: OS << "ExpVM"; break;
688     case ImmTyHwreg: OS << "Hwreg"; break;
689     case ImmTySendMsg: OS << "SendMsg"; break;
690     case ImmTyInterpSlot: OS << "InterpSlot"; break;
691     case ImmTyInterpAttr: OS << "InterpAttr"; break;
692     case ImmTyAttrChan: OS << "AttrChan"; break;
693     case ImmTyOpSel: OS << "OpSel"; break;
694     case ImmTyOpSelHi: OS << "OpSelHi"; break;
695     case ImmTyNegLo: OS << "NegLo"; break;
696     case ImmTyNegHi: OS << "NegHi"; break;
697     case ImmTySwizzle: OS << "Swizzle"; break;
698     case ImmTyHigh: OS << "High"; break;
699     }
700   }
701 
702   void print(raw_ostream &OS) const override {
703     switch (Kind) {
704     case Register:
705       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
706       break;
707     case Immediate:
708       OS << '<' << getImm();
709       if (getImmTy() != ImmTyNone) {
710         OS << " type: "; printImmTy(OS, getImmTy());
711       }
712       OS << " mods: " << Imm.Mods << '>';
713       break;
714     case Token:
715       OS << '\'' << getToken() << '\'';
716       break;
717     case Expression:
718       OS << "<expr " << *Expr << '>';
719       break;
720     }
721   }
722 
723   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
724                                       int64_t Val, SMLoc Loc,
725                                       ImmTy Type = ImmTyNone,
726                                       bool IsFPImm = false) {
727     auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
728     Op->Imm.Val = Val;
729     Op->Imm.IsFPImm = IsFPImm;
730     Op->Imm.Type = Type;
731     Op->Imm.Mods = Modifiers();
732     Op->StartLoc = Loc;
733     Op->EndLoc = Loc;
734     return Op;
735   }
736 
737   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
738                                         StringRef Str, SMLoc Loc,
739                                         bool HasExplicitEncodingSize = true) {
740     auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
741     Res->Tok.Data = Str.data();
742     Res->Tok.Length = Str.size();
743     Res->StartLoc = Loc;
744     Res->EndLoc = Loc;
745     return Res;
746   }
747 
748   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
749                                       unsigned RegNo, SMLoc S,
750                                       SMLoc E,
751                                       bool ForceVOP3) {
752     auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
753     Op->Reg.RegNo = RegNo;
754     Op->Reg.Mods = Modifiers();
755     Op->Reg.IsForcedVOP3 = ForceVOP3;
756     Op->StartLoc = S;
757     Op->EndLoc = E;
758     return Op;
759   }
760 
761   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
762                                        const class MCExpr *Expr, SMLoc S) {
763     auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
764     Op->Expr = Expr;
765     Op->StartLoc = S;
766     Op->EndLoc = S;
767     return Op;
768   }
769 };
770 
771 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
772   OS << "abs:" << Mods.Abs << " neg:" << Mods.Neg << " sext:" << Mods.Sext;
773   return OS;
774 }
775 
776 //===----------------------------------------------------------------------===//
777 // AsmParser
778 //===----------------------------------------------------------------------===//
779 
780 // Holds info related to the current kernel, e.g. the count of SGPRs used.
781 // A kernel scope begins at an .amdgpu_hsa_kernel directive and ends at the
782 // next .amdgpu_hsa_kernel directive or at EOF.
783 class KernelScopeInfo {
784   int SgprIndexUnusedMin = -1;
785   int VgprIndexUnusedMin = -1;
786   MCContext *Ctx = nullptr;
787 
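  // Record a use of the given register index: bump the first-unused index and
  // publish it through the .kernel.sgpr_count / .kernel.vgpr_count symbols.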
788   void usesSgprAt(int i) {
789     if (i >= SgprIndexUnusedMin) {
790       SgprIndexUnusedMin = ++i;
791       if (Ctx) {
792         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
793         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
794       }
795     }
796   }
797 
798   void usesVgprAt(int i) {
799     if (i >= VgprIndexUnusedMin) {
800       VgprIndexUnusedMin = ++i;
801       if (Ctx) {
802         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
803         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
804       }
805     }
806   }
807 
808 public:
809   KernelScopeInfo() = default;
810 
811   void initialize(MCContext &Context) {
812     Ctx = &Context;
813     usesSgprAt(SgprIndexUnusedMin = -1);
814     usesVgprAt(VgprIndexUnusedMin = -1);
815   }
816 
817   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
818     switch (RegKind) {
819       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
820       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
821       default: break;
822     }
823   }
824 };
825 
826 class AMDGPUAsmParser : public MCTargetAsmParser {
827   MCAsmParser &Parser;
828 
829   // Number of extra operands parsed after the first optional operand.
830   // This may be necessary to skip hardcoded mandatory operands.
831   static const unsigned MAX_OPR_LOOKAHEAD = 8;
832 
833   unsigned ForcedEncodingSize = 0;
834   bool ForcedDPP = false;
835   bool ForcedSDWA = false;
836   KernelScopeInfo KernelScope;
837 
838   /// @name Auto-generated Match Functions
839   /// {
840 
841 #define GET_ASSEMBLER_HEADER
842 #include "AMDGPUGenAsmMatcher.inc"
843 
844   /// }
845 
846 private:
847   bool ParseAsAbsoluteExpression(uint32_t &Ret);
848   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
849   bool ParseDirectiveHSACodeObjectVersion();
850   bool ParseDirectiveHSACodeObjectISA();
851   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
852   bool ParseDirectiveAMDKernelCodeT();
853   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
854   bool ParseDirectiveAMDGPUHsaKernel();
855 
856   bool ParseDirectiveISAVersion();
857   bool ParseDirectiveHSAMetadata();
858   bool ParseDirectivePALMetadata();
859 
860   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
861                              RegisterKind RegKind, unsigned Reg1,
862                              unsigned RegNum);
863   bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
864                            unsigned& RegNum, unsigned& RegWidth,
865                            unsigned *DwordRegIndex);
866   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
867                     bool IsAtomic, bool IsAtomicReturn);
868   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
869                  bool IsGdsHardcoded);
870 
871 public:
872   enum AMDGPUMatchResultTy {
873     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
874   };
875 
876   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
877 
878   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
879                const MCInstrInfo &MII,
880                const MCTargetOptions &Options)
881       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
882     MCAsmParserExtension::Initialize(Parser);
883 
884     if (getFeatureBits().none()) {
885       // Set default features.
886       copySTI().ToggleFeature("SOUTHERN_ISLANDS");
887     }
888 
889     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
890 
891     {
892       // TODO: make these pre-defined variables read-only.
893       // Currently there is no suitable machinery in core llvm-mc for this.
894       // MCSymbol::isRedefinable is intended for another purpose, and
895       // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
896       AMDGPU::IsaInfo::IsaVersion ISA =
897           AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
898       MCContext &Ctx = getContext();
899       MCSymbol *Sym =
900           Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
901       Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
902       Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
903       Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
904       Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
905       Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
906     }
907     KernelScope.initialize(getContext());
908   }
909 
910   bool hasXNACK() const {
911     return AMDGPU::hasXNACK(getSTI());
912   }
913 
914   bool hasMIMG_R128() const {
915     return AMDGPU::hasMIMG_R128(getSTI());
916   }
917 
918   bool hasPackedD16() const {
919     return AMDGPU::hasPackedD16(getSTI());
920   }
921 
922   bool isSI() const {
923     return AMDGPU::isSI(getSTI());
924   }
925 
926   bool isCI() const {
927     return AMDGPU::isCI(getSTI());
928   }
929 
930   bool isVI() const {
931     return AMDGPU::isVI(getSTI());
932   }
933 
934   bool isGFX9() const {
935     return AMDGPU::isGFX9(getSTI());
936   }
937 
938   bool hasInv2PiInlineImm() const {
939     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
940   }
941 
942   bool hasFlatOffsets() const {
943     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
944   }
945 
946   bool hasSGPR102_SGPR103() const {
947     return !isVI();
948   }
949 
950   bool hasIntClamp() const {
951     return getFeatureBits()[AMDGPU::FeatureIntClamp];
952   }
953 
954   AMDGPUTargetStreamer &getTargetStreamer() {
955     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
956     return static_cast<AMDGPUTargetStreamer &>(TS);
957   }
958 
959   const MCRegisterInfo *getMRI() const {
960     // We need this const_cast because for some reason getContext() is not const
961     // in MCAsmParser.
962     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
963   }
964 
965   const MCInstrInfo *getMII() const {
966     return &MII;
967   }
968 
969   const FeatureBitset &getFeatureBits() const {
970     return getSTI().getFeatureBits();
971   }
972 
973   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
974   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
975   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
976 
977   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
978   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
979   bool isForcedDPP() const { return ForcedDPP; }
980   bool isForcedSDWA() const { return ForcedSDWA; }
981   ArrayRef<unsigned> getMatchedVariants() const;
982 
983   std::unique_ptr<AMDGPUOperand> parseRegister();
984   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
985   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
986   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
987                                       unsigned Kind) override;
988   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
989                                OperandVector &Operands, MCStreamer &Out,
990                                uint64_t &ErrorInfo,
991                                bool MatchingInlineAsm) override;
992   bool ParseDirective(AsmToken DirectiveID) override;
993   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
994   StringRef parseMnemonicSuffix(StringRef Name);
995   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
996                         SMLoc NameLoc, OperandVector &Operands) override;
997   //bool ProcessInstruction(MCInst &Inst);
998 
999   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1000 
1001   OperandMatchResultTy
1002   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1003                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1004                      bool (*ConvertResult)(int64_t &) = nullptr);
1005 
1006   OperandMatchResultTy parseOperandArrayWithPrefix(
1007     const char *Prefix,
1008     OperandVector &Operands,
1009     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1010     bool (*ConvertResult)(int64_t&) = nullptr);
1011 
1012   OperandMatchResultTy
1013   parseNamedBit(const char *Name, OperandVector &Operands,
1014                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1015   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1016                                              StringRef &Value);
1017 
1018   bool parseAbsoluteExpr(int64_t &Val, bool AbsMod = false);
1019   OperandMatchResultTy parseImm(OperandVector &Operands, bool AbsMod = false);
1020   OperandMatchResultTy parseReg(OperandVector &Operands);
1021   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool AbsMod = false);
1022   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1023   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1024   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1025   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1026   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1027 
1028   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1029   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1030   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1031   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1032 
1033   bool parseCnt(int64_t &IntVal);
1034   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1035   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1036 
1037 private:
1038   struct OperandInfoTy {
1039     int64_t Id;
1040     bool IsSymbolic = false;
1041 
1042     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1043   };
1044 
1045   bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
1046   bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1047 
1048   void errorExpTgt();
1049   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1050 
1051   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
1052   bool validateConstantBusLimitations(const MCInst &Inst);
1053   bool validateEarlyClobberLimitations(const MCInst &Inst);
1054   bool validateIntClampSupported(const MCInst &Inst);
1055   bool validateMIMGAtomicDMask(const MCInst &Inst);
1056   bool validateMIMGDataSize(const MCInst &Inst);
1057   bool validateMIMGR128(const MCInst &Inst);
1058   bool validateMIMGD16(const MCInst &Inst);
1059   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1060   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1061   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1062 
1063   bool trySkipId(const StringRef Id);
1064   bool trySkipToken(const AsmToken::TokenKind Kind);
1065   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1066   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1067   bool parseExpr(int64_t &Imm);
1068 
1069 public:
1070   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1071   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1072 
1073   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1074   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1075   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1076   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1077   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1078 
1079   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1080                             const unsigned MinVal,
1081                             const unsigned MaxVal,
1082                             const StringRef ErrMsg);
1083   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1084   bool parseSwizzleOffset(int64_t &Imm);
1085   bool parseSwizzleMacro(int64_t &Imm);
1086   bool parseSwizzleQuadPerm(int64_t &Imm);
1087   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1088   bool parseSwizzleBroadcast(int64_t &Imm);
1089   bool parseSwizzleSwap(int64_t &Imm);
1090   bool parseSwizzleReverse(int64_t &Imm);
1091 
1092   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1093   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1094   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1095   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1096 
1097   AMDGPUOperand::Ptr defaultGLC() const;
1098   AMDGPUOperand::Ptr defaultSLC() const;
1099   AMDGPUOperand::Ptr defaultTFE() const;
1100 
1101   AMDGPUOperand::Ptr defaultD16() const;
1102   AMDGPUOperand::Ptr defaultDMask() const;
1103   AMDGPUOperand::Ptr defaultUNorm() const;
1104   AMDGPUOperand::Ptr defaultDA() const;
1105   AMDGPUOperand::Ptr defaultR128() const;
1106   AMDGPUOperand::Ptr defaultLWE() const;
1107   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1108   AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1109   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1110   AMDGPUOperand::Ptr defaultOffsetU12() const;
1111   AMDGPUOperand::Ptr defaultOffsetS13() const;
1112 
1113   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1114 
1115   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1116                OptionalImmIndexMap &OptionalIdx);
1117   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1118   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1119   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1120 
1121   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1122 
1123   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1124                bool IsAtomic = false);
1125   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1126 
1127   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1128   AMDGPUOperand::Ptr defaultRowMask() const;
1129   AMDGPUOperand::Ptr defaultBankMask() const;
1130   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1131   void cvtDPP(MCInst &Inst, const OperandVector &Operands);
1132 
1133   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1134                                     AMDGPUOperand::ImmTy Type);
1135   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1136   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1137   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1138   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1139   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1140   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1141                 uint64_t BasicInstType, bool skipVcc = false);
1142 };
1143 
1144 struct OptionalOperand {
1145   const char *Name;
1146   AMDGPUOperand::ImmTy Type;
1147   bool IsBit;
1148   bool (*ConvertResult)(int64_t&);
1149 };
1150 
1151 } // end anonymous namespace
1152 
1153 // May be called with an integer type of equivalent bitwidth.
1154 static const fltSemantics *getFltSemantics(unsigned Size) {
1155   switch (Size) {
1156   case 4:
1157     return &APFloat::IEEEsingle();
1158   case 8:
1159     return &APFloat::IEEEdouble();
1160   case 2:
1161     return &APFloat::IEEEhalf();
1162   default:
1163     llvm_unreachable("unsupported fp type");
1164   }
1165 }
1166 
1167 static const fltSemantics *getFltSemantics(MVT VT) {
1168   return getFltSemantics(VT.getSizeInBits() / 8);
1169 }
1170 
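// Map an AMDGPU operand type to the IEEE float semantics used when parsing an
// FP literal for that operand.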
1171 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1172   switch (OperandType) {
1173   case AMDGPU::OPERAND_REG_IMM_INT32:
1174   case AMDGPU::OPERAND_REG_IMM_FP32:
1175   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1176   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1177     return &APFloat::IEEEsingle();
1178   case AMDGPU::OPERAND_REG_IMM_INT64:
1179   case AMDGPU::OPERAND_REG_IMM_FP64:
1180   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1181   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1182     return &APFloat::IEEEdouble();
1183   case AMDGPU::OPERAND_REG_IMM_INT16:
1184   case AMDGPU::OPERAND_REG_IMM_FP16:
1185   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1186   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1187   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1188   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1189     return &APFloat::IEEEhalf();
1190   default:
1191     llvm_unreachable("unsupported fp type");
1192   }
1193 }
1194 
1195 //===----------------------------------------------------------------------===//
1196 // Operand
1197 //===----------------------------------------------------------------------===//
1198 
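// Return true if FPLiteral can be converted to the given type; precision loss
// is tolerated, overflow and underflow are not.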
1199 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1200   bool Lost;
1201 
1202   // Convert the literal to the floating-point type given by VT.
1203   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1204                                                APFloat::rmNearestTiesToEven,
1205                                                &Lost);
1206   // We allow precision loss but not overflow or underflow
1207   if (Status != APFloat::opOK &&
1208       Lost &&
1209       ((Status & APFloat::opOverflow)  != 0 ||
1210        (Status & APFloat::opUnderflow) != 0)) {
1211     return false;
1212   }
1213 
1214   return true;
1215 }
1216 
1217 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1218   if (!isImmTy(ImmTyNone)) {
1219     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1220     return false;
1221   }
1222   // TODO: We should avoid using host float here. It would be better to
1223   // check the float bit values which is what a few other places do.
1224   // We've had bot failures before due to weird NaN support on mips hosts.
1225 
1226   APInt Literal(64, Imm.Val);
1227 
1228   if (Imm.IsFPImm) { // We got fp literal token
1229     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1230       return AMDGPU::isInlinableLiteral64(Imm.Val,
1231                                           AsmParser->hasInv2PiInlineImm());
1232     }
1233 
1234     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1235     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1236       return false;
1237 
1238     if (type.getScalarSizeInBits() == 16) {
1239       return AMDGPU::isInlinableLiteral16(
1240         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1241         AsmParser->hasInv2PiInlineImm());
1242     }
1243 
1244     // Check if single precision literal is inlinable
1245     return AMDGPU::isInlinableLiteral32(
1246       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1247       AsmParser->hasInv2PiInlineImm());
1248   }
1249 
1250   // We got int literal token.
1251   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1252     return AMDGPU::isInlinableLiteral64(Imm.Val,
1253                                         AsmParser->hasInv2PiInlineImm());
1254   }
1255 
1256   if (type.getScalarSizeInBits() == 16) {
1257     return AMDGPU::isInlinableLiteral16(
1258       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1259       AsmParser->hasInv2PiInlineImm());
1260   }
1261 
1262   return AMDGPU::isInlinableLiteral32(
1263     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1264     AsmParser->hasInv2PiInlineImm());
1265 }
1266 
1267 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1268   // Check that this immediate can be added as a literal
1269   if (!isImmTy(ImmTyNone)) {
1270     return false;
1271   }
1272 
1273   if (!Imm.IsFPImm) {
1274     // We got int literal token.
1275 
1276     if (type == MVT::f64 && hasFPModifiers()) {
1277       // FP modifiers cannot be applied to int literals while preserving the same
1278       // semantics for VOP1/2/C and VOP3 because of integer truncation. To avoid
1279       // ambiguity, disallow these cases.
1280       return false;
1281     }
1282 
1283     unsigned Size = type.getSizeInBits();
1284     if (Size == 64)
1285       Size = 32;
1286 
1287     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1288     // types.
1289     return isUIntN(Size, Imm.Val) || isIntN(Size, Imm.Val);
1290   }
1291 
1292   // We got fp literal token
1293   if (type == MVT::f64) { // Expected 64-bit fp operand
1294     // The low 32 bits of the literal would be set to zero, but we accept such literals.
1295     return true;
1296   }
1297 
1298   if (type == MVT::i64) { // Expected 64-bit int operand
1299     // We don't allow fp literals in 64-bit integer instructions. It is
1300     // unclear how we should encode them.
1301     return false;
1302   }
1303 
1304   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1305   return canLosslesslyConvertToFPType(FPLiteral, type);
1306 }
1307 
1308 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1309   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1310 }
1311 
1312 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1313   if (AsmParser->isVI())
1314     return isVReg();
1315   else if (AsmParser->isGFX9())
1316     return isRegKind() || isInlinableImm(type);
1317   else
1318     return false;
1319 }
1320 
1321 bool AMDGPUOperand::isSDWAFP16Operand() const {
1322   return isSDWAOperand(MVT::f16);
1323 }
1324 
1325 bool AMDGPUOperand::isSDWAFP32Operand() const {
1326   return isSDWAOperand(MVT::f32);
1327 }
1328 
1329 bool AMDGPUOperand::isSDWAInt16Operand() const {
1330   return isSDWAOperand(MVT::i16);
1331 }
1332 
1333 bool AMDGPUOperand::isSDWAInt32Operand() const {
1334   return isSDWAOperand(MVT::i32);
1335 }
1336 
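// Apply parsed abs/neg FP modifiers directly to the literal's bit pattern:
// abs clears the sign bit and neg flips it. Size is the operand size in bytes.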
1337 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1338 {
1339   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1340   assert(Size == 2 || Size == 4 || Size == 8);
1341 
1342   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1343 
1344   if (Imm.Mods.Abs) {
1345     Val &= ~FpSignMask;
1346   }
1347   if (Imm.Mods.Neg) {
1348     Val ^= FpSignMask;
1349   }
1350 
1351   return Val;
1352 }
1353 
1354 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1355   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1356                              Inst.getNumOperands())) {
1357     addLiteralImmOperand(Inst, Imm.Val,
1358                          ApplyModifiers &
1359                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1360   } else {
1361     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1362     Inst.addOperand(MCOperand::createImm(Imm.Val));
1363   }
1364 }
1365 
1366 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1367   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1368   auto OpNum = Inst.getNumOperands();
1369   // Check that this operand accepts literals
1370   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1371 
1372   if (ApplyModifiers) {
1373     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1374     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1375     Val = applyInputFPModifiers(Val, Size);
1376   }
1377 
1378   APInt Literal(64, Val);
1379   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1380 
1381   if (Imm.IsFPImm) { // We got fp literal token
1382     switch (OpTy) {
1383     case AMDGPU::OPERAND_REG_IMM_INT64:
1384     case AMDGPU::OPERAND_REG_IMM_FP64:
1385     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1386     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1387       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1388                                        AsmParser->hasInv2PiInlineImm())) {
1389         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1390         return;
1391       }
1392 
1393       // Non-inlineable
1394       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1395         // For fp operands we check if low 32 bits are zeros
1396         if (Literal.getLoBits(32) != 0) {
1397           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1398           "Can't encode literal as exact 64-bit floating-point operand. "
1399           "Low 32-bits will be set to zero");
1400         }
1401 
1402         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1403         return;
1404       }
1405 
1406       // We don't allow fp literals in 64-bit integer instructions. It is
1407       // unclear how we should encode them. This case should be checked earlier
1408       // in predicate methods (isLiteralImm())
1409       llvm_unreachable("fp literal in 64-bit integer instruction.");
1410 
1411     case AMDGPU::OPERAND_REG_IMM_INT32:
1412     case AMDGPU::OPERAND_REG_IMM_FP32:
1413     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1414     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1415     case AMDGPU::OPERAND_REG_IMM_INT16:
1416     case AMDGPU::OPERAND_REG_IMM_FP16:
1417     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1418     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1419     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1420     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1421       bool lost;
1422       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1423       // Convert the literal to the operand's floating-point format.
1424       FPLiteral.convert(*getOpFltSemantics(OpTy),
1425                         APFloat::rmNearestTiesToEven, &lost);
1426       // We allow precision loss but not overflow or underflow. This should be
1427       // checked earlier in isLiteralImm()
1428 
1429       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1430       if (OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
1431           OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
1432         ImmVal |= (ImmVal << 16);
1433       }
1434 
1435       Inst.addOperand(MCOperand::createImm(ImmVal));
1436       return;
1437     }
1438     default:
1439       llvm_unreachable("invalid operand size");
1440     }
1441 
1442     return;
1443   }
1444 
1445   // We got an int literal token.
1446   // Only sign extend inline immediates.
1447   // FIXME: No errors on truncation
1448   switch (OpTy) {
1449   case AMDGPU::OPERAND_REG_IMM_INT32:
1450   case AMDGPU::OPERAND_REG_IMM_FP32:
1451   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1452   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1453     if (isInt<32>(Val) &&
1454         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1455                                      AsmParser->hasInv2PiInlineImm())) {
1456       Inst.addOperand(MCOperand::createImm(Val));
1457       return;
1458     }
1459 
1460     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1461     return;
1462 
1463   case AMDGPU::OPERAND_REG_IMM_INT64:
1464   case AMDGPU::OPERAND_REG_IMM_FP64:
1465   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1466   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1467     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1468       Inst.addOperand(MCOperand::createImm(Val));
1469       return;
1470     }
1471 
1472     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1473     return;
1474 
1475   case AMDGPU::OPERAND_REG_IMM_INT16:
1476   case AMDGPU::OPERAND_REG_IMM_FP16:
1477   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1478   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1479     if (isInt<16>(Val) &&
1480         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1481                                      AsmParser->hasInv2PiInlineImm())) {
1482       Inst.addOperand(MCOperand::createImm(Val));
1483       return;
1484     }
1485 
1486     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1487     return;
1488 
1489   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1490   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1491     auto LiteralVal = static_cast<uint16_t>(Literal.getLoBits(16).getZExtValue());
1492     assert(AMDGPU::isInlinableLiteral16(LiteralVal,
1493                                         AsmParser->hasInv2PiInlineImm()));
1494 
1495     uint32_t ImmVal = static_cast<uint32_t>(LiteralVal) << 16 |
1496                       static_cast<uint32_t>(LiteralVal);
1497     Inst.addOperand(MCOperand::createImm(ImmVal));
1498     return;
1499   }
1500   default:
1501     llvm_unreachable("invalid operand size");
1502   }
1503 }
1504 
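// Add a KImm literal operand of the given bitwidth. Integer tokens are
// truncated to Bitwidth bits; FP tokens are first converted to the matching
// IEEE format.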
1505 template <unsigned Bitwidth>
1506 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1507   APInt Literal(64, Imm.Val);
1508 
1509   if (!Imm.IsFPImm) {
1510     // We got int literal token.
1511     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1512     return;
1513   }
1514 
1515   bool Lost;
1516   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1517   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1518                     APFloat::rmNearestTiesToEven, &Lost);
1519   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1520 }
1521 
1522 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1523   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1524 }
1525 
1526 //===----------------------------------------------------------------------===//
1527 // AsmParser
1528 //===----------------------------------------------------------------------===//
1529 
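// Map a register kind and width (in 32-bit registers) to the corresponding
// register class ID, or return -1 if no such class exists.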
1530 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1531   if (Is == IS_VGPR) {
1532     switch (RegWidth) {
1533       default: return -1;
1534       case 1: return AMDGPU::VGPR_32RegClassID;
1535       case 2: return AMDGPU::VReg_64RegClassID;
1536       case 3: return AMDGPU::VReg_96RegClassID;
1537       case 4: return AMDGPU::VReg_128RegClassID;
1538       case 8: return AMDGPU::VReg_256RegClassID;
1539       case 16: return AMDGPU::VReg_512RegClassID;
1540     }
1541   } else if (Is == IS_TTMP) {
1542     switch (RegWidth) {
1543       default: return -1;
1544       case 1: return AMDGPU::TTMP_32RegClassID;
1545       case 2: return AMDGPU::TTMP_64RegClassID;
1546       case 4: return AMDGPU::TTMP_128RegClassID;
1547       case 8: return AMDGPU::TTMP_256RegClassID;
1548       case 16: return AMDGPU::TTMP_512RegClassID;
1549     }
1550   } else if (Is == IS_SGPR) {
1551     switch (RegWidth) {
1552       default: return -1;
1553       case 1: return AMDGPU::SGPR_32RegClassID;
1554       case 2: return AMDGPU::SGPR_64RegClassID;
1555       case 4: return AMDGPU::SGPR_128RegClassID;
1556       case 8: return AMDGPU::SGPR_256RegClassID;
1557       case 16: return AMDGPU::SGPR_512RegClassID;
1558     }
1559   }
1560   return -1;
1561 }
1562 
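// Return the register for a named special register (exec, vcc, m0, ...), or 0
// if the name is not recognized.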
1563 static unsigned getSpecialRegForName(StringRef RegName) {
1564   return StringSwitch<unsigned>(RegName)
1565     .Case("exec", AMDGPU::EXEC)
1566     .Case("vcc", AMDGPU::VCC)
1567     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1568     .Case("xnack_mask", AMDGPU::XNACK_MASK)
1569     .Case("m0", AMDGPU::M0)
1570     .Case("scc", AMDGPU::SCC)
1571     .Case("tba", AMDGPU::TBA)
1572     .Case("tma", AMDGPU::TMA)
1573     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1574     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1575     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1576     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1577     .Case("vcc_lo", AMDGPU::VCC_LO)
1578     .Case("vcc_hi", AMDGPU::VCC_HI)
1579     .Case("exec_lo", AMDGPU::EXEC_LO)
1580     .Case("exec_hi", AMDGPU::EXEC_HI)
1581     .Case("tma_lo", AMDGPU::TMA_LO)
1582     .Case("tma_hi", AMDGPU::TMA_HI)
1583     .Case("tba_lo", AMDGPU::TBA_LO)
1584     .Case("tba_hi", AMDGPU::TBA_HI)
1585     .Default(0);
1586 }
1587 
1588 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1589                                     SMLoc &EndLoc) {
1590   auto R = parseRegister();
1591   if (!R) return true;
1592   assert(R->isReg());
1593   RegNo = R->getReg();
1594   StartLoc = R->getStartLoc();
1595   EndLoc = R->getEndLoc();
1596   return false;
1597 }
1598 
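// Try to append the next parsed register to the register sequence built so
// far. Lo/hi halves of special registers are merged into the corresponding
// 64-bit register; numbered registers must be consecutive. Returns true on
// success.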
1599 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1600                                             RegisterKind RegKind, unsigned Reg1,
1601                                             unsigned RegNum) {
1602   switch (RegKind) {
1603   case IS_SPECIAL:
1604     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1605       Reg = AMDGPU::EXEC;
1606       RegWidth = 2;
1607       return true;
1608     }
1609     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1610       Reg = AMDGPU::FLAT_SCR;
1611       RegWidth = 2;
1612       return true;
1613     }
1614     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
1615       Reg = AMDGPU::XNACK_MASK;
1616       RegWidth = 2;
1617       return true;
1618     }
1619     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1620       Reg = AMDGPU::VCC;
1621       RegWidth = 2;
1622       return true;
1623     }
1624     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1625       Reg = AMDGPU::TBA;
1626       RegWidth = 2;
1627       return true;
1628     }
1629     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
1630       Reg = AMDGPU::TMA;
1631       RegWidth = 2;
1632       return true;
1633     }
1634     return false;
1635   case IS_VGPR:
1636   case IS_SGPR:
1637   case IS_TTMP:
1638     if (Reg1 != Reg + RegWidth) {
1639       return false;
1640     }
1641     RegWidth++;
1642     return true;
1643   default:
1644     llvm_unreachable("unexpected register kind");
1645   }
1646 }
1647 
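// Parse a register reference in any of the supported forms: a special
// register name, a single register (e.g. v0, s5, ttmp3), a register range
// (e.g. v[0:3]) or a list of consecutive registers (e.g. [s0,s1,s2,s3]).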
1648 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1649                                           unsigned &RegNum, unsigned &RegWidth,
1650                                           unsigned *DwordRegIndex) {
1651   if (DwordRegIndex) { *DwordRegIndex = 0; }
1652   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
1653   if (getLexer().is(AsmToken::Identifier)) {
1654     StringRef RegName = Parser.getTok().getString();
1655     if ((Reg = getSpecialRegForName(RegName))) {
1656       Parser.Lex();
1657       RegKind = IS_SPECIAL;
1658     } else {
1659       unsigned RegNumIndex = 0;
1660       if (RegName[0] == 'v') {
1661         RegNumIndex = 1;
1662         RegKind = IS_VGPR;
1663       } else if (RegName[0] == 's') {
1664         RegNumIndex = 1;
1665         RegKind = IS_SGPR;
1666       } else if (RegName.startswith("ttmp")) {
1667         RegNumIndex = strlen("ttmp");
1668         RegKind = IS_TTMP;
1669       } else {
1670         return false;
1671       }
1672       if (RegName.size() > RegNumIndex) {
1673         // Single 32-bit register: vXX.
1674         if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
1675           return false;
1676         Parser.Lex();
1677         RegWidth = 1;
1678       } else {
1679         // Range of registers: v[XX:YY]. ":YY" is optional.
1680         Parser.Lex();
1681         int64_t RegLo, RegHi;
1682         if (getLexer().isNot(AsmToken::LBrac))
1683           return false;
1684         Parser.Lex();
1685 
1686         if (getParser().parseAbsoluteExpression(RegLo))
1687           return false;
1688 
1689         const bool isRBrace = getLexer().is(AsmToken::RBrac);
1690         if (!isRBrace && getLexer().isNot(AsmToken::Colon))
1691           return false;
1692         Parser.Lex();
1693 
1694         if (isRBrace) {
1695           RegHi = RegLo;
1696         } else {
1697           if (getParser().parseAbsoluteExpression(RegHi))
1698             return false;
1699 
1700           if (getLexer().isNot(AsmToken::RBrac))
1701             return false;
1702           Parser.Lex();
1703         }
1704         RegNum = (unsigned) RegLo;
1705         RegWidth = (RegHi - RegLo) + 1;
1706       }
1707     }
1708   } else if (getLexer().is(AsmToken::LBrac)) {
1709     // List of consecutive registers: [s0,s1,s2,s3]
1710     Parser.Lex();
1711     if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
1712       return false;
1713     if (RegWidth != 1)
1714       return false;
1715     RegisterKind RegKind1;
1716     unsigned Reg1, RegNum1, RegWidth1;
1717     do {
1718       if (getLexer().is(AsmToken::Comma)) {
1719         Parser.Lex();
1720       } else if (getLexer().is(AsmToken::RBrac)) {
1721         Parser.Lex();
1722         break;
1723       } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
1724         if (RegWidth1 != 1) {
1725           return false;
1726         }
1727         if (RegKind1 != RegKind) {
1728           return false;
1729         }
1730         if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
1731           return false;
1732         }
1733       } else {
1734         return false;
1735       }
1736     } while (true);
1737   } else {
1738     return false;
1739   }
1740   switch (RegKind) {
1741   case IS_SPECIAL:
1742     RegNum = 0;
1743     RegWidth = 1;
1744     break;
1745   case IS_VGPR:
1746   case IS_SGPR:
1747   case IS_TTMP:
1748   {
1749     unsigned Size = 1;
1750     if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
1751       // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
1752       Size = std::min(RegWidth, 4u);
1753     }
1754     if (RegNum % Size != 0)
1755       return false;
1756     if (DwordRegIndex) { *DwordRegIndex = RegNum; }
1757     RegNum = RegNum / Size;
1758     int RCID = getRegClass(RegKind, RegWidth);
1759     if (RCID == -1)
1760       return false;
1761     const MCRegisterClass RC = TRI->getRegClass(RCID);
1762     if (RegNum >= RC.getNumRegs())
1763       return false;
1764     Reg = RC.getRegister(RegNum);
1765     break;
1766   }
1767 
1768   default:
1769     llvm_unreachable("unexpected register kind");
1770   }
1771 
1772   if (!subtargetHasRegister(*TRI, Reg))
1773     return false;
1774   return true;
1775 }
1776 
1777 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
1778   const auto &Tok = Parser.getTok();
1779   SMLoc StartLoc = Tok.getLoc();
1780   SMLoc EndLoc = Tok.getEndLoc();
1781   RegisterKind RegKind;
1782   unsigned Reg, RegNum, RegWidth, DwordRegIndex;
1783 
1784   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
1785     return nullptr;
1786   }
1787   KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
1788   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false);
1789 }
1790 
1791 bool
1792 AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool AbsMod) {
1793   if (AbsMod && getLexer().peekTok().is(AsmToken::Pipe) &&
1794       (getLexer().getKind() == AsmToken::Integer ||
1795        getLexer().getKind() == AsmToken::Real)) {
1796     // This is a workaround for handling operands like these:
1797     //     |1.0|
1798     //     |-1|
1799     // This syntax is not compatible with the syntax of standard
1800     // MC expressions (due to the trailing '|').
1801 
1802     SMLoc EndLoc;
1803     const MCExpr *Expr;
1804 
1805     if (getParser().parsePrimaryExpr(Expr, EndLoc)) {
1806       return true;
1807     }
1808 
1809     return !Expr->evaluateAsAbsolute(Val);
1810   }
1811 
1812   return getParser().parseAbsoluteExpression(Val);
1813 }
1814 
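// Parse an integer or floating-point literal, optionally preceded by a unary
// minus, and add it to Operands.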
1815 OperandMatchResultTy
1816 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) {
1817   // TODO: add syntactic sugar for 1/(2*PI)
1818   bool Minus = false;
1819   if (getLexer().getKind() == AsmToken::Minus) {
1820     const AsmToken NextToken = getLexer().peekTok();
1821     if (!NextToken.is(AsmToken::Integer) &&
1822         !NextToken.is(AsmToken::Real)) {
1823         return MatchOperand_NoMatch;
1824     }
1825     Minus = true;
1826     Parser.Lex();
1827   }
1828 
1829   SMLoc S = Parser.getTok().getLoc();
1830   switch(getLexer().getKind()) {
1831   case AsmToken::Integer: {
1832     int64_t IntVal;
1833     if (parseAbsoluteExpr(IntVal, AbsMod))
1834       return MatchOperand_ParseFail;
1835     if (Minus)
1836       IntVal *= -1;
1837     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
1838     return MatchOperand_Success;
1839   }
1840   case AsmToken::Real: {
1841     int64_t IntVal;
1842     if (parseAbsoluteExpr(IntVal, AbsMod))
1843       return MatchOperand_ParseFail;
1844 
1845     APFloat F(BitsToDouble(IntVal));
1846     if (Minus)
1847       F.changeSign();
1848     Operands.push_back(
1849         AMDGPUOperand::CreateImm(this, F.bitcastToAPInt().getZExtValue(), S,
1850                                  AMDGPUOperand::ImmTyNone, true));
1851     return MatchOperand_Success;
1852   }
1853   default:
1854     return MatchOperand_NoMatch;
1855   }
1856 }
1857 
1858 OperandMatchResultTy
1859 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
1860   if (auto R = parseRegister()) {
1861     assert(R->isReg());
1862     R->Reg.IsForcedVOP3 = isForcedVOP3();
1863     Operands.push_back(std::move(R));
1864     return MatchOperand_Success;
1865   }
1866   return MatchOperand_NoMatch;
1867 }
1868 
1869 OperandMatchResultTy
1870 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool AbsMod) {
1871   auto res = parseImm(Operands, AbsMod);
1872   if (res != MatchOperand_NoMatch) {
1873     return res;
1874   }
1875 
1876   return parseReg(Operands);
1877 }
1878 
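// Parse a source operand with optional floating-point modifiers: either the
// "neg(...)"/"abs(...)" syntax or the "-"/"|...|" shorthand.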
1879 OperandMatchResultTy
1880 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
1881                                               bool AllowImm) {
1882   bool Negate = false, Negate2 = false, Abs = false, Abs2 = false;
1883 
1884   if (getLexer().getKind()== AsmToken::Minus) {
1885     const AsmToken NextToken = getLexer().peekTok();
1886 
1887     // Reject ambiguous constructs like '--1'; use 'neg(-1)' instead.
1888     if (NextToken.is(AsmToken::Minus)) {
1889       Error(Parser.getTok().getLoc(), "invalid syntax, expected 'neg' modifier");
1890       return MatchOperand_ParseFail;
1891     }
1892 
1893     // '-' followed by an integer literal N should be interpreted as integer
1894     // negation rather than a floating-point NEG modifier applied to N.
1895     // Besides being counter-intuitive, such use of the floating-point NEG
1896     // modifier results in different meanings of integer literals used with VOP1/2/C
1897     // and VOP3, for example:
1898     //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
1899     //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
1900     // Negative fp literals should be handled likewise for uniformity.
1901     if (!NextToken.is(AsmToken::Integer) && !NextToken.is(AsmToken::Real)) {
1902       Parser.Lex();
1903       Negate = true;
1904     }
1905   }
1906 
1907   if (getLexer().getKind() == AsmToken::Identifier &&
1908       Parser.getTok().getString() == "neg") {
1909     if (Negate) {
1910       Error(Parser.getTok().getLoc(), "expected register or immediate");
1911       return MatchOperand_ParseFail;
1912     }
1913     Parser.Lex();
1914     Negate2 = true;
1915     if (getLexer().isNot(AsmToken::LParen)) {
1916       Error(Parser.getTok().getLoc(), "expected left paren after neg");
1917       return MatchOperand_ParseFail;
1918     }
1919     Parser.Lex();
1920   }
1921 
1922   if (getLexer().getKind() == AsmToken::Identifier &&
1923       Parser.getTok().getString() == "abs") {
1924     Parser.Lex();
1925     Abs2 = true;
1926     if (getLexer().isNot(AsmToken::LParen)) {
1927       Error(Parser.getTok().getLoc(), "expected left paren after abs");
1928       return MatchOperand_ParseFail;
1929     }
1930     Parser.Lex();
1931   }
1932 
1933   if (getLexer().getKind() == AsmToken::Pipe) {
1934     if (Abs2) {
1935       Error(Parser.getTok().getLoc(), "expected register or immediate");
1936       return MatchOperand_ParseFail;
1937     }
1938     Parser.Lex();
1939     Abs = true;
1940   }
1941 
1942   OperandMatchResultTy Res;
1943   if (AllowImm) {
1944     Res = parseRegOrImm(Operands, Abs);
1945   } else {
1946     Res = parseReg(Operands);
1947   }
1948   if (Res != MatchOperand_Success) {
1949     return Res;
1950   }
1951 
1952   AMDGPUOperand::Modifiers Mods;
1953   if (Abs) {
1954     if (getLexer().getKind() != AsmToken::Pipe) {
1955       Error(Parser.getTok().getLoc(), "expected vertical bar");
1956       return MatchOperand_ParseFail;
1957     }
1958     Parser.Lex();
1959     Mods.Abs = true;
1960   }
1961   if (Abs2) {
1962     if (getLexer().isNot(AsmToken::RParen)) {
1963       Error(Parser.getTok().getLoc(), "expected closing parentheses");
1964       return MatchOperand_ParseFail;
1965     }
1966     Parser.Lex();
1967     Mods.Abs = true;
1968   }
1969 
1970   if (Negate) {
1971     Mods.Neg = true;
1972   } else if (Negate2) {
1973     if (getLexer().isNot(AsmToken::RParen)) {
1974       Error(Parser.getTok().getLoc(), "expected closing parentheses");
1975       return MatchOperand_ParseFail;
1976     }
1977     Parser.Lex();
1978     Mods.Neg = true;
1979   }
1980 
1981   if (Mods.hasFPModifiers()) {
1982     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
1983     Op.setModifiers(Mods);
1984   }
1985   return MatchOperand_Success;
1986 }
1987 
1988 OperandMatchResultTy
1989 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
1990                                                bool AllowImm) {
1991   bool Sext = false;
1992 
1993   if (getLexer().getKind() == AsmToken::Identifier &&
1994       Parser.getTok().getString() == "sext") {
1995     Parser.Lex();
1996     Sext = true;
1997     if (getLexer().isNot(AsmToken::LParen)) {
1998       Error(Parser.getTok().getLoc(), "expected left paren after sext");
1999       return MatchOperand_ParseFail;
2000     }
2001     Parser.Lex();
2002   }
2003 
2004   OperandMatchResultTy Res;
2005   if (AllowImm) {
2006     Res = parseRegOrImm(Operands);
2007   } else {
2008     Res = parseReg(Operands);
2009   }
2010   if (Res != MatchOperand_Success) {
2011     return Res;
2012   }
2013 
2014   AMDGPUOperand::Modifiers Mods;
2015   if (Sext) {
2016     if (getLexer().isNot(AsmToken::RParen)) {
2017       Error(Parser.getTok().getLoc(), "expected closing parentheses");
2018       return MatchOperand_ParseFail;
2019     }
2020     Parser.Lex();
2021     Mods.Sext = true;
2022   }
2023 
2024   if (Mods.hasIntModifiers()) {
2025     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2026     Op.setModifiers(Mods);
2027   }
2028 
2029   return MatchOperand_Success;
2030 }
2031 
2032 OperandMatchResultTy
2033 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2034   return parseRegOrImmWithFPInputMods(Operands, false);
2035 }
2036 
2037 OperandMatchResultTy
2038 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2039   return parseRegOrImmWithIntInputMods(Operands, false);
2040 }
2041 
2042 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2043   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2044   if (Reg) {
2045     Operands.push_back(std::move(Reg));
2046     return MatchOperand_Success;
2047   }
2048 
2049   const AsmToken &Tok = Parser.getTok();
2050   if (Tok.getString() == "off") {
2051     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Tok.getLoc(),
2052                                                 AMDGPUOperand::ImmTyOff, false));
2053     Parser.Lex();
2054     return MatchOperand_Success;
2055   }
2056 
2057   return MatchOperand_NoMatch;
2058 }
2059 
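// Reject matches that disagree with the encoding forced by the mnemonic
// suffix (_e32/_e64/_dpp/_sdwa) and enforce a few opcode-specific operand
// restrictions.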
2060 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2061   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2062 
2063   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2064       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2065       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2066       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2067     return Match_InvalidOperand;
2068 
2069   if ((TSFlags & SIInstrFlags::VOP3) &&
2070       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2071       getForcedEncodingSize() != 64)
2072     return Match_PreferE32;
2073 
2074   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2075       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2076     // v_mac_f32/16 allow only dst_sel == DWORD.
2077     auto OpNum =
2078         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2079     const auto &Op = Inst.getOperand(OpNum);
2080     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2081       return Match_InvalidOperand;
2082     }
2083   }
2084 
2085   if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) {
2086     // FIXME: Produces error without correct column reported.
2087     auto OpNum =
2088         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
2089     const auto &Op = Inst.getOperand(OpNum);
2090     if (Op.getImm() != 0)
2091       return Match_InvalidOperand;
2092   }
2093 
2094   return Match_Success;
2095 }
2096 
2097 // Which asm variants we should check.
2098 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2099   if (getForcedEncodingSize() == 32) {
2100     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2101     return makeArrayRef(Variants);
2102   }
2103 
2104   if (isForcedVOP3()) {
2105     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2106     return makeArrayRef(Variants);
2107   }
2108 
2109   if (isForcedSDWA()) {
2110     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2111                                         AMDGPUAsmVariants::SDWA9};
2112     return makeArrayRef(Variants);
2113   }
2114 
2115   if (isForcedDPP()) {
2116     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2117     return makeArrayRef(Variants);
2118   }
2119 
2120   static const unsigned Variants[] = {
2121     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2122     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2123   };
2124 
2125   return makeArrayRef(Variants);
2126 }
2127 
2128 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2129   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2130   const unsigned Num = Desc.getNumImplicitUses();
2131   for (unsigned i = 0; i < Num; ++i) {
2132     unsigned Reg = Desc.ImplicitUses[i];
2133     switch (Reg) {
2134     case AMDGPU::FLAT_SCR:
2135     case AMDGPU::VCC:
2136     case AMDGPU::M0:
2137       return Reg;
2138     default:
2139       break;
2140     }
2141   }
2142   return AMDGPU::NoRegister;
2143 }
2144 
2145 // NB: This code is correct only when used to check constant
2146 // bus limitations because GFX7 does not support f16 inline constants.
2147 // Note that there are no cases in which a GFX7 opcode violates
2148 // constant bus limitations due to the use of an f16 constant.
2149 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2150                                        unsigned OpIdx) const {
2151   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2152 
2153   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2154     return false;
2155   }
2156 
2157   const MCOperand &MO = Inst.getOperand(OpIdx);
2158 
2159   int64_t Val = MO.getImm();
2160   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2161 
2162   switch (OpSize) { // expected operand size
2163   case 8:
2164     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2165   case 4:
2166     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2167   case 2: {
2168     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2169     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2170         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
2171       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2172     } else {
2173       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2174     }
2175   }
2176   default:
2177     llvm_unreachable("invalid operand size");
2178   }
2179 }
2180 
2181 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2182   const MCOperand &MO = Inst.getOperand(OpIdx);
2183   if (MO.isImm()) {
2184     return !isInlineConstant(Inst, OpIdx);
2185   }
2186   return !MO.isReg() ||
2187          isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2188 }
2189 
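// A VOP instruction may read at most one SGPR or literal constant over the
// constant bus; check that this instruction does not exceed the limit.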
2190 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2191   const unsigned Opcode = Inst.getOpcode();
2192   const MCInstrDesc &Desc = MII.get(Opcode);
2193   unsigned ConstantBusUseCount = 0;
2194 
2195   if (Desc.TSFlags &
2196       (SIInstrFlags::VOPC |
2197        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2198        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2199        SIInstrFlags::SDWA)) {
2200     // Check special imm operands (used by madmk, etc)
2201     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2202       ++ConstantBusUseCount;
2203     }
2204 
2205     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2206     if (SGPRUsed != AMDGPU::NoRegister) {
2207       ++ConstantBusUseCount;
2208     }
2209 
2210     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2211     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2212     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2213 
2214     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2215 
2216     for (int OpIdx : OpIndices) {
2217       if (OpIdx == -1) break;
2218 
2219       const MCOperand &MO = Inst.getOperand(OpIdx);
2220       if (usesConstantBus(Inst, OpIdx)) {
2221         if (MO.isReg()) {
2222           const unsigned Reg = mc2PseudoReg(MO.getReg());
2223           // Pairs of registers with a partial intersection like these
2224           //   s0, s[0:1]
2225           //   flat_scratch_lo, flat_scratch
2226           //   flat_scratch_lo, flat_scratch_hi
2227           // are theoretically valid but they are disabled anyway.
2228           // Note that this code mimics SIInstrInfo::verifyInstruction
2229           if (Reg != SGPRUsed) {
2230             ++ConstantBusUseCount;
2231           }
2232           SGPRUsed = Reg;
2233         } else { // Expression or a literal
2234           ++ConstantBusUseCount;
2235         }
2236       }
2237     }
2238   }
2239 
2240   return ConstantBusUseCount <= 1;
2241 }
2242 
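// If the destination operand is marked earlyclobber, it must not overlap any
// of the source registers.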
2243 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2244   const unsigned Opcode = Inst.getOpcode();
2245   const MCInstrDesc &Desc = MII.get(Opcode);
2246 
2247   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2248   if (DstIdx == -1 ||
2249       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2250     return true;
2251   }
2252 
2253   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2254 
2255   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2256   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2257   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2258 
2259   assert(DstIdx != -1);
2260   const MCOperand &Dst = Inst.getOperand(DstIdx);
2261   assert(Dst.isReg());
2262   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2263 
2264   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2265 
2266   for (int SrcIdx : SrcIndices) {
2267     if (SrcIdx == -1) break;
2268     const MCOperand &Src = Inst.getOperand(SrcIdx);
2269     if (Src.isReg()) {
2270       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2271       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2272         return false;
2273       }
2274     }
2275   }
2276 
2277   return true;
2278 }
2279 
2280 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2281 
2282   const unsigned Opc = Inst.getOpcode();
2283   const MCInstrDesc &Desc = MII.get(Opc);
2284 
2285   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2286     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2287     assert(ClampIdx != -1);
2288     return Inst.getOperand(ClampIdx).getImm() == 0;
2289   }
2290 
2291   return true;
2292 }
2293 
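// Check that the width of the MIMG vdata operand matches the number of
// components enabled by dmask (halved for packed d16), plus an extra dword
// when tfe is set.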
2294 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2295 
2296   const unsigned Opc = Inst.getOpcode();
2297   const MCInstrDesc &Desc = MII.get(Opc);
2298 
2299   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2300     return true;
2301 
2302   // Gather4 instructions seem to have special rules not described in the spec.
2303   if (Desc.TSFlags & SIInstrFlags::Gather4)
2304     return true;
2305 
2306   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2307   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2308   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2309 
2310   assert(VDataIdx != -1);
2311   assert(DMaskIdx != -1);
2312   assert(TFEIdx != -1);
2313 
2314   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2315   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2316   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2317   if (DMask == 0)
2318     DMask = 1;
2319 
2320   unsigned DataSize = countPopulation(DMask);
2321   if ((Desc.TSFlags & SIInstrFlags::D16) != 0 && hasPackedD16()) {
2322     DataSize = (DataSize + 1) / 2;
2323   }
2324 
2325   return (VDataSize / 4) == DataSize + TFESize;
2326 }
2327 
2328 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
2329 
2330   const unsigned Opc = Inst.getOpcode();
2331   const MCInstrDesc &Desc = MII.get(Opc);
2332 
2333   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2334     return true;
2335   if (!Desc.mayLoad() || !Desc.mayStore())
2336     return true; // Not atomic
2337 
2338   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2339   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2340 
2341   // This is an incomplete check because image_atomic_cmpswap
2342   // may only use 0x3 and 0xf while other atomic operations
2343   // may use 0x1 and 0x3. However, these limitations are
2344   // verified when we check that dmask matches dst size.
2345   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
2346 }
2347 
2348 bool AMDGPUAsmParser::validateMIMGR128(const MCInst &Inst) {
2349 
2350   const unsigned Opc = Inst.getOpcode();
2351   const MCInstrDesc &Desc = MII.get(Opc);
2352 
2353   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2354     return true;
2355 
2356   int Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
2357   assert(Idx != -1);
2358 
2359   bool R128 = (Inst.getOperand(Idx).getImm() != 0);
2360 
2361   return !R128 || hasMIMG_R128();
2362 }
2363 
2364 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
2365 
2366   const unsigned Opc = Inst.getOpcode();
2367   const MCInstrDesc &Desc = MII.get(Opc);
2368 
2369   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2370     return true;
2371   if ((Desc.TSFlags & SIInstrFlags::D16) == 0)
2372     return true;
2373 
2374   return !isCI() && !isSI();
2375 }
2376 
2377 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
2378                                           const SMLoc &IDLoc) {
2379   if (!validateConstantBusLimitations(Inst)) {
2380     Error(IDLoc,
2381       "invalid operand (violates constant bus restrictions)");
2382     return false;
2383   }
2384   if (!validateEarlyClobberLimitations(Inst)) {
2385     Error(IDLoc,
2386       "destination must be different than all sources");
2387     return false;
2388   }
2389   if (!validateIntClampSupported(Inst)) {
2390     Error(IDLoc,
2391       "integer clamping is not supported on this GPU");
2392     return false;
2393   }
2394   if (!validateMIMGR128(Inst)) {
2395     Error(IDLoc,
2396       "r128 modifier is not supported on this GPU");
2397     return false;
2398   }
2399   // For MUBUF/MTBUF, d16 is part of the opcode, so there is nothing to validate.
2400   if (!validateMIMGD16(Inst)) {
2401     Error(IDLoc,
2402       "d16 modifier is not supported on this GPU");
2403     return false;
2404   }
2405   if (!validateMIMGDataSize(Inst)) {
2406     Error(IDLoc,
2407       "image data size does not match dmask and tfe");
2408     return false;
2409   }
2410   if (!validateMIMGAtomicDMask(Inst)) {
2411     Error(IDLoc,
2412       "invalid atomic image dmask");
2413     return false;
2414   }
2415 
2416   return true;
2417 }
2418 
2419 static std::string AMDGPUMnemonicSpellCheck(StringRef S, uint64_t FBS,
2420                                             unsigned VariantID = 0);
2421 
2422 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2423                                               OperandVector &Operands,
2424                                               MCStreamer &Out,
2425                                               uint64_t &ErrorInfo,
2426                                               bool MatchingInlineAsm) {
2427   MCInst Inst;
2428   unsigned Result = Match_Success;
2429   for (auto Variant : getMatchedVariants()) {
2430     uint64_t EI;
2431     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
2432                                   Variant);
2433     // We order match statuses from least to most specific and use the most
2434     // specific status as the result:
2435     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
2436     if ((R == Match_Success) ||
2437         (R == Match_PreferE32) ||
2438         (R == Match_MissingFeature && Result != Match_PreferE32) ||
2439         (R == Match_InvalidOperand && Result != Match_MissingFeature
2440                                    && Result != Match_PreferE32) ||
2441         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
2442                                    && Result != Match_MissingFeature
2443                                    && Result != Match_PreferE32)) {
2444       Result = R;
2445       ErrorInfo = EI;
2446     }
2447     if (R == Match_Success)
2448       break;
2449   }
2450 
2451   switch (Result) {
2452   default: break;
2453   case Match_Success:
2454     if (!validateInstruction(Inst, IDLoc)) {
2455       return true;
2456     }
2457     Inst.setLoc(IDLoc);
2458     Out.EmitInstruction(Inst, getSTI());
2459     return false;
2460 
2461   case Match_MissingFeature:
2462     return Error(IDLoc, "instruction not supported on this GPU");
2463 
2464   case Match_MnemonicFail: {
2465     uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
2466     std::string Suggestion = AMDGPUMnemonicSpellCheck(
2467         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
2468     return Error(IDLoc, "invalid instruction" + Suggestion,
2469                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
2470   }
2471 
2472   case Match_InvalidOperand: {
2473     SMLoc ErrorLoc = IDLoc;
2474     if (ErrorInfo != ~0ULL) {
2475       if (ErrorInfo >= Operands.size()) {
2476         return Error(IDLoc, "too few operands for instruction");
2477       }
2478       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
2479       if (ErrorLoc == SMLoc())
2480         ErrorLoc = IDLoc;
2481     }
2482     return Error(ErrorLoc, "invalid operand for instruction");
2483   }
2484 
2485   case Match_PreferE32:
2486     return Error(IDLoc, "internal error: instruction without _e64 suffix "
2487                         "should be encoded as e32");
2488   }
2489   llvm_unreachable("Implement any new match types added!");
2490 }
2491 
2492 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
2493   int64_t Tmp = -1;
2494   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
2495     return true;
2496   }
2497   if (getParser().parseAbsoluteExpression(Tmp)) {
2498     return true;
2499   }
2500   Ret = static_cast<uint32_t>(Tmp);
2501   return false;
2502 }
2503 
2504 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
2505                                                uint32_t &Minor) {
2506   if (ParseAsAbsoluteExpression(Major))
2507     return TokError("invalid major version");
2508 
2509   if (getLexer().isNot(AsmToken::Comma))
2510     return TokError("minor version number required, comma expected");
2511   Lex();
2512 
2513   if (ParseAsAbsoluteExpression(Minor))
2514     return TokError("invalid minor version");
2515 
2516   return false;
2517 }
2518 
2519 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
2520   uint32_t Major;
2521   uint32_t Minor;
2522 
2523   if (ParseDirectiveMajorMinor(Major, Minor))
2524     return true;
2525 
2526   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
2527   return false;
2528 }
2529 
2530 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
2531   uint32_t Major;
2532   uint32_t Minor;
2533   uint32_t Stepping;
2534   StringRef VendorName;
2535   StringRef ArchName;
2536 
2537   // If this directive has no arguments, then use the ISA version for the
2538   // targeted GPU.
2539   if (getLexer().is(AsmToken::EndOfStatement)) {
2540     AMDGPU::IsaInfo::IsaVersion ISA =
2541         AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
2542     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
2543                                                       ISA.Stepping,
2544                                                       "AMD", "AMDGPU");
2545     return false;
2546   }
2547 
2548   if (ParseDirectiveMajorMinor(Major, Minor))
2549     return true;
2550 
2551   if (getLexer().isNot(AsmToken::Comma))
2552     return TokError("stepping version number required, comma expected");
2553   Lex();
2554 
2555   if (ParseAsAbsoluteExpression(Stepping))
2556     return TokError("invalid stepping version");
2557 
2558   if (getLexer().isNot(AsmToken::Comma))
2559     return TokError("vendor name required, comma expected");
2560   Lex();
2561 
2562   if (getLexer().isNot(AsmToken::String))
2563     return TokError("invalid vendor name");
2564 
2565   VendorName = getLexer().getTok().getStringContents();
2566   Lex();
2567 
2568   if (getLexer().isNot(AsmToken::Comma))
2569     return TokError("arch name required, comma expected");
2570   Lex();
2571 
2572   if (getLexer().isNot(AsmToken::String))
2573     return TokError("invalid arch name");
2574 
2575   ArchName = getLexer().getTok().getStringContents();
2576   Lex();
2577 
2578   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
2579                                                     VendorName, ArchName);
2580   return false;
2581 }
2582 
2583 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
2584                                                amd_kernel_code_t &Header) {
2585   SmallString<40> ErrStr;
2586   raw_svector_ostream Err(ErrStr);
2587   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
2588     return TokError(Err.str());
2589   }
2590   Lex();
2591   return false;
2592 }
2593 
2594 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
2595   amd_kernel_code_t Header;
2596   AMDGPU::initDefaultAMDKernelCodeT(Header, getFeatureBits());
2597 
2598   while (true) {
2599     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
2600     // will set the current token to EndOfStatement.
2601     while(getLexer().is(AsmToken::EndOfStatement))
2602       Lex();
2603 
2604     if (getLexer().isNot(AsmToken::Identifier))
2605       return TokError("expected value identifier or .end_amd_kernel_code_t");
2606 
2607     StringRef ID = getLexer().getTok().getIdentifier();
2608     Lex();
2609 
2610     if (ID == ".end_amd_kernel_code_t")
2611       break;
2612 
2613     if (ParseAMDKernelCodeTValue(ID, Header))
2614       return true;
2615   }
2616 
2617   getTargetStreamer().EmitAMDKernelCodeT(Header);
2618 
2619   return false;
2620 }
2621 
2622 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
2623   if (getLexer().isNot(AsmToken::Identifier))
2624     return TokError("expected symbol name");
2625 
2626   StringRef KernelName = Parser.getTok().getString();
2627 
2628   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
2629                                            ELF::STT_AMDGPU_HSA_KERNEL);
2630   Lex();
2631   KernelScope.initialize(getContext());
2632   return false;
2633 }
2634 
2635 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
2636   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
2637     return Error(getParser().getTok().getLoc(),
2638                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
2639                  "architectures");
2640   }
2641 
2642   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
2643 
2644   std::string ISAVersionStringFromSTI;
2645   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
2646   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
2647 
2648   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
2649     return Error(getParser().getTok().getLoc(),
2650                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
2651                  "arguments specified through the command line");
2652   }
2653 
2654   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
2655   Lex();
2656 
2657   return false;
2658 }
2659 
2660 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
2661   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
2662     return Error(getParser().getTok().getLoc(),
2663                  (Twine(HSAMD::AssemblerDirectiveBegin) + Twine(" directive is "
2664                  "not available on non-amdhsa OSes")).str());
2665   }
2666 
2667   std::string HSAMetadataString;
2668   raw_string_ostream YamlStream(HSAMetadataString);
2669 
2670   getLexer().setSkipSpace(false);
2671 
2672   bool FoundEnd = false;
2673   while (!getLexer().is(AsmToken::Eof)) {
2674     while (getLexer().is(AsmToken::Space)) {
2675       YamlStream << getLexer().getTok().getString();
2676       Lex();
2677     }
2678 
2679     if (getLexer().is(AsmToken::Identifier)) {
2680       StringRef ID = getLexer().getTok().getIdentifier();
2681       if (ID == AMDGPU::HSAMD::AssemblerDirectiveEnd) {
2682         Lex();
2683         FoundEnd = true;
2684         break;
2685       }
2686     }
2687 
2688     YamlStream << Parser.parseStringToEndOfStatement()
2689                << getContext().getAsmInfo()->getSeparatorString();
2690 
2691     Parser.eatToEndOfStatement();
2692   }
2693 
2694   getLexer().setSkipSpace(true);
2695 
2696   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
2697     return TokError(Twine("expected directive ") +
2698                     Twine(HSAMD::AssemblerDirectiveEnd) + Twine(" not found"));
2699   }
2700 
2701   YamlStream.flush();
2702 
2703   if (!getTargetStreamer().EmitHSAMetadata(HSAMetadataString))
2704     return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
2705 
2706   return false;
2707 }
2708 
2709 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
2710   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
2711     return Error(getParser().getTok().getLoc(),
2712                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
2713                  "not available on non-amdpal OSes")).str());
2714   }
2715 
2716   PALMD::Metadata PALMetadata;
2717   for (;;) {
2718     uint32_t Value;
2719     if (ParseAsAbsoluteExpression(Value)) {
2720       return TokError(Twine("invalid value in ") +
2721                       Twine(PALMD::AssemblerDirective));
2722     }
2723     PALMetadata.push_back(Value);
2724     if (getLexer().isNot(AsmToken::Comma))
2725       break;
2726     Lex();
2727   }
2728   getTargetStreamer().EmitPALMetadata(PALMetadata);
2729   return false;
2730 }
2731 
2732 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
2733   StringRef IDVal = DirectiveID.getString();
2734 
2735   if (IDVal == ".hsa_code_object_version")
2736     return ParseDirectiveHSACodeObjectVersion();
2737 
2738   if (IDVal == ".hsa_code_object_isa")
2739     return ParseDirectiveHSACodeObjectISA();
2740 
2741   if (IDVal == ".amd_kernel_code_t")
2742     return ParseDirectiveAMDKernelCodeT();
2743 
2744   if (IDVal == ".amdgpu_hsa_kernel")
2745     return ParseDirectiveAMDGPUHsaKernel();
2746 
2747   if (IDVal == ".amd_amdgpu_isa")
2748     return ParseDirectiveISAVersion();
2749 
2750   if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
2751     return ParseDirectiveHSAMetadata();
2752 
2753   if (IDVal == PALMD::AssemblerDirective)
2754     return ParseDirectivePALMetadata();
2755 
2756   return true;
2757 }
2758 
2759 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
2760                                            unsigned RegNo) const {
2761 
2762   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
2763        R.isValid(); ++R) {
2764     if (*R == RegNo)
2765       return isGFX9();
2766   }
2767 
2768   switch (RegNo) {
2769   case AMDGPU::TBA:
2770   case AMDGPU::TBA_LO:
2771   case AMDGPU::TBA_HI:
2772   case AMDGPU::TMA:
2773   case AMDGPU::TMA_LO:
2774   case AMDGPU::TMA_HI:
2775     return !isGFX9();
2776   case AMDGPU::XNACK_MASK:
2777   case AMDGPU::XNACK_MASK_LO:
2778   case AMDGPU::XNACK_MASK_HI:
2779     return !isCI() && !isSI() && hasXNACK();
2780   default:
2781     break;
2782   }
2783 
2784   if (isCI())
2785     return true;
2786 
2787   if (isSI()) {
2788     // No flat_scr
2789     switch (RegNo) {
2790     case AMDGPU::FLAT_SCR:
2791     case AMDGPU::FLAT_SCR_LO:
2792     case AMDGPU::FLAT_SCR_HI:
2793       return false;
2794     default:
2795       return true;
2796     }
2797   }
2798 
2799   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
2800   // SI/CI have.
2801   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
2802        R.isValid(); ++R) {
2803     if (*R == RegNo)
2804       return false;
2805   }
2806 
2807   return true;
2808 }
2809 
2810 OperandMatchResultTy
2811 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
2812   // Try to parse with a custom parser
2813   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
2814 
2815   // If we successfully parsed the operand or if there was an error parsing,
2816   // we are done.
2817   //
2818   // If we are parsing after we reach EndOfStatement then this means we
2819   // are appending default values to the Operands list.  This is only done
2820   // by the custom parser, so we shouldn't continue on to the generic parsing.
2821   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
2822       getLexer().is(AsmToken::EndOfStatement))
2823     return ResTy;
2824 
2825   ResTy = parseRegOrImm(Operands);
2826 
2827   if (ResTy == MatchOperand_Success)
2828     return ResTy;
2829 
2830   const auto &Tok = Parser.getTok();
2831   SMLoc S = Tok.getLoc();
2832 
2833   const MCExpr *Expr = nullptr;
2834   if (!Parser.parseExpression(Expr)) {
2835     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2836     return MatchOperand_Success;
2837   }
2838 
2839   // Possibly this is an instruction flag like 'gds'.
2840   if (Tok.getKind() == AsmToken::Identifier) {
2841     Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S));
2842     Parser.Lex();
2843     return MatchOperand_Success;
2844   }
2845 
2846   return MatchOperand_NoMatch;
2847 }
2848 
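// Strip a trailing encoding suffix (_e64, _e32, _dpp or _sdwa) from the
// mnemonic and record the corresponding forced encoding.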
2849 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
2850   // Clear any forced encodings from the previous instruction.
2851   setForcedEncodingSize(0);
2852   setForcedDPP(false);
2853   setForcedSDWA(false);
2854 
2855   if (Name.endswith("_e64")) {
2856     setForcedEncodingSize(64);
2857     return Name.substr(0, Name.size() - 4);
2858   } else if (Name.endswith("_e32")) {
2859     setForcedEncodingSize(32);
2860     return Name.substr(0, Name.size() - 4);
2861   } else if (Name.endswith("_dpp")) {
2862     setForcedDPP(true);
2863     return Name.substr(0, Name.size() - 4);
2864   } else if (Name.endswith("_sdwa")) {
2865     setForcedSDWA(true);
2866     return Name.substr(0, Name.size() - 5);
2867   }
2868   return Name;
2869 }
2870 
2871 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
2872                                        StringRef Name,
2873                                        SMLoc NameLoc, OperandVector &Operands) {
2874   // Add the instruction mnemonic
2875   Name = parseMnemonicSuffix(Name);
2876   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
2877 
2878   while (!getLexer().is(AsmToken::EndOfStatement)) {
2879     OperandMatchResultTy Res = parseOperand(Operands, Name);
2880 
2881     // Eat the comma or space if there is one.
2882     if (getLexer().is(AsmToken::Comma))
2883       Parser.Lex();
2884 
2885     switch (Res) {
2886       case MatchOperand_Success: break;
2887       case MatchOperand_ParseFail:
2888         Error(getLexer().getLoc(), "failed parsing operand.");
2889         while (!getLexer().is(AsmToken::EndOfStatement)) {
2890           Parser.Lex();
2891         }
2892         return true;
2893       case MatchOperand_NoMatch:
2894         Error(getLexer().getLoc(), "not a valid operand.");
2895         while (!getLexer().is(AsmToken::EndOfStatement)) {
2896           Parser.Lex();
2897         }
2898         return true;
2899     }
2900   }
2901 
2902   return false;
2903 }
2904 
2905 //===----------------------------------------------------------------------===//
2906 // Utility functions
2907 //===----------------------------------------------------------------------===//
2908 
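// Parse an operand of the form "<prefix>:<integer>", allowing a leading
// minus on the value.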
2909 OperandMatchResultTy
2910 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) {
2911   switch(getLexer().getKind()) {
2912     default: return MatchOperand_NoMatch;
2913     case AsmToken::Identifier: {
2914       StringRef Name = Parser.getTok().getString();
2915       if (!Name.equals(Prefix)) {
2916         return MatchOperand_NoMatch;
2917       }
2918 
2919       Parser.Lex();
2920       if (getLexer().isNot(AsmToken::Colon))
2921         return MatchOperand_ParseFail;
2922 
2923       Parser.Lex();
2924 
2925       bool IsMinus = false;
2926       if (getLexer().getKind() == AsmToken::Minus) {
2927         Parser.Lex();
2928         IsMinus = true;
2929       }
2930 
2931       if (getLexer().isNot(AsmToken::Integer))
2932         return MatchOperand_ParseFail;
2933 
2934       if (getParser().parseAbsoluteExpression(Int))
2935         return MatchOperand_ParseFail;
2936 
2937       if (IsMinus)
2938         Int = -Int;
2939       break;
2940     }
2941   }
2942   return MatchOperand_Success;
2943 }
2944 
2945 OperandMatchResultTy
2946 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
2947                                     AMDGPUOperand::ImmTy ImmTy,
2948                                     bool (*ConvertResult)(int64_t&)) {
2949   SMLoc S = Parser.getTok().getLoc();
2950   int64_t Value = 0;
2951 
2952   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
2953   if (Res != MatchOperand_Success)
2954     return Res;
2955 
2956   if (ConvertResult && !ConvertResult(Value)) {
2957     return MatchOperand_ParseFail;
2958   }
2959 
2960   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
2961   return MatchOperand_Success;
2962 }
2963 
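// Parse an operand of the form "<prefix>:[v,v,...]" where each element is
// 0 or 1, and pack the elements into a bit mask.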
2964 OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix(
2965   const char *Prefix,
2966   OperandVector &Operands,
2967   AMDGPUOperand::ImmTy ImmTy,
2968   bool (*ConvertResult)(int64_t&)) {
2969   StringRef Name = Parser.getTok().getString();
2970   if (!Name.equals(Prefix))
2971     return MatchOperand_NoMatch;
2972 
2973   Parser.Lex();
2974   if (getLexer().isNot(AsmToken::Colon))
2975     return MatchOperand_ParseFail;
2976 
2977   Parser.Lex();
2978   if (getLexer().isNot(AsmToken::LBrac))
2979     return MatchOperand_ParseFail;
2980   Parser.Lex();
2981 
2982   unsigned Val = 0;
2983   SMLoc S = Parser.getTok().getLoc();
2984 
2985   // FIXME: How to verify the number of elements matches the number of src
2986   // operands?
2987   for (int I = 0; I < 4; ++I) {
2988     if (I != 0) {
2989       if (getLexer().is(AsmToken::RBrac))
2990         break;
2991 
2992       if (getLexer().isNot(AsmToken::Comma))
2993         return MatchOperand_ParseFail;
2994       Parser.Lex();
2995     }
2996 
2997     if (getLexer().isNot(AsmToken::Integer))
2998       return MatchOperand_ParseFail;
2999 
3000     int64_t Op;
3001     if (getParser().parseAbsoluteExpression(Op))
3002       return MatchOperand_ParseFail;
3003 
3004     if (Op != 0 && Op != 1)
3005       return MatchOperand_ParseFail;
3006     Val |= (Op << I);
3007   }
3008 
3009   Parser.Lex();
3010   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
3011   return MatchOperand_Success;
3012 }
3013 
3014 OperandMatchResultTy
3015 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
3016                                AMDGPUOperand::ImmTy ImmTy) {
3017   int64_t Bit = 0;
3018   SMLoc S = Parser.getTok().getLoc();
3019 
3020   // If we are at the end of the statement, this is a default argument, so
3021   // use the default value.
3022   if (getLexer().isNot(AsmToken::EndOfStatement)) {
3023     switch(getLexer().getKind()) {
3024       case AsmToken::Identifier: {
3025         StringRef Tok = Parser.getTok().getString();
3026         if (Tok == Name) {
3027           Bit = 1;
3028           Parser.Lex();
3029         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
3030           Bit = 0;
3031           Parser.Lex();
3032         } else {
3033           return MatchOperand_NoMatch;
3034         }
3035         break;
3036       }
3037       default:
3038         return MatchOperand_NoMatch;
3039     }
3040   }
3041 
3042   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
3043   return MatchOperand_Success;
3044 }
3045 
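// Add an optional immediate operand to Inst: the value parsed from the
// source if present, otherwise the given default.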
3046 static void addOptionalImmOperand(
3047   MCInst& Inst, const OperandVector& Operands,
3048   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
3049   AMDGPUOperand::ImmTy ImmT,
3050   int64_t Default = 0) {
3051   auto i = OptionalIdx.find(ImmT);
3052   if (i != OptionalIdx.end()) {
3053     unsigned Idx = i->second;
3054     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
3055   } else {
3056     Inst.addOperand(MCOperand::createImm(Default));
3057   }
3058 }
3059 
3060 OperandMatchResultTy
3061 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
3062   if (getLexer().isNot(AsmToken::Identifier)) {
3063     return MatchOperand_NoMatch;
3064   }
3065   StringRef Tok = Parser.getTok().getString();
3066   if (Tok != Prefix) {
3067     return MatchOperand_NoMatch;
3068   }
3069 
3070   Parser.Lex();
3071   if (getLexer().isNot(AsmToken::Colon)) {
3072     return MatchOperand_ParseFail;
3073   }
3074 
3075   Parser.Lex();
3076   if (getLexer().isNot(AsmToken::Identifier)) {
3077     return MatchOperand_ParseFail;
3078   }
3079 
3080   Value = Parser.getTok().getString();
3081   return MatchOperand_Success;
3082 }
3083 
3084 //===----------------------------------------------------------------------===//
3085 // ds
3086 //===----------------------------------------------------------------------===//
3087 
3088 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
3089                                     const OperandVector &Operands) {
3090   OptionalImmIndexMap OptionalIdx;
3091 
3092   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3093     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3094 
3095     // Add the register arguments
3096     if (Op.isReg()) {
3097       Op.addRegOperands(Inst, 1);
3098       continue;
3099     }
3100 
3101     // Handle optional arguments
3102     OptionalIdx[Op.getImmTy()] = i;
3103   }
3104 
3105   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
3106   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
3107   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
3108 
3109   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
3110 }
3111 
3112 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
3113                                 bool IsGdsHardcoded) {
3114   OptionalImmIndexMap OptionalIdx;
3115 
3116   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3117     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3118 
3119     // Add the register arguments
3120     if (Op.isReg()) {
3121       Op.addRegOperands(Inst, 1);
3122       continue;
3123     }
3124 
3125     if (Op.isToken() && Op.getToken() == "gds") {
3126       IsGdsHardcoded = true;
3127       continue;
3128     }
3129 
3130     // Handle optional arguments
3131     OptionalIdx[Op.getImmTy()] = i;
3132   }
3133 
3134   AMDGPUOperand::ImmTy OffsetType =
3135     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si ||
3136      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
3137                                                       AMDGPUOperand::ImmTyOffset;
3138 
3139   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
3140 
3141   if (!IsGdsHardcoded) {
3142     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
3143   }
3144   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
3145 }
3146 
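// Convert parsed EXP operands to MCInst operands and compute the "en" mask
// from the sources that are not "off".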
3147 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
3148   OptionalImmIndexMap OptionalIdx;
3149 
3150   unsigned OperandIdx[4];
3151   unsigned EnMask = 0;
3152   int SrcIdx = 0;
3153 
3154   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3155     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3156 
3157     // Add the register arguments
3158     if (Op.isReg()) {
3159       assert(SrcIdx < 4);
3160       OperandIdx[SrcIdx] = Inst.size();
3161       Op.addRegOperands(Inst, 1);
3162       ++SrcIdx;
3163       continue;
3164     }
3165 
3166     if (Op.isOff()) {
3167       assert(SrcIdx < 4);
3168       OperandIdx[SrcIdx] = Inst.size();
3169       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
3170       ++SrcIdx;
3171       continue;
3172     }
3173 
3174     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
3175       Op.addImmOperands(Inst, 1);
3176       continue;
3177     }
3178 
3179     if (Op.isToken() && Op.getToken() == "done")
3180       continue;
3181 
3182     // Handle optional arguments
3183     OptionalIdx[Op.getImmTy()] = i;
3184   }
3185 
3186   assert(SrcIdx == 4);
3187 
3188   bool Compr = false;
3189   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
3190     Compr = true;
3191     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
3192     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
3193     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
3194   }
3195 
3196   for (auto i = 0; i < SrcIdx; ++i) {
3197     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
3198       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
3199     }
3200   }
3201 
3202   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
3203   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
3204 
3205   Inst.addOperand(MCOperand::createImm(EnMask));
3206 }
3207 
3208 //===----------------------------------------------------------------------===//
3209 // s_waitcnt
3210 //===----------------------------------------------------------------------===//
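// Illustrative forms of the s_waitcnt operand accepted below:
//   s_waitcnt 0                               ; raw immediate
//   s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)   ; named counters, separated by
//                                             ; spaces, '&', or ','
//   s_waitcnt vmcnt_sat(1000)                 ; "_sat" clamps an oversized value
//                                             ; instead of reporting an error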
3211 
3212 static bool
3213 encodeCnt(
3214   const AMDGPU::IsaInfo::IsaVersion ISA,
3215   int64_t &IntVal,
3216   int64_t CntVal,
3217   bool Saturate,
3218   unsigned (*encode)(const IsaInfo::IsaVersion &Version, unsigned, unsigned),
3219   unsigned (*decode)(const IsaInfo::IsaVersion &Version, unsigned))
3220 {
3221   bool Failed = false;
3222 
3223   IntVal = encode(ISA, IntVal, CntVal);
3224   if (CntVal != decode(ISA, IntVal)) {
3225     if (Saturate) {
3226       IntVal = encode(ISA, IntVal, -1);
3227     } else {
3228       Failed = true;
3229     }
3230   }
3231   return Failed;
3232 }
3233 
3234 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
3235   StringRef CntName = Parser.getTok().getString();
3236   int64_t CntVal;
3237 
3238   Parser.Lex();
3239   if (getLexer().isNot(AsmToken::LParen))
3240     return true;
3241 
3242   Parser.Lex();
3243   if (getLexer().isNot(AsmToken::Integer))
3244     return true;
3245 
3246   SMLoc ValLoc = Parser.getTok().getLoc();
3247   if (getParser().parseAbsoluteExpression(CntVal))
3248     return true;
3249 
3250   AMDGPU::IsaInfo::IsaVersion ISA =
3251       AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
3252 
3253   bool Failed = true;
3254   bool Sat = CntName.endswith("_sat");
3255 
3256   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
3257     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
3258   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
3259     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
3260   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
3261     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
3262   }
3263 
3264   if (Failed) {
3265     Error(ValLoc, "too large value for " + CntName);
3266     return true;
3267   }
3268 
3269   if (getLexer().isNot(AsmToken::RParen)) {
3270     return true;
3271   }
3272 
3273   Parser.Lex();
3274   if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
3275     const AsmToken NextToken = getLexer().peekTok();
3276     if (NextToken.is(AsmToken::Identifier)) {
3277       Parser.Lex();
3278     }
3279   }
3280 
3281   return false;
3282 }
3283 
3284 OperandMatchResultTy
3285 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
3286   AMDGPU::IsaInfo::IsaVersion ISA =
3287       AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
3288   int64_t Waitcnt = getWaitcntBitMask(ISA);
3289   SMLoc S = Parser.getTok().getLoc();
3290 
3291   switch(getLexer().getKind()) {
3292     default: return MatchOperand_ParseFail;
3293     case AsmToken::Integer:
3294       // The operand can be an integer value.
3295       if (getParser().parseAbsoluteExpression(Waitcnt))
3296         return MatchOperand_ParseFail;
3297       break;
3298 
3299     case AsmToken::Identifier:
3300       do {
3301         if (parseCnt(Waitcnt))
3302           return MatchOperand_ParseFail;
3303       } while(getLexer().isNot(AsmToken::EndOfStatement));
3304       break;
3305   }
3306   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
3307   return MatchOperand_Success;
3308 }
3309 
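// Parses the "hwreg(...)" construct used by s_getreg/s_setreg. Illustrative
// syntax only (symbolic names come from the Hwreg::IdSymbolic table):
//   s_getreg_b32 s0, hwreg(HW_REG_MODE)         ; whole register
//   s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4)   ; bitfield at offset 0, width 4
//   s_getreg_b32 s0, 0x1881                     ; raw 16-bit immediate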
3310 bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
3311                                           int64_t &Width) {
3312   using namespace llvm::AMDGPU::Hwreg;
3313 
3314   if (Parser.getTok().getString() != "hwreg")
3315     return true;
3316   Parser.Lex();
3317 
3318   if (getLexer().isNot(AsmToken::LParen))
3319     return true;
3320   Parser.Lex();
3321 
3322   if (getLexer().is(AsmToken::Identifier)) {
3323     HwReg.IsSymbolic = true;
3324     HwReg.Id = ID_UNKNOWN_;
3325     const StringRef tok = Parser.getTok().getString();
3326     int Last = ID_SYMBOLIC_LAST_;
3327     if (isSI() || isCI() || isVI())
3328       Last = ID_SYMBOLIC_FIRST_GFX9_;
3329     for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) {
3330       if (tok == IdSymbolic[i]) {
3331         HwReg.Id = i;
3332         break;
3333       }
3334     }
3335     Parser.Lex();
3336   } else {
3337     HwReg.IsSymbolic = false;
3338     if (getLexer().isNot(AsmToken::Integer))
3339       return true;
3340     if (getParser().parseAbsoluteExpression(HwReg.Id))
3341       return true;
3342   }
3343 
3344   if (getLexer().is(AsmToken::RParen)) {
3345     Parser.Lex();
3346     return false;
3347   }
3348 
3349   // optional params
3350   if (getLexer().isNot(AsmToken::Comma))
3351     return true;
3352   Parser.Lex();
3353 
3354   if (getLexer().isNot(AsmToken::Integer))
3355     return true;
3356   if (getParser().parseAbsoluteExpression(Offset))
3357     return true;
3358 
3359   if (getLexer().isNot(AsmToken::Comma))
3360     return true;
3361   Parser.Lex();
3362 
3363   if (getLexer().isNot(AsmToken::Integer))
3364     return true;
3365   if (getParser().parseAbsoluteExpression(Width))
3366     return true;
3367 
3368   if (getLexer().isNot(AsmToken::RParen))
3369     return true;
3370   Parser.Lex();
3371 
3372   return false;
3373 }
3374 
3375 OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
3376   using namespace llvm::AMDGPU::Hwreg;
3377 
3378   int64_t Imm16Val = 0;
3379   SMLoc S = Parser.getTok().getLoc();
3380 
3381   switch(getLexer().getKind()) {
3382     default: return MatchOperand_NoMatch;
3383     case AsmToken::Integer:
3384       // The operand can be an integer value.
3385       if (getParser().parseAbsoluteExpression(Imm16Val))
3386         return MatchOperand_NoMatch;
3387       if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
3388         Error(S, "invalid immediate: only 16-bit values are legal");
3389         // Do not return an error code; create an imm operand anyway and proceed
3390         // to the next operand, if any. That avoids unnecessary error messages.
3391       }
3392       break;
3393 
3394     case AsmToken::Identifier: {
3395         OperandInfoTy HwReg(ID_UNKNOWN_);
3396         int64_t Offset = OFFSET_DEFAULT_;
3397         int64_t Width = WIDTH_M1_DEFAULT_ + 1;
3398         if (parseHwregConstruct(HwReg, Offset, Width))
3399           return MatchOperand_ParseFail;
3400         if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) {
3401           if (HwReg.IsSymbolic)
3402             Error(S, "invalid symbolic name of hardware register");
3403           else
3404             Error(S, "invalid code of hardware register: only 6-bit values are legal");
3405         }
3406         if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset))
3407           Error(S, "invalid bit offset: only 5-bit values are legal");
3408         if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1))
3409           Error(S, "invalid bitfield width: only values from 1 to 32 are legal");
3410         Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_);
3411       }
3412       break;
3413   }
3414   Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
3415   return MatchOperand_Success;
3416 }
3417 
3418 bool AMDGPUOperand::isSWaitCnt() const {
3419   return isImm();
3420 }
3421 
3422 bool AMDGPUOperand::isHwreg() const {
3423   return isImmTy(ImmTyHwreg);
3424 }
3425 
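// Parses the "sendmsg(...)" construct. Illustrative syntax only (symbolic names
// come from the SendMsg::IdSymbolic/OpGsSymbolic/OpSysSymbolic tables):
//   s_sendmsg sendmsg(MSG_INTERRUPT)
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)    ; message, operation, stream id
//   s_sendmsg 0x1                               ; raw 16-bit immediate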
3426 bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) {
3427   using namespace llvm::AMDGPU::SendMsg;
3428 
3429   if (Parser.getTok().getString() != "sendmsg")
3430     return true;
3431   Parser.Lex();
3432 
3433   if (getLexer().isNot(AsmToken::LParen))
3434     return true;
3435   Parser.Lex();
3436 
3437   if (getLexer().is(AsmToken::Identifier)) {
3438     Msg.IsSymbolic = true;
3439     Msg.Id = ID_UNKNOWN_;
3440     const std::string tok = Parser.getTok().getString();
3441     for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
3442       switch(i) {
3443         default: continue; // Omit gaps.
3444         case ID_INTERRUPT: case ID_GS: case ID_GS_DONE:  case ID_SYSMSG: break;
3445       }
3446       if (tok == IdSymbolic[i]) {
3447         Msg.Id = i;
3448         break;
3449       }
3450     }
3451     Parser.Lex();
3452   } else {
3453     Msg.IsSymbolic = false;
3454     if (getLexer().isNot(AsmToken::Integer))
3455       return true;
3456     if (getParser().parseAbsoluteExpression(Msg.Id))
3457       return true;
3458     if (getLexer().is(AsmToken::Integer))
3459       if (getParser().parseAbsoluteExpression(Msg.Id))
3460         Msg.Id = ID_UNKNOWN_;
3461   }
3462   if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest.
3463     return false;
3464 
3465   if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) {
3466     if (getLexer().isNot(AsmToken::RParen))
3467       return true;
3468     Parser.Lex();
3469     return false;
3470   }
3471 
3472   if (getLexer().isNot(AsmToken::Comma))
3473     return true;
3474   Parser.Lex();
3475 
3476   assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG);
3477   Operation.Id = ID_UNKNOWN_;
3478   if (getLexer().is(AsmToken::Identifier)) {
3479     Operation.IsSymbolic = true;
3480     const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
3481     const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
3482     const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
3483     const StringRef Tok = Parser.getTok().getString();
3484     for (int i = F; i < L; ++i) {
3485       if (Tok == S[i]) {
3486         Operation.Id = i;
3487         break;
3488       }
3489     }
3490     Parser.Lex();
3491   } else {
3492     Operation.IsSymbolic = false;
3493     if (getLexer().isNot(AsmToken::Integer))
3494       return true;
3495     if (getParser().parseAbsoluteExpression(Operation.Id))
3496       return true;
3497   }
3498 
3499   if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
3500     // Stream id is optional.
3501     if (getLexer().is(AsmToken::RParen)) {
3502       Parser.Lex();
3503       return false;
3504     }
3505 
3506     if (getLexer().isNot(AsmToken::Comma))
3507       return true;
3508     Parser.Lex();
3509 
3510     if (getLexer().isNot(AsmToken::Integer))
3511       return true;
3512     if (getParser().parseAbsoluteExpression(StreamId))
3513       return true;
3514   }
3515 
3516   if (getLexer().isNot(AsmToken::RParen))
3517     return true;
3518   Parser.Lex();
3519   return false;
3520 }
3521 
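// Interpolation operands. Illustrative syntax:
//   v_interp_p1_f32 v0, v1, attr0.x     ; "attr<N>.<chan>" selects attribute and channel
//   v_interp_mov_f32 v0, p10, attr0.y   ; "p10"/"p20"/"p0" selects the parameter slot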
3522 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
3523   if (getLexer().getKind() != AsmToken::Identifier)
3524     return MatchOperand_NoMatch;
3525 
3526   StringRef Str = Parser.getTok().getString();
3527   int Slot = StringSwitch<int>(Str)
3528     .Case("p10", 0)
3529     .Case("p20", 1)
3530     .Case("p0", 2)
3531     .Default(-1);
3532 
3533   SMLoc S = Parser.getTok().getLoc();
3534   if (Slot == -1)
3535     return MatchOperand_ParseFail;
3536 
3537   Parser.Lex();
3538   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
3539                                               AMDGPUOperand::ImmTyInterpSlot));
3540   return MatchOperand_Success;
3541 }
3542 
3543 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
3544   if (getLexer().getKind() != AsmToken::Identifier)
3545     return MatchOperand_NoMatch;
3546 
3547   StringRef Str = Parser.getTok().getString();
3548   if (!Str.startswith("attr"))
3549     return MatchOperand_NoMatch;
3550 
3551   StringRef Chan = Str.take_back(2);
3552   int AttrChan = StringSwitch<int>(Chan)
3553     .Case(".x", 0)
3554     .Case(".y", 1)
3555     .Case(".z", 2)
3556     .Case(".w", 3)
3557     .Default(-1);
3558   if (AttrChan == -1)
3559     return MatchOperand_ParseFail;
3560 
3561   Str = Str.drop_back(2).drop_front(4);
3562 
3563   uint8_t Attr;
3564   if (Str.getAsInteger(10, Attr))
3565     return MatchOperand_ParseFail;
3566 
3567   SMLoc S = Parser.getTok().getLoc();
3568   Parser.Lex();
3569   if (Attr > 63) {
3570     Error(S, "out of bounds attr");
3571     return MatchOperand_Success;
3572   }
3573 
3574   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
3575 
3576   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
3577                                               AMDGPUOperand::ImmTyInterpAttr));
3578   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
3579                                               AMDGPUOperand::ImmTyAttrChan));
3580   return MatchOperand_Success;
3581 }
3582 
3583 void AMDGPUAsmParser::errorExpTgt() {
3584   Error(Parser.getTok().getLoc(), "invalid exp target");
3585 }
3586 
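// Maps an exp target name to its encoding. The accepted names and values below
// are those checked in this function:
//   mrt0..mrt7 -> 0..7, mrtz -> 8, null -> 9, pos0..pos3 -> 12..15,
//   param0..param31 -> 32..63, invalid_target_<N> -> N (with a diagnostic).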
3587 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
3588                                                       uint8_t &Val) {
3589   if (Str == "null") {
3590     Val = 9;
3591     return MatchOperand_Success;
3592   }
3593 
3594   if (Str.startswith("mrt")) {
3595     Str = Str.drop_front(3);
3596     if (Str == "z") { // == mrtz
3597       Val = 8;
3598       return MatchOperand_Success;
3599     }
3600 
3601     if (Str.getAsInteger(10, Val))
3602       return MatchOperand_ParseFail;
3603 
3604     if (Val > 7)
3605       errorExpTgt();
3606 
3607     return MatchOperand_Success;
3608   }
3609 
3610   if (Str.startswith("pos")) {
3611     Str = Str.drop_front(3);
3612     if (Str.getAsInteger(10, Val))
3613       return MatchOperand_ParseFail;
3614 
3615     if (Val > 3)
3616       errorExpTgt();
3617 
3618     Val += 12;
3619     return MatchOperand_Success;
3620   }
3621 
3622   if (Str.startswith("param")) {
3623     Str = Str.drop_front(5);
3624     if (Str.getAsInteger(10, Val))
3625       return MatchOperand_ParseFail;
3626 
3627     if (Val >= 32)
3628       errorExpTgt();
3629 
3630     Val += 32;
3631     return MatchOperand_Success;
3632   }
3633 
3634   if (Str.startswith("invalid_target_")) {
3635     Str = Str.drop_front(15);
3636     if (Str.getAsInteger(10, Val))
3637       return MatchOperand_ParseFail;
3638 
3639     errorExpTgt();
3640     return MatchOperand_Success;
3641   }
3642 
3643   return MatchOperand_NoMatch;
3644 }
3645 
3646 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
3647   uint8_t Val;
3648   StringRef Str = Parser.getTok().getString();
3649 
3650   auto Res = parseExpTgtImpl(Str, Val);
3651   if (Res != MatchOperand_Success)
3652     return Res;
3653 
3654   SMLoc S = Parser.getTok().getLoc();
3655   Parser.Lex();
3656 
3657   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
3658                                               AMDGPUOperand::ImmTyExpTgt));
3659   return MatchOperand_Success;
3660 }
3661 
3662 OperandMatchResultTy
3663 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
3664   using namespace llvm::AMDGPU::SendMsg;
3665 
3666   int64_t Imm16Val = 0;
3667   SMLoc S = Parser.getTok().getLoc();
3668 
3669   switch(getLexer().getKind()) {
3670   default:
3671     return MatchOperand_NoMatch;
3672   case AsmToken::Integer:
3673     // The operand can be an integer value.
3674     if (getParser().parseAbsoluteExpression(Imm16Val))
3675       return MatchOperand_NoMatch;
3676     if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
3677       Error(S, "invalid immediate: only 16-bit values are legal");
3678       // Do not return an error code; create an imm operand anyway and proceed
3679       // to the next operand, if any. That avoids unnecessary error messages.
3680     }
3681     break;
3682   case AsmToken::Identifier: {
3683       OperandInfoTy Msg(ID_UNKNOWN_);
3684       OperandInfoTy Operation(OP_UNKNOWN_);
3685       int64_t StreamId = STREAM_ID_DEFAULT_;
3686       if (parseSendMsgConstruct(Msg, Operation, StreamId))
3687         return MatchOperand_ParseFail;
3688       do {
3689         // Validate and encode message ID.
3690         if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE)
3691                 || Msg.Id == ID_SYSMSG)) {
3692           if (Msg.IsSymbolic)
3693             Error(S, "invalid/unsupported symbolic name of message");
3694           else
3695             Error(S, "invalid/unsupported code of message");
3696           break;
3697         }
3698         Imm16Val = (Msg.Id << ID_SHIFT_);
3699         // Validate and encode operation ID.
3700         if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) {
3701           if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) {
3702             if (Operation.IsSymbolic)
3703               Error(S, "invalid symbolic name of GS_OP");
3704             else
3705               Error(S, "invalid code of GS_OP: only 2-bit values are legal");
3706             break;
3707           }
3708           if (Operation.Id == OP_GS_NOP
3709               && Msg.Id != ID_GS_DONE) {
3710             Error(S, "invalid GS_OP: NOP is for GS_DONE only");
3711             break;
3712           }
3713           Imm16Val |= (Operation.Id << OP_SHIFT_);
3714         }
3715         if (Msg.Id == ID_SYSMSG) {
3716           if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) {
3717             if (Operation.IsSymbolic)
3718               Error(S, "invalid/unsupported symbolic name of SYSMSG_OP");
3719             else
3720               Error(S, "invalid/unsupported code of SYSMSG_OP");
3721             break;
3722           }
3723           Imm16Val |= (Operation.Id << OP_SHIFT_);
3724         }
3725         // Validate and encode stream ID.
3726         if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
3727           if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) {
3728             Error(S, "invalid stream id: only 2-bit values are legal");
3729             break;
3730           }
3731           Imm16Val |= (StreamId << STREAM_ID_SHIFT_);
3732         }
3733       } while (false);
3734     }
3735     break;
3736   }
3737   Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
3738   return MatchOperand_Success;
3739 }
3740 
3741 bool AMDGPUOperand::isSendMsg() const {
3742   return isImmTy(ImmTySendMsg);
3743 }
3744 
3745 //===----------------------------------------------------------------------===//
3746 // parser helpers
3747 //===----------------------------------------------------------------------===//
3748 
3749 bool
3750 AMDGPUAsmParser::trySkipId(const StringRef Id) {
3751   if (getLexer().getKind() == AsmToken::Identifier &&
3752       Parser.getTok().getString() == Id) {
3753     Parser.Lex();
3754     return true;
3755   }
3756   return false;
3757 }
3758 
3759 bool
3760 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
3761   if (getLexer().getKind() == Kind) {
3762     Parser.Lex();
3763     return true;
3764   }
3765   return false;
3766 }
3767 
3768 bool
3769 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
3770                            const StringRef ErrMsg) {
3771   if (!trySkipToken(Kind)) {
3772     Error(Parser.getTok().getLoc(), ErrMsg);
3773     return false;
3774   }
3775   return true;
3776 }
3777 
3778 bool
3779 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
3780   return !getParser().parseAbsoluteExpression(Imm);
3781 }
3782 
3783 bool
3784 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
3785   SMLoc S = Parser.getTok().getLoc();
3786   if (getLexer().getKind() == AsmToken::String) {
3787     Val = Parser.getTok().getStringContents();
3788     Parser.Lex();
3789     return true;
3790   } else {
3791     Error(S, ErrMsg);
3792     return false;
3793   }
3794 }
3795 
3796 //===----------------------------------------------------------------------===//
3797 // swizzle
3798 //===----------------------------------------------------------------------===//
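// The ds_swizzle_b32 offset may be written either as a plain 16-bit value or
// via the swizzle() macro. Illustrative examples (mode names come from
// Swizzle::IdSymbolic):
//   ds_swizzle_b32 v0, v1 offset:0xffff
//   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v0, v1 offset:swizzle(BITMASK_PERM, "01pip")
//   ds_swizzle_b32 v0, v1 offset:swizzle(BROADCAST, 8, 3)
//   ds_swizzle_b32 v0, v1 offset:swizzle(SWAP, 4)
//   ds_swizzle_b32 v0, v1 offset:swizzle(REVERSE, 8)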
3799 
3800 LLVM_READNONE
3801 static unsigned
3802 encodeBitmaskPerm(const unsigned AndMask,
3803                   const unsigned OrMask,
3804                   const unsigned XorMask) {
3805   using namespace llvm::AMDGPU::Swizzle;
3806 
3807   return BITMASK_PERM_ENC |
3808          (AndMask << BITMASK_AND_SHIFT) |
3809          (OrMask  << BITMASK_OR_SHIFT)  |
3810          (XorMask << BITMASK_XOR_SHIFT);
3811 }
3812 
3813 bool
3814 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
3815                                       const unsigned MinVal,
3816                                       const unsigned MaxVal,
3817                                       const StringRef ErrMsg) {
3818   for (unsigned i = 0; i < OpNum; ++i) {
3819     if (!skipToken(AsmToken::Comma, "expected a comma")) {
3820       return false;
3821     }
3822     SMLoc ExprLoc = Parser.getTok().getLoc();
3823     if (!parseExpr(Op[i])) {
3824       return false;
3825     }
3826     if (Op[i] < MinVal || Op[i] > MaxVal) {
3827       Error(ExprLoc, ErrMsg);
3828       return false;
3829     }
3830   }
3831 
3832   return true;
3833 }
3834 
3835 bool
3836 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
3837   using namespace llvm::AMDGPU::Swizzle;
3838 
3839   int64_t Lane[LANE_NUM];
3840   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
3841                            "expected a 2-bit lane id")) {
3842     Imm = QUAD_PERM_ENC;
3843     for (auto i = 0; i < LANE_NUM; ++i) {
3844       Imm |= Lane[i] << (LANE_SHIFT * i);
3845     }
3846     return true;
3847   }
3848   return false;
3849 }
3850 
3851 bool
3852 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
3853   using namespace llvm::AMDGPU::Swizzle;
3854 
3855   SMLoc S = Parser.getTok().getLoc();
3856   int64_t GroupSize;
3857   int64_t LaneIdx;
3858 
3859   if (!parseSwizzleOperands(1, &GroupSize,
3860                             2, 32,
3861                             "group size must be in the interval [2,32]")) {
3862     return false;
3863   }
3864   if (!isPowerOf2_64(GroupSize)) {
3865     Error(S, "group size must be a power of two");
3866     return false;
3867   }
3868   if (parseSwizzleOperands(1, &LaneIdx,
3869                            0, GroupSize - 1,
3870                            "lane id must be in the interval [0,group size - 1]")) {
3871     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
3872     return true;
3873   }
3874   return false;
3875 }
3876 
3877 bool
3878 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
3879   using namespace llvm::AMDGPU::Swizzle;
3880 
3881   SMLoc S = Parser.getTok().getLoc();
3882   int64_t GroupSize;
3883 
3884   if (!parseSwizzleOperands(1, &GroupSize,
3885       2, 32, "group size must be in the interval [2,32]")) {
3886     return false;
3887   }
3888   if (!isPowerOf2_64(GroupSize)) {
3889     Error(S, "group size must be a power of two");
3890     return false;
3891   }
3892 
3893   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
3894   return true;
3895 }
3896 
3897 bool
3898 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
3899   using namespace llvm::AMDGPU::Swizzle;
3900 
3901   SMLoc S = Parser.getTok().getLoc();
3902   int64_t GroupSize;
3903 
3904   if (!parseSwizzleOperands(1, &GroupSize,
3905       1, 16, "group size must be in the interval [1,16]")) {
3906     return false;
3907   }
3908   if (!isPowerOf2_64(GroupSize)) {
3909     Error(S, "group size must be a power of two");
3910     return false;
3911   }
3912 
3913   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
3914   return true;
3915 }
3916 
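// Each of the 5 mask characters controls one lane-index bit (most-significant
// bit first): '0' clears the bit, '1' sets it, 'p' preserves it, and 'i'
// inverts it. For example (illustrative), "00pip" clears bits 4..3, preserves
// bits 2 and 0, and inverts bit 1; encodeBitmaskPerm() packs this as the
// AND/OR/XOR masks built below.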
3917 bool
3918 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
3919   using namespace llvm::AMDGPU::Swizzle;
3920 
3921   if (!skipToken(AsmToken::Comma, "expected a comma")) {
3922     return false;
3923   }
3924 
3925   StringRef Ctl;
3926   SMLoc StrLoc = Parser.getTok().getLoc();
3927   if (!parseString(Ctl)) {
3928     return false;
3929   }
3930   if (Ctl.size() != BITMASK_WIDTH) {
3931     Error(StrLoc, "expected a 5-character mask");
3932     return false;
3933   }
3934 
3935   unsigned AndMask = 0;
3936   unsigned OrMask = 0;
3937   unsigned XorMask = 0;
3938 
3939   for (size_t i = 0; i < Ctl.size(); ++i) {
3940     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
3941     switch(Ctl[i]) {
3942     default:
3943       Error(StrLoc, "invalid mask");
3944       return false;
3945     case '0':
3946       break;
3947     case '1':
3948       OrMask |= Mask;
3949       break;
3950     case 'p':
3951       AndMask |= Mask;
3952       break;
3953     case 'i':
3954       AndMask |= Mask;
3955       XorMask |= Mask;
3956       break;
3957     }
3958   }
3959 
3960   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
3961   return true;
3962 }
3963 
3964 bool
3965 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
3966 
3967   SMLoc OffsetLoc = Parser.getTok().getLoc();
3968 
3969   if (!parseExpr(Imm)) {
3970     return false;
3971   }
3972   if (!isUInt<16>(Imm)) {
3973     Error(OffsetLoc, "expected a 16-bit offset");
3974     return false;
3975   }
3976   return true;
3977 }
3978 
3979 bool
3980 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
3981   using namespace llvm::AMDGPU::Swizzle;
3982 
3983   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
3984 
3985     SMLoc ModeLoc = Parser.getTok().getLoc();
3986     bool Ok = false;
3987 
3988     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
3989       Ok = parseSwizzleQuadPerm(Imm);
3990     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
3991       Ok = parseSwizzleBitmaskPerm(Imm);
3992     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
3993       Ok = parseSwizzleBroadcast(Imm);
3994     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
3995       Ok = parseSwizzleSwap(Imm);
3996     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
3997       Ok = parseSwizzleReverse(Imm);
3998     } else {
3999       Error(ModeLoc, "expected a swizzle mode");
4000     }
4001 
4002     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
4003   }
4004 
4005   return false;
4006 }
4007 
4008 OperandMatchResultTy
4009 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
4010   SMLoc S = Parser.getTok().getLoc();
4011   int64_t Imm = 0;
4012 
4013   if (trySkipId("offset")) {
4014 
4015     bool Ok = false;
4016     if (skipToken(AsmToken::Colon, "expected a colon")) {
4017       if (trySkipId("swizzle")) {
4018         Ok = parseSwizzleMacro(Imm);
4019       } else {
4020         Ok = parseSwizzleOffset(Imm);
4021       }
4022     }
4023 
4024     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
4025 
4026     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
4027   } else {
4028     // Swizzle "offset" operand is optional.
4029     // If it is omitted, try parsing other optional operands.
4030     return parseOptionalOpr(Operands);
4031   }
4032 }
4033 
4034 bool
4035 AMDGPUOperand::isSwizzle() const {
4036   return isImmTy(ImmTySwizzle);
4037 }
4038 
4039 //===----------------------------------------------------------------------===//
4040 // sopp branch targets
4041 //===----------------------------------------------------------------------===//
4042 
4043 OperandMatchResultTy
4044 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
4045   SMLoc S = Parser.getTok().getLoc();
4046 
4047   switch (getLexer().getKind()) {
4048     default: return MatchOperand_ParseFail;
4049     case AsmToken::Integer: {
4050       int64_t Imm;
4051       if (getParser().parseAbsoluteExpression(Imm))
4052         return MatchOperand_ParseFail;
4053       Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
4054       return MatchOperand_Success;
4055     }
4056 
4057     case AsmToken::Identifier:
4058       Operands.push_back(AMDGPUOperand::CreateExpr(this,
4059           MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
4060                                   Parser.getTok().getString()), getContext()), S));
4061       Parser.Lex();
4062       return MatchOperand_Success;
4063   }
4064 }
4065 
4066 //===----------------------------------------------------------------------===//
4067 // mubuf
4068 //===----------------------------------------------------------------------===//
4069 
4070 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
4071   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
4072 }
4073 
4074 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
4075   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
4076 }
4077 
4078 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultTFE() const {
4079   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyTFE);
4080 }
4081 
4082 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
4083                                const OperandVector &Operands,
4084                                bool IsAtomic, bool IsAtomicReturn) {
4085   bool HasLdsModifier = false;
4086   OptionalImmIndexMap OptionalIdx;
4087   assert(IsAtomicReturn ? IsAtomic : true);
4088 
4089   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4090     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4091 
4092     // Add the register arguments
4093     if (Op.isReg()) {
4094       Op.addRegOperands(Inst, 1);
4095       continue;
4096     }
4097 
4098     // Handle the case where soffset is an immediate
4099     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
4100       Op.addImmOperands(Inst, 1);
4101       continue;
4102     }
4103 
4104     HasLdsModifier = Op.isLDS();
4105 
4106     // Handle tokens like 'offen' which are sometimes hard-coded into the
4107     // asm string.  There are no MCInst operands for these.
4108     if (Op.isToken()) {
4109       continue;
4110     }
4111     assert(Op.isImm());
4112 
4113     // Handle optional arguments
4114     OptionalIdx[Op.getImmTy()] = i;
4115   }
4116 
4117   // This is a workaround for an llvm quirk which may result in an
4118   // incorrect instruction selection. The lds and non-lds versions of
4119   // MUBUF instructions are identical except that the lds versions
4120   // have a mandatory 'lds' modifier. However, this modifier follows the
4121   // optional modifiers, and the llvm asm matcher regards it as optional
4122   // too. As a result, the lds version of an opcode may be selected even
4123   // if the instruction has no 'lds' modifier.
4124   if (!HasLdsModifier) {
4125     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
4126     if (NoLdsOpcode != -1) { // Got lds version - correct it.
4127       Inst.setOpcode(NoLdsOpcode);
4128     }
4129   }
4130 
4131   // Copy $vdata_in operand and insert as $vdata for MUBUF_Atomic RTN insns.
4132   if (IsAtomicReturn) {
4133     MCInst::iterator I = Inst.begin(); // $vdata_in is always at the beginning.
4134     Inst.insert(I, *I);
4135   }
4136 
4137   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
4138   if (!IsAtomic) { // glc is hard-coded.
4139     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
4140   }
4141   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
4142 
4143   if (!HasLdsModifier) { // tfe is not legal with lds opcodes
4144     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
4145   }
4146 }
4147 
4148 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
4149   OptionalImmIndexMap OptionalIdx;
4150 
4151   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4152     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4153 
4154     // Add the register arguments
4155     if (Op.isReg()) {
4156       Op.addRegOperands(Inst, 1);
4157       continue;
4158     }
4159 
4160     // Handle the case where soffset is an immediate
4161     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
4162       Op.addImmOperands(Inst, 1);
4163       continue;
4164     }
4165 
4166     // Handle tokens like 'offen' which are sometimes hard-coded into the
4167     // asm string.  There are no MCInst operands for these.
4168     if (Op.isToken()) {
4169       continue;
4170     }
4171     assert(Op.isImm());
4172 
4173     // Handle optional arguments
4174     OptionalIdx[Op.getImmTy()] = i;
4175   }
4176 
4177   addOptionalImmOperand(Inst, Operands, OptionalIdx,
4178                         AMDGPUOperand::ImmTyOffset);
4179   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDFMT);
4180   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyNFMT);
4181   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
4182   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
4183   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
4184 }
4185 
4186 //===----------------------------------------------------------------------===//
4187 // mimg
4188 //===----------------------------------------------------------------------===//
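// Illustrative MIMG syntax handled by cvtMIMG() below (operand layout may vary
// by opcode):
//   image_load v[0:3], v4, s[8:15] dmask:0xf unorm glc
// For atomics, the destination register is also appended as the data source.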
4189 
4190 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
4191                               bool IsAtomic) {
4192   unsigned I = 1;
4193   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4194   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4195     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4196   }
4197 
4198   if (IsAtomic) {
4199     // Add src, same as dst
4200     assert(Desc.getNumDefs() == 1);
4201     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
4202   }
4203 
4204   OptionalImmIndexMap OptionalIdx;
4205 
4206   for (unsigned E = Operands.size(); I != E; ++I) {
4207     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4208 
4209     // Add the register arguments
4210     if (Op.isReg()) {
4211       Op.addRegOperands(Inst, 1);
4212     } else if (Op.isImmModifier()) {
4213       OptionalIdx[Op.getImmTy()] = I;
4214     } else {
4215       llvm_unreachable("unexpected operand type");
4216     }
4217   }
4218 
4219   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
4220   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
4221   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
4222   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
4223   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128);
4224   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
4225   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
4226   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
4227 }
4228 
4229 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
4230   cvtMIMG(Inst, Operands, true);
4231 }
4232 
4233 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDMask() const {
4234   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDMask);
4235 }
4236 
4237 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultUNorm() const {
4238   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyUNorm);
4239 }
4240 
4241 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDA() const {
4242   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDA);
4243 }
4244 
4245 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultR128() const {
4246   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyR128);
4247 }
4248 
4249 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultLWE() const {
4250   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyLWE);
4251 }
4252 
4253 //===----------------------------------------------------------------------===//
4254 // smrd
4255 //===----------------------------------------------------------------------===//
4256 
4257 bool AMDGPUOperand::isSMRDOffset8() const {
4258   return isImm() && isUInt<8>(getImm());
4259 }
4260 
4261 bool AMDGPUOperand::isSMRDOffset20() const {
4262   return isImm() && isUInt<20>(getImm());
4263 }
4264 
4265 bool AMDGPUOperand::isSMRDLiteralOffset() const {
4266   // 32-bit literals are only supported on CI and we only want to use them
4267   // when the offset is > 8-bits.
4268   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
4269 }
4270 
4271 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
4272   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4273 }
4274 
4275 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
4276   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4277 }
4278 
4279 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
4280   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4281 }
4282 
4283 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
4284   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4285 }
4286 
4287 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
4288   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4289 }
4290 
4291 //===----------------------------------------------------------------------===//
4292 // vop3
4293 //===----------------------------------------------------------------------===//
4294 
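// The VOP3 output modifier is written "mul:N" or "div:N" in assembly and is
// encoded as a 2-bit OMOD value; the conversions below implement the mapping
//   mul:1 -> 0, mul:2 -> 1, mul:4 -> 2, div:2 -> 3 (div:1 also encodes as 0).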
4295 static bool ConvertOmodMul(int64_t &Mul) {
4296   if (Mul != 1 && Mul != 2 && Mul != 4)
4297     return false;
4298 
4299   Mul >>= 1;
4300   return true;
4301 }
4302 
4303 static bool ConvertOmodDiv(int64_t &Div) {
4304   if (Div == 1) {
4305     Div = 0;
4306     return true;
4307   }
4308 
4309   if (Div == 2) {
4310     Div = 3;
4311     return true;
4312   }
4313 
4314   return false;
4315 }
4316 
4317 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
4318   if (BoundCtrl == 0) {
4319     BoundCtrl = 1;
4320     return true;
4321   }
4322 
4323   if (BoundCtrl == -1) {
4324     BoundCtrl = 0;
4325     return true;
4326   }
4327 
4328   return false;
4329 }
4330 
4331 // Note: the order in this table matches the order of operands in AsmString.
4332 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
4333   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
4334   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
4335   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
4336   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
4337   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
4338   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
4339   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
4340   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
4341   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
4342   {"dfmt",    AMDGPUOperand::ImmTyDFMT, false, nullptr},
4343   {"nfmt",    AMDGPUOperand::ImmTyNFMT, false, nullptr},
4344   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
4345   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
4346   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
4347   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
4348   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
4349   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
4350   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
4351   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
4352   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
4353   {"r128",    AMDGPUOperand::ImmTyR128,  true, nullptr},
4354   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
4355   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
4356   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
4357   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
4358   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
4359   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
4360   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
4361   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
4362   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
4363   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
4364   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
4365   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
4366   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
4367   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
4368   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
4369 };
4370 
4371 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
4372   unsigned size = Operands.size();
4373   assert(size > 0);
4374 
4375   OperandMatchResultTy res = parseOptionalOpr(Operands);
4376 
4377   // This is a hack to enable hardcoded mandatory operands which follow
4378   // optional operands.
4379   //
4380   // The current design assumes that all operands after the first optional
4381   // operand are also optional. However, the implementation of some instructions
4382   // violates this rule (see e.g. flat/global atomics, which have hardcoded 'glc' operands).
4383   //
4384   // To alleviate this problem, we have to (implicitly) parse extra operands
4385   // to make sure the autogenerated parser of custom operands never hits
4386   // hardcoded mandatory operands.
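  //
  // For example (illustrative syntax), in "flat_atomic_swap v0, v[1:2], v2 glc"
  // the trailing 'glc' is a hardcoded operand of the returning atomic, yet the
  // asm matcher reaches it only after optional-operand parsing has begun.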
4387 
4388   if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
4389 
4390     // We have parsed the first optional operand.
4391     // Parse as many operands as necessary to skip all mandatory operands.
4392 
4393     for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
4394       if (res != MatchOperand_Success ||
4395           getLexer().is(AsmToken::EndOfStatement)) break;
4396       if (getLexer().is(AsmToken::Comma)) Parser.Lex();
4397       res = parseOptionalOpr(Operands);
4398     }
4399   }
4400 
4401   return res;
4402 }
4403 
4404 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
4405   OperandMatchResultTy res;
4406   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
4407     // try to parse any optional operand here
4408     if (Op.IsBit) {
4409       res = parseNamedBit(Op.Name, Operands, Op.Type);
4410     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
4411       res = parseOModOperand(Operands);
4412     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
4413                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
4414                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
4415       res = parseSDWASel(Operands, Op.Name, Op.Type);
4416     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
4417       res = parseSDWADstUnused(Operands);
4418     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
4419                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
4420                Op.Type == AMDGPUOperand::ImmTyNegLo ||
4421                Op.Type == AMDGPUOperand::ImmTyNegHi) {
4422       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
4423                                         Op.ConvertResult);
4424     } else {
4425       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
4426     }
4427     if (res != MatchOperand_NoMatch) {
4428       return res;
4429     }
4430   }
4431   return MatchOperand_NoMatch;
4432 }
4433 
4434 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
4435   StringRef Name = Parser.getTok().getString();
4436   if (Name == "mul") {
4437     return parseIntWithPrefix("mul", Operands,
4438                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
4439   }
4440 
4441   if (Name == "div") {
4442     return parseIntWithPrefix("div", Operands,
4443                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
4444   }
4445 
4446   return MatchOperand_NoMatch;
4447 }
4448 
4449 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
4450   cvtVOP3P(Inst, Operands);
4451 
4452   int Opc = Inst.getOpcode();
4453 
4454   int SrcNum;
4455   const int Ops[] = { AMDGPU::OpName::src0,
4456                       AMDGPU::OpName::src1,
4457                       AMDGPU::OpName::src2 };
4458   for (SrcNum = 0;
4459        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
4460        ++SrcNum);
4461   assert(SrcNum > 0);
4462 
4463   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4464   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4465 
4466   if ((OpSel & (1 << SrcNum)) != 0) {
4467     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
4468     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
4469     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
4470   }
4471 }
4472 
4473 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
4474       // 1. This operand is an input-modifiers operand,
4475   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
4476       // 2. this is not the last operand,
4477       && Desc.NumOperands > (OpNum + 1)
4478       // 3. the next operand is a register class,
4479       && Desc.OpInfo[OpNum + 1].RegClass != -1
4480       // 4. and that register is not tied to any other operand.
4481       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
4482 }
4483 
4484 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
4485 {
4486   OptionalImmIndexMap OptionalIdx;
4487   unsigned Opc = Inst.getOpcode();
4488 
4489   unsigned I = 1;
4490   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4491   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4492     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4493   }
4494 
4495   for (unsigned E = Operands.size(); I != E; ++I) {
4496     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4497     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
4498       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
4499     } else if (Op.isInterpSlot() ||
4500                Op.isInterpAttr() ||
4501                Op.isAttrChan()) {
4502       Inst.addOperand(MCOperand::createImm(Op.Imm.Val));
4503     } else if (Op.isImmModifier()) {
4504       OptionalIdx[Op.getImmTy()] = I;
4505     } else {
4506       llvm_unreachable("unhandled operand type");
4507     }
4508   }
4509 
4510   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
4511     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
4512   }
4513 
4514   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
4515     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
4516   }
4517 
4518   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
4519     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
4520   }
4521 }
4522 
4523 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
4524                               OptionalImmIndexMap &OptionalIdx) {
4525   unsigned Opc = Inst.getOpcode();
4526 
4527   unsigned I = 1;
4528   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4529   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4530     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4531   }
4532 
4533   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
4534     // This instruction has src modifiers
4535     for (unsigned E = Operands.size(); I != E; ++I) {
4536       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4537       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
4538         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
4539       } else if (Op.isImmModifier()) {
4540         OptionalIdx[Op.getImmTy()] = I;
4541       } else if (Op.isRegOrImm()) {
4542         Op.addRegOrImmOperands(Inst, 1);
4543       } else {
4544         llvm_unreachable("unhandled operand type");
4545       }
4546     }
4547   } else {
4548     // No src modifiers
4549     for (unsigned E = Operands.size(); I != E; ++I) {
4550       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4551       if (Op.isMod()) {
4552         OptionalIdx[Op.getImmTy()] = I;
4553       } else {
4554         Op.addRegOrImmOperands(Inst, 1);
4555       }
4556     }
4557   }
4558 
4559   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
4560     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
4561   }
4562 
4563   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
4564     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
4565   }
4566 
4567   // Special case for v_mac_{f16, f32}:
4568   // it has a src2 register operand that is tied to the dst operand.
4569   // We don't allow modifiers for this operand in the assembler, so src2_modifiers
4570   // should be 0.
4571   if (Opc == AMDGPU::V_MAC_F32_e64_si || Opc == AMDGPU::V_MAC_F32_e64_vi ||
4572       Opc == AMDGPU::V_MAC_F16_e64_vi) {
4573     auto it = Inst.begin();
4574     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
4575     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
4576     ++it;
4577     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
4578   }
4579 }
4580 
4581 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
4582   OptionalImmIndexMap OptionalIdx;
4583   cvtVOP3(Inst, Operands, OptionalIdx);
4584 }
4585 
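// Packed (VOP3P) modifiers are written as per-source bit lists, e.g.
// (illustrative):
//   v_pk_add_f16 v0, v1, v2 op_sel:[0,1] op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[0,1]
// The parser keeps them as separate immediates; cvtVOP3P() below folds bit J of
// each list into the corresponding src<J>_modifiers operand.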
4586 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
4587                                const OperandVector &Operands) {
4588   OptionalImmIndexMap OptIdx;
4589   const int Opc = Inst.getOpcode();
4590   const MCInstrDesc &Desc = MII.get(Opc);
4591 
4592   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
4593 
4594   cvtVOP3(Inst, Operands, OptIdx);
4595 
4596   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
4597     assert(!IsPacked);
4598     Inst.addOperand(Inst.getOperand(0));
4599   }
4600 
4601   // FIXME: This is messy. Parse the modifiers as if this were a normal VOP3
4602   // instruction, and then figure out where to actually put the modifiers.
4603 
4604   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
4605 
4606   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4607   if (OpSelHiIdx != -1) {
4608     int DefaultVal = IsPacked ? -1 : 0;
4609     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
4610                           DefaultVal);
4611   }
4612 
4613   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
4614   if (NegLoIdx != -1) {
4615     assert(IsPacked);
4616     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
4617     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
4618   }
4619 
4620   const int Ops[] = { AMDGPU::OpName::src0,
4621                       AMDGPU::OpName::src1,
4622                       AMDGPU::OpName::src2 };
4623   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
4624                          AMDGPU::OpName::src1_modifiers,
4625                          AMDGPU::OpName::src2_modifiers };
4626 
4627   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4628 
4629   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4630   unsigned OpSelHi = 0;
4631   unsigned NegLo = 0;
4632   unsigned NegHi = 0;
4633 
4634   if (OpSelHiIdx != -1) {
4635     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
4636   }
4637 
4638   if (NegLoIdx != -1) {
4639     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
4640     NegLo = Inst.getOperand(NegLoIdx).getImm();
4641     NegHi = Inst.getOperand(NegHiIdx).getImm();
4642   }
4643 
4644   for (int J = 0; J < 3; ++J) {
4645     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
4646     if (OpIdx == -1)
4647       break;
4648 
4649     uint32_t ModVal = 0;
4650 
4651     if ((OpSel & (1 << J)) != 0)
4652       ModVal |= SISrcMods::OP_SEL_0;
4653 
4654     if ((OpSelHi & (1 << J)) != 0)
4655       ModVal |= SISrcMods::OP_SEL_1;
4656 
4657     if ((NegLo & (1 << J)) != 0)
4658       ModVal |= SISrcMods::NEG;
4659 
4660     if ((NegHi & (1 << J)) != 0)
4661       ModVal |= SISrcMods::NEG_HI;
4662 
4663     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
4664 
4665     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
4666   }
4667 }
4668 
4669 //===----------------------------------------------------------------------===//
4670 // dpp
4671 //===----------------------------------------------------------------------===//
4672 
4673 bool AMDGPUOperand::isDPPCtrl() const {
4674   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
4675   if (result) {
4676     int64_t Imm = getImm();
4677     return ((Imm >= 0x000) && (Imm <= 0x0ff)) ||
4678            ((Imm >= 0x101) && (Imm <= 0x10f)) ||
4679            ((Imm >= 0x111) && (Imm <= 0x11f)) ||
4680            ((Imm >= 0x121) && (Imm <= 0x12f)) ||
4681            (Imm == 0x130) ||
4682            (Imm == 0x134) ||
4683            (Imm == 0x138) ||
4684            (Imm == 0x13c) ||
4685            (Imm == 0x140) ||
4686            (Imm == 0x141) ||
4687            (Imm == 0x142) ||
4688            (Imm == 0x143);
4689   }
4690   return false;
4691 }
4692 
4693 bool AMDGPUOperand::isGPRIdxMode() const {
4694   return isImm() && isUInt<4>(getImm());
4695 }
4696 
4697 bool AMDGPUOperand::isS16Imm() const {
4698   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
4699 }
4700 
4701 bool AMDGPUOperand::isU16Imm() const {
4702   return isImm() && isUInt<16>(getImm());
4703 }
4704 
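// The dpp_ctrl operand. Forms accepted below and their encodings (taken from
// the checks in parseDPPCtrl):
//   quad_perm:[0,1,2,3] -> 0x000..0x0ff     row_mirror      -> 0x140
//   row_shl:1..15       -> 0x101..0x10f     row_half_mirror -> 0x141
//   row_shr:1..15       -> 0x111..0x11f     row_bcast:15    -> 0x142
//   row_ror:1..15       -> 0x121..0x12f     row_bcast:31    -> 0x143
//   wave_shl:1 -> 0x130, wave_rol:1 -> 0x134, wave_shr:1 -> 0x138, wave_ror:1 -> 0x13c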
4705 OperandMatchResultTy
4706 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
4707   SMLoc S = Parser.getTok().getLoc();
4708   StringRef Prefix;
4709   int64_t Int;
4710 
4711   if (getLexer().getKind() == AsmToken::Identifier) {
4712     Prefix = Parser.getTok().getString();
4713   } else {
4714     return MatchOperand_NoMatch;
4715   }
4716 
4717   if (Prefix == "row_mirror") {
4718     Int = 0x140;
4719     Parser.Lex();
4720   } else if (Prefix == "row_half_mirror") {
4721     Int = 0x141;
4722     Parser.Lex();
4723   } else {
4724     // Check to prevent parseDPPCtrlOps from eating invalid tokens
4725     if (Prefix != "quad_perm"
4726         && Prefix != "row_shl"
4727         && Prefix != "row_shr"
4728         && Prefix != "row_ror"
4729         && Prefix != "wave_shl"
4730         && Prefix != "wave_rol"
4731         && Prefix != "wave_shr"
4732         && Prefix != "wave_ror"
4733         && Prefix != "row_bcast") {
4734       return MatchOperand_NoMatch;
4735     }
4736 
4737     Parser.Lex();
4738     if (getLexer().isNot(AsmToken::Colon))
4739       return MatchOperand_ParseFail;
4740 
4741     if (Prefix == "quad_perm") {
4742       // quad_perm:[%d,%d,%d,%d]
4743       Parser.Lex();
4744       if (getLexer().isNot(AsmToken::LBrac))
4745         return MatchOperand_ParseFail;
4746       Parser.Lex();
4747 
4748       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <= 3))
4749         return MatchOperand_ParseFail;
4750 
4751       for (int i = 0; i < 3; ++i) {
4752         if (getLexer().isNot(AsmToken::Comma))
4753           return MatchOperand_ParseFail;
4754         Parser.Lex();
4755 
4756         int64_t Temp;
4757         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <= 3))
4758           return MatchOperand_ParseFail;
4759         const int shift = i*2 + 2;
4760         Int += (Temp << shift);
4761       }
4762 
4763       if (getLexer().isNot(AsmToken::RBrac))
4764         return MatchOperand_ParseFail;
4765       Parser.Lex();
4766     } else {
4767       // sel:%d
4768       Parser.Lex();
4769       if (getParser().parseAbsoluteExpression(Int))
4770         return MatchOperand_ParseFail;
4771 
4772       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
4773         Int |= 0x100;
4774       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
4775         Int |= 0x110;
4776       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
4777         Int |= 0x120;
4778       } else if (Prefix == "wave_shl" && 1 == Int) {
4779         Int = 0x130;
4780       } else if (Prefix == "wave_rol" && 1 == Int) {
4781         Int = 0x134;
4782       } else if (Prefix == "wave_shr" && 1 == Int) {
4783         Int = 0x138;
4784       } else if (Prefix == "wave_ror" && 1 == Int) {
4785         Int = 0x13C;
4786       } else if (Prefix == "row_bcast") {
4787         if (Int == 15) {
4788           Int = 0x142;
4789         } else if (Int == 31) {
4790           Int = 0x143;
4791         } else {
4792           return MatchOperand_ParseFail;
4793         }
4794       } else {
4795         return MatchOperand_ParseFail;
4796       }
4797     }
4798   }
4799 
4800   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
4801   return MatchOperand_Success;
4802 }
4803 
4804 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
4805   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
4806 }
4807 
4808 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
4809   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
4810 }
4811 
4812 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
4813   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
4814 }
4815 
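// Convert parsed DPP operands into an MCInst: copy the defs, re-add the dst
// register as the tied "old" source, add the source operands with FP input
// modifiers, add dpp_ctrl, and finally append the optional row_mask, bank_mask
// and bound_ctrl immediates with their defaults (0xf, 0xf and 0).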
4816 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
4817   OptionalImmIndexMap OptionalIdx;
4818 
4819   unsigned I = 1;
4820   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4821   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4822     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4823   }
4824 
4825   // All DPP instructions with at least one source operand have a fake "old"
4826   // source at the beginning that's tied to the dst operand. Handle it here.
4827   if (Desc.getNumOperands() >= 2)
4828     Inst.addOperand(Inst.getOperand(0));
4829 
4830   for (unsigned E = Operands.size(); I != E; ++I) {
4831     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4832     // Add the register arguments
    if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32, ...) DPP uses the "vcc" token.
      // Skip it.
      continue;
    } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
4838       Op.addRegWithFPInputModsOperands(Inst, 2);
4839     } else if (Op.isDPPCtrl()) {
4840       Op.addImmOperands(Inst, 1);
4841     } else if (Op.isImm()) {
4842       // Handle optional arguments
4843       OptionalIdx[Op.getImmTy()] = I;
4844     } else {
4845       llvm_unreachable("Invalid operand type");
4846     }
4847   }
4848 
4849   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
4850   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
4851   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
4852 }
4853 
4854 //===----------------------------------------------------------------------===//
4855 // sdwa
4856 //===----------------------------------------------------------------------===//
4857 
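// Parse an SDWA select operand of the form "<Prefix>:<SEL>", where Prefix is
// supplied by the caller (e.g. dst_sel, src0_sel or src1_sel) and SEL is one
// of BYTE_0..BYTE_3, WORD_0, WORD_1 or DWORD.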
4858 OperandMatchResultTy
4859 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
4860                               AMDGPUOperand::ImmTy Type) {
4861   using namespace llvm::AMDGPU::SDWA;
4862 
4863   SMLoc S = Parser.getTok().getLoc();
4864   StringRef Value;
4865   OperandMatchResultTy res;
4866 
4867   res = parseStringWithPrefix(Prefix, Value);
4868   if (res != MatchOperand_Success) {
4869     return res;
4870   }
4871 
4872   int64_t Int;
4873   Int = StringSwitch<int64_t>(Value)
4874         .Case("BYTE_0", SdwaSel::BYTE_0)
4875         .Case("BYTE_1", SdwaSel::BYTE_1)
4876         .Case("BYTE_2", SdwaSel::BYTE_2)
4877         .Case("BYTE_3", SdwaSel::BYTE_3)
4878         .Case("WORD_0", SdwaSel::WORD_0)
4879         .Case("WORD_1", SdwaSel::WORD_1)
4880         .Case("DWORD", SdwaSel::DWORD)
4881         .Default(0xffffffff);
4882   Parser.Lex(); // eat last token
4883 
4884   if (Int == 0xffffffff) {
4885     return MatchOperand_ParseFail;
4886   }
4887 
4888   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
4889   return MatchOperand_Success;
4890 }
4891 
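// Parse "dst_unused:<VALUE>", where VALUE is one of UNUSED_PAD, UNUSED_SEXT or
// UNUSED_PRESERVE.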
4892 OperandMatchResultTy
4893 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
4894   using namespace llvm::AMDGPU::SDWA;
4895 
4896   SMLoc S = Parser.getTok().getLoc();
4897   StringRef Value;
4898   OperandMatchResultTy res;
4899 
4900   res = parseStringWithPrefix("dst_unused", Value);
4901   if (res != MatchOperand_Success) {
4902     return res;
4903   }
4904 
4905   int64_t Int;
4906   Int = StringSwitch<int64_t>(Value)
4907         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
4908         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
4909         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
4910         .Default(0xffffffff);
4911   Parser.Lex(); // eat last token
4912 
4913   if (Int == 0xffffffff) {
4914     return MatchOperand_ParseFail;
4915   }
4916 
4917   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
4918   return MatchOperand_Success;
4919 }
4920 
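// Thin wrappers that select the basic encoding class for cvtSDWA. The VOP2b
// wrapper always skips the explicit "vcc" operand; the VOPC wrapper skips it
// only on VI, where the vcc destination is implicit.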
4921 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
4922   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
4923 }
4924 
4925 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
4926   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
4927 }
4928 
4929 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
4930   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
4931 }
4932 
4933 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
4934   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
4935 }
4936 
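// Convert parsed SDWA operands into an MCInst: copy the defs, skip the "vcc"
// token used as a destination by VOP2b/VOPC encodings if requested, add the
// sources with input modifiers, then append default values for any optional
// clamp/omod/sel/dst_unused operands that were not written explicitly.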
4937 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
4938                               uint64_t BasicInstType, bool skipVcc) {
4939   using namespace llvm::AMDGPU::SDWA;
4940 
4941   OptionalImmIndexMap OptionalIdx;
4942   bool skippedVcc = false;
4943 
4944   unsigned I = 1;
4945   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4946   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4947     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4948   }
4949 
4950   for (unsigned E = Operands.size(); I != E; ++I) {
4951     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4952     if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32, ...) SDWA uses the "vcc" token as dst.
      // Skip it if it is the 2nd operand (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or the 4th operand (v_addc_u32_sdwa v1, vcc, v2, v3, vcc).
      // Only skip VCC if we did not already skip it on the previous iteration.
4957       if (BasicInstType == SIInstrFlags::VOP2 &&
4958           (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
4959         skippedVcc = true;
4960         continue;
4961       } else if (BasicInstType == SIInstrFlags::VOPC &&
4962                  Inst.getNumOperands() == 0) {
4963         skippedVcc = true;
4964         continue;
4965       }
4966     }
4967     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
4968       Op.addRegOrImmWithInputModsOperands(Inst, 2);
4969     } else if (Op.isImm()) {
4970       // Handle optional arguments
4971       OptionalIdx[Op.getImmTy()] = I;
4972     } else {
4973       llvm_unreachable("Invalid operand type");
4974     }
4975     skippedVcc = false;
4976   }
4977 
4978   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
4979       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // V_NOP_sdwa_vi and V_NOP_sdwa_gfx9 have no optional SDWA arguments.
4981     switch (BasicInstType) {
4982     case SIInstrFlags::VOP1:
4983       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
4984       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
4985         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
4986       }
4987       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
4988       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
4989       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
4990       break;
4991 
4992     case SIInstrFlags::VOP2:
4993       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
4994       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
4995         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
4996       }
4997       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
4998       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
4999       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
5000       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
5001       break;
5002 
5003     case SIInstrFlags::VOPC:
5004       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
5005       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
5006       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
5007       break;
5008 
5009     default:
5010       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
5011     }
5012   }
5013 
  // Special case for v_mac_{f16, f32}: it has a src2 register operand that is
  // tied to the dst operand.
5016   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
5017       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
5018     auto it = Inst.begin();
5019     std::advance(
5020       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
5021     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
5022   }
5023 }
5024 
5025 /// Force static initialization.
5026 extern "C" void LLVMInitializeAMDGPUAsmParser() {
5027   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
5028   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
5029 }
5030 
5031 #define GET_REGISTER_MATCHER
5032 #define GET_MATCHER_IMPLEMENTATION
5033 #define GET_MNEMONIC_SPELL_CHECKER
5034 #include "AMDGPUGenAsmMatcher.inc"
5035 
// This function should be defined after the auto-generated include so that the
// MatchClassKind enum is available.
5038 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
5039                                                      unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks whether we were given an immediate operand but
  // expected the corresponding token.
5044   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
5045   switch (Kind) {
5046   case MCK_addr64:
5047     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
5048   case MCK_gds:
5049     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
5050   case MCK_lds:
5051     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
5052   case MCK_glc:
5053     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
5054   case MCK_d16:
5055     return Operand.isD16() ? Match_Success : Match_InvalidOperand;
5056   case MCK_idxen:
5057     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
5058   case MCK_offen:
5059     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
5060   case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token when isToken returns true, and when the
    // name of the expression is not a valid token the match fails, so we
    // need to handle it here.
5067     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
5068   case MCK_SSrcF32:
5069     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
5070   case MCK_SoppBrTarget:
5071     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
5072   case MCK_VReg32OrOff:
5073     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
5074   case MCK_InterpSlot:
5075     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
5076   case MCK_Attr:
5077     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
5078   case MCK_AttrChan:
5079     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
5080   default:
5081     return Match_InvalidOperand;
5082   }
5083 }
5084