1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/ErrorHandling.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
63 
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
67 
68 namespace {
69 
70 class AMDGPUAsmParser;
71 
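// Coarse register categories recognized while parsing register syntax:
// vector (vN), scalar (sN) and trap-temporary (ttmpN) registers, plus
// special registers such as vcc, exec or m0.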
72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };
73 
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
77 
78 class AMDGPUOperand : public MCParsedAsmOperand {
79   enum KindTy {
80     Token,
81     Immediate,
82     Register,
83     Expression
84   } Kind;
85 
86   SMLoc StartLoc, EndLoc;
87   const AMDGPUAsmParser *AsmParser;
88 
89 public:
90   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
92 
93   using Ptr = std::unique_ptr<AMDGPUOperand>;
94 
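  // Source operand modifiers parsed from assembly syntax such as
  // -v0, |v0|, abs(v0) or sext(v0).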
95   struct Modifiers {
96     bool Abs = false;
97     bool Neg = false;
98     bool Sext = false;
99 
100     bool hasFPModifiers() const { return Abs || Neg; }
101     bool hasIntModifiers() const { return Sext; }
102     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103 
104     int64_t getFPModifiersOperand() const {
105       int64_t Operand = 0;
106       Operand |= Abs ? SISrcMods::ABS : 0u;
107       Operand |= Neg ? SISrcMods::NEG : 0u;
108       return Operand;
109     }
110 
111     int64_t getIntModifiersOperand() const {
112       int64_t Operand = 0;
113       Operand |= Sext ? SISrcMods::SEXT : 0u;
114       return Operand;
115     }
116 
117     int64_t getModifiersOperand() const {
118       assert(!(hasFPModifiers() && hasIntModifiers())
119            && "fp and int modifiers should not be used simultaneously");
120       if (hasFPModifiers()) {
121         return getFPModifiersOperand();
122       } else if (hasIntModifiers()) {
123         return getIntModifiersOperand();
124       } else {
125         return 0;
126       }
127     }
128 
129     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
130   };
131 
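  // Kinds of immediate operands. Apart from plain immediates (ImmTyNone),
  // these identify named instruction modifiers (e.g. 'offset:16', 'glc' or
  // 'gds') that the parser turns into immediate operands.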
132   enum ImmTy {
133     ImmTyNone,
134     ImmTyGDS,
135     ImmTyLDS,
136     ImmTyOffen,
137     ImmTyIdxen,
138     ImmTyAddr64,
139     ImmTyOffset,
140     ImmTyInstOffset,
141     ImmTyOffset0,
142     ImmTyOffset1,
143     ImmTyDLC,
144     ImmTyGLC,
145     ImmTySLC,
146     ImmTyTFE,
147     ImmTyD16,
148     ImmTyClampSI,
149     ImmTyOModSI,
150     ImmTyDPP8,
151     ImmTyDppCtrl,
152     ImmTyDppRowMask,
153     ImmTyDppBankMask,
154     ImmTyDppBoundCtrl,
155     ImmTyDppFi,
156     ImmTySdwaDstSel,
157     ImmTySdwaSrc0Sel,
158     ImmTySdwaSrc1Sel,
159     ImmTySdwaDstUnused,
160     ImmTyDMask,
161     ImmTyDim,
162     ImmTyUNorm,
163     ImmTyDA,
164     ImmTyR128A16,
165     ImmTyLWE,
166     ImmTyExpTgt,
167     ImmTyExpCompr,
168     ImmTyExpVM,
169     ImmTyFORMAT,
170     ImmTyHwreg,
171     ImmTyOff,
172     ImmTySendMsg,
173     ImmTyInterpSlot,
174     ImmTyInterpAttr,
175     ImmTyAttrChan,
176     ImmTyOpSel,
177     ImmTyOpSelHi,
178     ImmTyNegLo,
179     ImmTyNegHi,
180     ImmTySwizzle,
181     ImmTyGprIdxMode,
182     ImmTyEndpgm,
183     ImmTyHigh
184   };
185 
186 private:
187   struct TokOp {
188     const char *Data;
189     unsigned Length;
190   };
191 
192   struct ImmOp {
193     int64_t Val;
194     ImmTy Type;
195     bool IsFPImm;
196     Modifiers Mods;
197   };
198 
199   struct RegOp {
200     unsigned RegNo;
201     Modifiers Mods;
202   };
203 
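  // Operand payload; only the member matching Kind is valid.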
204   union {
205     TokOp Tok;
206     ImmOp Imm;
207     RegOp Reg;
208     const MCExpr *Expr;
209   };
210 
211 public:
212   bool isToken() const override {
213     if (Kind == Token)
214       return true;
215 
216     if (Kind != Expression || !Expr)
217       return false;
218 
219     // When parsing operands, we can't always tell if something was meant to be
220     // a token, like 'gds', or an expression that references a global variable.
221     // In this case, we assume the string is an expression, and if we need to
222     // interpret it as a token, then we treat the symbol name as the token.
223     return isa<MCSymbolRefExpr>(Expr);
224   }
225 
226   bool isImm() const override {
227     return Kind == Immediate;
228   }
229 
230   bool isInlinableImm(MVT type) const;
231   bool isLiteralImm(MVT type) const;
232 
233   bool isRegKind() const {
234     return Kind == Register;
235   }
236 
237   bool isReg() const override {
238     return isRegKind() && !hasModifiers();
239   }
240 
241   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
242     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
243   }
244 
245   bool isRegOrImmWithInt16InputMods() const {
246     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
247   }
248 
249   bool isRegOrImmWithInt32InputMods() const {
250     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
251   }
252 
253   bool isRegOrImmWithInt64InputMods() const {
254     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
255   }
256 
257   bool isRegOrImmWithFP16InputMods() const {
258     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
259   }
260 
261   bool isRegOrImmWithFP32InputMods() const {
262     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
263   }
264 
265   bool isRegOrImmWithFP64InputMods() const {
266     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
267   }
268 
269   bool isVReg() const {
270     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
271            isRegClass(AMDGPU::VReg_64RegClassID) ||
272            isRegClass(AMDGPU::VReg_96RegClassID) ||
273            isRegClass(AMDGPU::VReg_128RegClassID) ||
274            isRegClass(AMDGPU::VReg_256RegClassID) ||
275            isRegClass(AMDGPU::VReg_512RegClassID);
276   }
277 
278   bool isVReg32() const {
279     return isRegClass(AMDGPU::VGPR_32RegClassID);
280   }
281 
282   bool isVReg32OrOff() const {
283     return isOff() || isVReg32();
284   }
285 
286   bool isSDWAOperand(MVT type) const;
287   bool isSDWAFP16Operand() const;
288   bool isSDWAFP32Operand() const;
289   bool isSDWAInt16Operand() const;
290   bool isSDWAInt32Operand() const;
291 
292   bool isImmTy(ImmTy ImmT) const {
293     return isImm() && Imm.Type == ImmT;
294   }
295 
296   bool isImmModifier() const {
297     return isImm() && Imm.Type != ImmTyNone;
298   }
299 
300   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
301   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
302   bool isDMask() const { return isImmTy(ImmTyDMask); }
303   bool isDim() const { return isImmTy(ImmTyDim); }
304   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
305   bool isDA() const { return isImmTy(ImmTyDA); }
306   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
307   bool isLWE() const { return isImmTy(ImmTyLWE); }
308   bool isOff() const { return isImmTy(ImmTyOff); }
309   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
310   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
311   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
312   bool isOffen() const { return isImmTy(ImmTyOffen); }
313   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
314   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
315   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
316   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
317   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
318 
319   bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
320   bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
321   bool isGDS() const { return isImmTy(ImmTyGDS); }
322   bool isLDS() const { return isImmTy(ImmTyLDS); }
323   bool isDLC() const { return isImmTy(ImmTyDLC); }
324   bool isGLC() const { return isImmTy(ImmTyGLC); }
325   bool isSLC() const { return isImmTy(ImmTySLC); }
326   bool isTFE() const { return isImmTy(ImmTyTFE); }
327   bool isD16() const { return isImmTy(ImmTyD16); }
328   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
329   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
330   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
331   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
332   bool isFI() const { return isImmTy(ImmTyDppFi); }
333   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
334   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
335   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
336   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
337   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
338   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
339   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
340   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
341   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
342   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
343   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
344   bool isHigh() const { return isImmTy(ImmTyHigh); }
345 
346   bool isMod() const {
347     return isClampSI() || isOModSI();
348   }
349 
350   bool isRegOrImm() const {
351     return isReg() || isImm();
352   }
353 
354   bool isRegClass(unsigned RCID) const;
355 
356   bool isInlineValue() const;
357 
358   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
359     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
360   }
361 
362   bool isSCSrcB16() const {
363     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
364   }
365 
366   bool isSCSrcV2B16() const {
367     return isSCSrcB16();
368   }
369 
370   bool isSCSrcB32() const {
371     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
372   }
373 
374   bool isSCSrcB64() const {
375     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
376   }
377 
378   bool isBoolReg() const;
379 
380   bool isSCSrcF16() const {
381     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
382   }
383 
384   bool isSCSrcV2F16() const {
385     return isSCSrcF16();
386   }
387 
388   bool isSCSrcF32() const {
389     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
390   }
391 
392   bool isSCSrcF64() const {
393     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
394   }
395 
396   bool isSSrcB32() const {
397     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
398   }
399 
400   bool isSSrcB16() const {
401     return isSCSrcB16() || isLiteralImm(MVT::i16);
402   }
403 
404   bool isSSrcV2B16() const {
405     llvm_unreachable("cannot happen");
406     return isSSrcB16();
407   }
408 
409   bool isSSrcB64() const {
410     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
411     // See isVSrc64().
412     return isSCSrcB64() || isLiteralImm(MVT::i64);
413   }
414 
415   bool isSSrcF32() const {
416     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
417   }
418 
419   bool isSSrcF64() const {
420     return isSCSrcB64() || isLiteralImm(MVT::f64);
421   }
422 
423   bool isSSrcF16() const {
424     return isSCSrcB16() || isLiteralImm(MVT::f16);
425   }
426 
427   bool isSSrcV2F16() const {
428     llvm_unreachable("cannot happen");
429     return isSSrcF16();
430   }
431 
432   bool isSSrcOrLdsB32() const {
433     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
434            isLiteralImm(MVT::i32) || isExpr();
435   }
436 
437   bool isVCSrcB32() const {
438     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
439   }
440 
441   bool isVCSrcB64() const {
442     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
443   }
444 
445   bool isVCSrcB16() const {
446     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
447   }
448 
449   bool isVCSrcV2B16() const {
450     return isVCSrcB16();
451   }
452 
453   bool isVCSrcF32() const {
454     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
455   }
456 
457   bool isVCSrcF64() const {
458     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
459   }
460 
461   bool isVCSrcF16() const {
462     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
463   }
464 
465   bool isVCSrcV2F16() const {
466     return isVCSrcF16();
467   }
468 
469   bool isVSrcB32() const {
470     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
471   }
472 
473   bool isVSrcB64() const {
474     return isVCSrcF64() || isLiteralImm(MVT::i64);
475   }
476 
477   bool isVSrcB16() const {
478     return isVCSrcF16() || isLiteralImm(MVT::i16);
479   }
480 
481   bool isVSrcV2B16() const {
482     return isVSrcB16() || isLiteralImm(MVT::v2i16);
483   }
484 
485   bool isVSrcF32() const {
486     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
487   }
488 
489   bool isVSrcF64() const {
490     return isVCSrcF64() || isLiteralImm(MVT::f64);
491   }
492 
493   bool isVSrcF16() const {
494     return isVCSrcF16() || isLiteralImm(MVT::f16);
495   }
496 
497   bool isVSrcV2F16() const {
498     return isVSrcF16() || isLiteralImm(MVT::v2f16);
499   }
500 
501   bool isKImmFP32() const {
502     return isLiteralImm(MVT::f32);
503   }
504 
505   bool isKImmFP16() const {
506     return isLiteralImm(MVT::f16);
507   }
508 
509   bool isMem() const override {
510     return false;
511   }
512 
513   bool isExpr() const {
514     return Kind == Expression;
515   }
516 
517   bool isSoppBrTarget() const {
518     return isExpr() || isImm();
519   }
520 
521   bool isSWaitCnt() const;
522   bool isHwreg() const;
523   bool isSendMsg() const;
524   bool isSwizzle() const;
525   bool isSMRDOffset8() const;
526   bool isSMRDOffset20() const;
527   bool isSMRDLiteralOffset() const;
528   bool isDPP8() const;
529   bool isDPPCtrl() const;
530   bool isGPRIdxMode() const;
531   bool isS16Imm() const;
532   bool isU16Imm() const;
533   bool isEndpgm() const;
534 
535   StringRef getExpressionAsToken() const {
536     assert(isExpr());
537     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
538     return S->getSymbol().getName();
539   }
540 
541   StringRef getToken() const {
542     assert(isToken());
543 
544     if (Kind == Expression)
545       return getExpressionAsToken();
546 
547     return StringRef(Tok.Data, Tok.Length);
548   }
549 
550   int64_t getImm() const {
551     assert(isImm());
552     return Imm.Val;
553   }
554 
555   ImmTy getImmTy() const {
556     assert(isImm());
557     return Imm.Type;
558   }
559 
560   unsigned getReg() const override {
561     assert(isRegKind());
562     return Reg.RegNo;
563   }
564 
565   SMLoc getStartLoc() const override {
566     return StartLoc;
567   }
568 
569   SMLoc getEndLoc() const override {
570     return EndLoc;
571   }
572 
573   SMRange getLocRange() const {
574     return SMRange(StartLoc, EndLoc);
575   }
576 
577   Modifiers getModifiers() const {
578     assert(isRegKind() || isImmTy(ImmTyNone));
579     return isRegKind() ? Reg.Mods : Imm.Mods;
580   }
581 
582   void setModifiers(Modifiers Mods) {
583     assert(isRegKind() || isImmTy(ImmTyNone));
584     if (isRegKind())
585       Reg.Mods = Mods;
586     else
587       Imm.Mods = Mods;
588   }
589 
590   bool hasModifiers() const {
591     return getModifiers().hasModifiers();
592   }
593 
594   bool hasFPModifiers() const {
595     return getModifiers().hasFPModifiers();
596   }
597 
598   bool hasIntModifiers() const {
599     return getModifiers().hasIntModifiers();
600   }
601 
602   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
603 
604   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
605 
606   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
607 
608   template <unsigned Bitwidth>
609   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
610 
611   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
612     addKImmFPOperands<16>(Inst, N);
613   }
614 
615   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
616     addKImmFPOperands<32>(Inst, N);
617   }
618 
619   void addRegOperands(MCInst &Inst, unsigned N) const;
620 
621   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
622     addRegOperands(Inst, N);
623   }
624 
625   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
626     if (isRegKind())
627       addRegOperands(Inst, N);
628     else if (isExpr())
629       Inst.addOperand(MCOperand::createExpr(Expr));
630     else
631       addImmOperands(Inst, N);
632   }
633 
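  // Add the packed modifier bits followed by the register or unmodified
  // immediate; instructions with input modifiers expect the modifier
  // operand to immediately precede its source operand.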
634   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
635     Modifiers Mods = getModifiers();
636     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
637     if (isRegKind()) {
638       addRegOperands(Inst, N);
639     } else {
640       addImmOperands(Inst, N, false);
641     }
642   }
643 
644   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
645     assert(!hasIntModifiers());
646     addRegOrImmWithInputModsOperands(Inst, N);
647   }
648 
649   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
650     assert(!hasFPModifiers());
651     addRegOrImmWithInputModsOperands(Inst, N);
652   }
653 
654   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
655     Modifiers Mods = getModifiers();
656     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
657     assert(isRegKind());
658     addRegOperands(Inst, N);
659   }
660 
661   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
662     assert(!hasIntModifiers());
663     addRegWithInputModsOperands(Inst, N);
664   }
665 
666   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
667     assert(!hasFPModifiers());
668     addRegWithInputModsOperands(Inst, N);
669   }
670 
671   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
672     if (isImm())
673       addImmOperands(Inst, N);
674     else {
675       assert(isExpr());
676       Inst.addOperand(MCOperand::createExpr(Expr));
677     }
678   }
679 
680   static void printImmTy(raw_ostream& OS, ImmTy Type) {
681     switch (Type) {
682     case ImmTyNone: OS << "None"; break;
683     case ImmTyGDS: OS << "GDS"; break;
684     case ImmTyLDS: OS << "LDS"; break;
685     case ImmTyOffen: OS << "Offen"; break;
686     case ImmTyIdxen: OS << "Idxen"; break;
687     case ImmTyAddr64: OS << "Addr64"; break;
688     case ImmTyOffset: OS << "Offset"; break;
689     case ImmTyInstOffset: OS << "InstOffset"; break;
690     case ImmTyOffset0: OS << "Offset0"; break;
691     case ImmTyOffset1: OS << "Offset1"; break;
692     case ImmTyDLC: OS << "DLC"; break;
693     case ImmTyGLC: OS << "GLC"; break;
694     case ImmTySLC: OS << "SLC"; break;
695     case ImmTyTFE: OS << "TFE"; break;
696     case ImmTyD16: OS << "D16"; break;
697     case ImmTyFORMAT: OS << "FORMAT"; break;
698     case ImmTyClampSI: OS << "ClampSI"; break;
699     case ImmTyOModSI: OS << "OModSI"; break;
700     case ImmTyDPP8: OS << "DPP8"; break;
701     case ImmTyDppCtrl: OS << "DppCtrl"; break;
702     case ImmTyDppRowMask: OS << "DppRowMask"; break;
703     case ImmTyDppBankMask: OS << "DppBankMask"; break;
704     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
705     case ImmTyDppFi: OS << "FI"; break;
706     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
707     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
708     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
709     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
710     case ImmTyDMask: OS << "DMask"; break;
711     case ImmTyDim: OS << "Dim"; break;
712     case ImmTyUNorm: OS << "UNorm"; break;
713     case ImmTyDA: OS << "DA"; break;
714     case ImmTyR128A16: OS << "R128A16"; break;
715     case ImmTyLWE: OS << "LWE"; break;
716     case ImmTyOff: OS << "Off"; break;
717     case ImmTyExpTgt: OS << "ExpTgt"; break;
718     case ImmTyExpCompr: OS << "ExpCompr"; break;
719     case ImmTyExpVM: OS << "ExpVM"; break;
720     case ImmTyHwreg: OS << "Hwreg"; break;
721     case ImmTySendMsg: OS << "SendMsg"; break;
722     case ImmTyInterpSlot: OS << "InterpSlot"; break;
723     case ImmTyInterpAttr: OS << "InterpAttr"; break;
724     case ImmTyAttrChan: OS << "AttrChan"; break;
725     case ImmTyOpSel: OS << "OpSel"; break;
726     case ImmTyOpSelHi: OS << "OpSelHi"; break;
727     case ImmTyNegLo: OS << "NegLo"; break;
728     case ImmTyNegHi: OS << "NegHi"; break;
729     case ImmTySwizzle: OS << "Swizzle"; break;
730     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
731     case ImmTyHigh: OS << "High"; break;
732     case ImmTyEndpgm:
733       OS << "Endpgm";
734       break;
735     }
736   }
737 
738   void print(raw_ostream &OS) const override {
739     switch (Kind) {
740     case Register:
741       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
742       break;
743     case Immediate:
744       OS << '<' << getImm();
745       if (getImmTy() != ImmTyNone) {
746         OS << " type: "; printImmTy(OS, getImmTy());
747       }
748       OS << " mods: " << Imm.Mods << '>';
749       break;
750     case Token:
751       OS << '\'' << getToken() << '\'';
752       break;
753     case Expression:
754       OS << "<expr " << *Expr << '>';
755       break;
756     }
757   }
758 
759   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
760                                       int64_t Val, SMLoc Loc,
761                                       ImmTy Type = ImmTyNone,
762                                       bool IsFPImm = false) {
763     auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
764     Op->Imm.Val = Val;
765     Op->Imm.IsFPImm = IsFPImm;
766     Op->Imm.Type = Type;
767     Op->Imm.Mods = Modifiers();
768     Op->StartLoc = Loc;
769     Op->EndLoc = Loc;
770     return Op;
771   }
772 
773   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
774                                         StringRef Str, SMLoc Loc,
775                                         bool HasExplicitEncodingSize = true) {
776     auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
777     Res->Tok.Data = Str.data();
778     Res->Tok.Length = Str.size();
779     Res->StartLoc = Loc;
780     Res->EndLoc = Loc;
781     return Res;
782   }
783 
784   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
785                                       unsigned RegNo, SMLoc S,
786                                       SMLoc E) {
787     auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
788     Op->Reg.RegNo = RegNo;
789     Op->Reg.Mods = Modifiers();
790     Op->StartLoc = S;
791     Op->EndLoc = E;
792     return Op;
793   }
794 
795   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
796                                        const class MCExpr *Expr, SMLoc S) {
797     auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
798     Op->Expr = Expr;
799     Op->StartLoc = S;
800     Op->EndLoc = S;
801     return Op;
802   }
803 };
804 
805 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
806   OS << "abs:" << Mods.Abs << " neg:" << Mods.Neg << " sext:" << Mods.Sext;
807   return OS;
808 }
809 
810 //===----------------------------------------------------------------------===//
811 // AsmParser
812 //===----------------------------------------------------------------------===//
813 
814 // Holds info related to the current kernel, e.g. count of SGPRs used.
815 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
816 // .amdgpu_hsa_kernel or at EOF.
817 class KernelScopeInfo {
818   int SgprIndexUnusedMin = -1;
819   int VgprIndexUnusedMin = -1;
820   MCContext *Ctx = nullptr;
821 
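  // Record a use of SGPR index \p i and keep the .kernel.sgpr_count symbol
  // equal to the number of SGPRs referenced so far (highest index plus one).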
822   void usesSgprAt(int i) {
823     if (i >= SgprIndexUnusedMin) {
824       SgprIndexUnusedMin = ++i;
825       if (Ctx) {
826         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
827         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
828       }
829     }
830   }
831 
832   void usesVgprAt(int i) {
833     if (i >= VgprIndexUnusedMin) {
834       VgprIndexUnusedMin = ++i;
835       if (Ctx) {
836         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
837         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
838       }
839     }
840   }
841 
842 public:
843   KernelScopeInfo() = default;
844 
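  // Reset the counters and define the count symbols with an initial value of 0.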
845   void initialize(MCContext &Context) {
846     Ctx = &Context;
847     usesSgprAt(SgprIndexUnusedMin = -1);
848     usesVgprAt(VgprIndexUnusedMin = -1);
849   }
850 
851   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
852     switch (RegKind) {
853       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
854       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
855       default: break;
856     }
857   }
858 };
859 
860 class AMDGPUAsmParser : public MCTargetAsmParser {
861   MCAsmParser &Parser;
862 
863   // Number of extra operands parsed after the first optional operand.
864   // This may be necessary to skip hardcoded mandatory operands.
865   static const unsigned MAX_OPR_LOOKAHEAD = 8;
866 
867   unsigned ForcedEncodingSize = 0;
868   bool ForcedDPP = false;
869   bool ForcedSDWA = false;
870   KernelScopeInfo KernelScope;
871 
872   /// @name Auto-generated Match Functions
873   /// {
874 
875 #define GET_ASSEMBLER_HEADER
876 #include "AMDGPUGenAsmMatcher.inc"
877 
878   /// }
879 
880 private:
881   bool ParseAsAbsoluteExpression(uint32_t &Ret);
882   bool OutOfRangeError(SMRange Range);
883   /// Calculate VGPR/SGPR blocks required for a given target, reserved
884   /// registers, and user-specified NextFreeXGPR values.
885   ///
886   /// \param Features [in] Target features, used for bug corrections.
887   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
888   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
889   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
890   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
891   /// descriptor field, if valid.
892   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
893   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
894   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
895   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
896   /// \param VGPRBlocks [out] Result VGPR block count.
897   /// \param SGPRBlocks [out] Result SGPR block count.
898   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
899                           bool FlatScrUsed, bool XNACKUsed,
900                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
901                           SMRange VGPRRange, unsigned NextFreeSGPR,
902                           SMRange SGPRRange, unsigned &VGPRBlocks,
903                           unsigned &SGPRBlocks);
904   bool ParseDirectiveAMDGCNTarget();
905   bool ParseDirectiveAMDHSAKernel();
906   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
907   bool ParseDirectiveHSACodeObjectVersion();
908   bool ParseDirectiveHSACodeObjectISA();
909   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
910   bool ParseDirectiveAMDKernelCodeT();
911   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
912   bool ParseDirectiveAMDGPUHsaKernel();
913 
914   bool ParseDirectiveISAVersion();
915   bool ParseDirectiveHSAMetadata();
916   bool ParseDirectivePALMetadataBegin();
917   bool ParseDirectivePALMetadata();
918 
919   /// Common code to parse out a block of text (typically YAML) between start and
920   /// end directives.
921   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
922                            const char *AssemblerDirectiveEnd,
923                            std::string &CollectString);
924 
925   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
926                              RegisterKind RegKind, unsigned Reg1,
927                              unsigned RegNum);
928   bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
929                            unsigned& RegNum, unsigned& RegWidth,
930                            unsigned *DwordRegIndex);
931   bool isRegister();
932   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
933   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
934   void initializeGprCountSymbol(RegisterKind RegKind);
935   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
936                              unsigned RegWidth);
937   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
938                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
939   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
940                  bool IsGdsHardcoded);
941 
942 public:
943   enum AMDGPUMatchResultTy {
944     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
945   };
946   enum OperandMode {
947     OperandMode_Default,
948     OperandMode_NSA,
949   };
950 
951   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
952 
953   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
954                const MCInstrInfo &MII,
955                const MCTargetOptions &Options)
956       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
957     MCAsmParserExtension::Initialize(Parser);
958 
959     if (getFeatureBits().none()) {
960       // Set default features.
961       copySTI().ToggleFeature("southern-islands");
962     }
963 
964     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
965 
966     {
967       // TODO: make these pre-defined variables read-only.
968       // Currently there is no suitable machinery in core llvm-mc for this.
969       // MCSymbol::isRedefinable is intended for another purpose, and
970       // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
971       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
972       MCContext &Ctx = getContext();
973       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
974         MCSymbol *Sym =
975             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
976         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
977         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
978         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
979         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
980         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
981       } else {
982         MCSymbol *Sym =
983             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
984         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
985         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
986         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
987         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
988         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
989       }
990       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
991         initializeGprCountSymbol(IS_VGPR);
992         initializeGprCountSymbol(IS_SGPR);
993       } else
994         KernelScope.initialize(getContext());
995     }
996   }
997 
998   bool hasXNACK() const {
999     return AMDGPU::hasXNACK(getSTI());
1000   }
1001 
1002   bool hasMIMG_R128() const {
1003     return AMDGPU::hasMIMG_R128(getSTI());
1004   }
1005 
1006   bool hasPackedD16() const {
1007     return AMDGPU::hasPackedD16(getSTI());
1008   }
1009 
1010   bool isSI() const {
1011     return AMDGPU::isSI(getSTI());
1012   }
1013 
1014   bool isCI() const {
1015     return AMDGPU::isCI(getSTI());
1016   }
1017 
1018   bool isVI() const {
1019     return AMDGPU::isVI(getSTI());
1020   }
1021 
1022   bool isGFX9() const {
1023     return AMDGPU::isGFX9(getSTI());
1024   }
1025 
1026   bool isGFX10() const {
1027     return AMDGPU::isGFX10(getSTI());
1028   }
1029 
1030   bool hasInv2PiInlineImm() const {
1031     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1032   }
1033 
1034   bool hasFlatOffsets() const {
1035     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1036   }
1037 
1038   bool hasSGPR102_SGPR103() const {
1039     return !isVI() && !isGFX9();
1040   }
1041 
1042   bool hasSGPR104_SGPR105() const {
1043     return isGFX10();
1044   }
1045 
1046   bool hasIntClamp() const {
1047     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1048   }
1049 
1050   AMDGPUTargetStreamer &getTargetStreamer() {
1051     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1052     return static_cast<AMDGPUTargetStreamer &>(TS);
1053   }
1054 
1055   const MCRegisterInfo *getMRI() const {
1056     // We need this const_cast because for some reason getContext() is not const
1057     // in MCAsmParser.
1058     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1059   }
1060 
1061   const MCInstrInfo *getMII() const {
1062     return &MII;
1063   }
1064 
1065   const FeatureBitset &getFeatureBits() const {
1066     return getSTI().getFeatureBits();
1067   }
1068 
1069   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1070   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1071   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1072 
1073   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1074   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1075   bool isForcedDPP() const { return ForcedDPP; }
1076   bool isForcedSDWA() const { return ForcedSDWA; }
1077   ArrayRef<unsigned> getMatchedVariants() const;
1078 
1079   std::unique_ptr<AMDGPUOperand> parseRegister();
1080   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1081   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1082   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1083                                       unsigned Kind) override;
1084   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1085                                OperandVector &Operands, MCStreamer &Out,
1086                                uint64_t &ErrorInfo,
1087                                bool MatchingInlineAsm) override;
1088   bool ParseDirective(AsmToken DirectiveID) override;
1089   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1090                                     OperandMode Mode = OperandMode_Default);
1091   StringRef parseMnemonicSuffix(StringRef Name);
1092   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1093                         SMLoc NameLoc, OperandVector &Operands) override;
1094   //bool ProcessInstruction(MCInst &Inst);
1095 
1096   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1097 
1098   OperandMatchResultTy
1099   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1100                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1101                      bool (*ConvertResult)(int64_t &) = nullptr);
1102 
1103   OperandMatchResultTy
1104   parseOperandArrayWithPrefix(const char *Prefix,
1105                               OperandVector &Operands,
1106                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1107                               bool (*ConvertResult)(int64_t&) = nullptr);
1108 
1109   OperandMatchResultTy
1110   parseNamedBit(const char *Name, OperandVector &Operands,
1111                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1112   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1113                                              StringRef &Value);
1114 
1115   bool isModifier();
1116   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1117   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1118   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1119   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1120   bool parseSP3NegModifier();
1121   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1122   OperandMatchResultTy parseReg(OperandVector &Operands);
1123   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1124   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1125   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1126   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1127   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1128   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1129   OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1130 
1131   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1132   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1133   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1134   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1135 
1136   bool parseCnt(int64_t &IntVal);
1137   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1138   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1139 
1140 private:
1141   struct OperandInfoTy {
1142     int64_t Id;
1143     bool IsSymbolic = false;
1144 
1145     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1146   };
1147 
1148   bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
1149   bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1150   void validateHwreg(const OperandInfoTy &HwReg,
1151                      const int64_t Offset,
1152                      const int64_t Width,
1153                      const SMLoc Loc);
1154 
1155   void errorExpTgt();
1156   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1157 
1158   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
1159   bool validateSOPLiteral(const MCInst &Inst) const;
1160   bool validateConstantBusLimitations(const MCInst &Inst);
1161   bool validateEarlyClobberLimitations(const MCInst &Inst);
1162   bool validateIntClampSupported(const MCInst &Inst);
1163   bool validateMIMGAtomicDMask(const MCInst &Inst);
1164   bool validateMIMGGatherDMask(const MCInst &Inst);
1165   bool validateMIMGDataSize(const MCInst &Inst);
1166   bool validateMIMGAddrSize(const MCInst &Inst);
1167   bool validateMIMGD16(const MCInst &Inst);
1168   bool validateMIMGDim(const MCInst &Inst);
1169   bool validateLdsDirect(const MCInst &Inst);
1170   bool validateOpSel(const MCInst &Inst);
1171   bool validateVccOperand(unsigned Reg) const;
1172   bool validateVOP3Literal(const MCInst &Inst) const;
1173   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1174   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1175   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1176 
1177   bool isId(const StringRef Id) const;
1178   bool isId(const AsmToken &Token, const StringRef Id) const;
1179   bool isToken(const AsmToken::TokenKind Kind) const;
1180   bool trySkipId(const StringRef Id);
1181   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1182   bool trySkipToken(const AsmToken::TokenKind Kind);
1183   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1184   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1185   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1186   AsmToken::TokenKind getTokenKind() const;
1187   bool parseExpr(int64_t &Imm);
1188   StringRef getTokenStr() const;
1189   AsmToken peekToken();
1190   AsmToken getToken() const;
1191   SMLoc getLoc() const;
1192   void lex();
1193 
1194 public:
1195   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1196   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1197 
1198   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1199   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1200   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1201   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1202   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1203   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1204 
1205   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1206                             const unsigned MinVal,
1207                             const unsigned MaxVal,
1208                             const StringRef ErrMsg);
1209   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1210   bool parseSwizzleOffset(int64_t &Imm);
1211   bool parseSwizzleMacro(int64_t &Imm);
1212   bool parseSwizzleQuadPerm(int64_t &Imm);
1213   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1214   bool parseSwizzleBroadcast(int64_t &Imm);
1215   bool parseSwizzleSwap(int64_t &Imm);
1216   bool parseSwizzleReverse(int64_t &Imm);
1217 
1218   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1219   int64_t parseGPRIdxMacro();
1220 
1221   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1222   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1223   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1224   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1225   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1226 
1227   AMDGPUOperand::Ptr defaultDLC() const;
1228   AMDGPUOperand::Ptr defaultGLC() const;
1229   AMDGPUOperand::Ptr defaultSLC() const;
1230 
1231   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1232   AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1233   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1234   AMDGPUOperand::Ptr defaultOffsetU12() const;
1235   AMDGPUOperand::Ptr defaultOffsetS13() const;
1236 
1237   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1238 
1239   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1240                OptionalImmIndexMap &OptionalIdx);
1241   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1242   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1243   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1244 
1245   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1246 
1247   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1248                bool IsAtomic = false);
1249   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1250 
1251   OperandMatchResultTy parseDim(OperandVector &Operands);
1252   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1253   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1254   AMDGPUOperand::Ptr defaultRowMask() const;
1255   AMDGPUOperand::Ptr defaultBankMask() const;
1256   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1257   AMDGPUOperand::Ptr defaultFI() const;
1258   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1259   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1260 
1261   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1262                                     AMDGPUOperand::ImmTy Type);
1263   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1264   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1265   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1266   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1267   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1268   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1269                 uint64_t BasicInstType, bool skipVcc = false);
1270 
1271   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1272   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1273 };
1274 
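// Describes an optional named operand: its assembly-text name, the immediate
// kind it produces, whether it is a simple bit flag, and an optional callback
// used to convert or validate the parsed value.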
1275 struct OptionalOperand {
1276   const char *Name;
1277   AMDGPUOperand::ImmTy Type;
1278   bool IsBit;
1279   bool (*ConvertResult)(int64_t&);
1280 };
1281 
1282 } // end anonymous namespace
1283 
1284 // May be called with an integer type of equivalent bitwidth.
1285 static const fltSemantics *getFltSemantics(unsigned Size) {
1286   switch (Size) {
1287   case 4:
1288     return &APFloat::IEEEsingle();
1289   case 8:
1290     return &APFloat::IEEEdouble();
1291   case 2:
1292     return &APFloat::IEEEhalf();
1293   default:
1294     llvm_unreachable("unsupported fp type");
1295   }
1296 }
1297 
1298 static const fltSemantics *getFltSemantics(MVT VT) {
1299   return getFltSemantics(VT.getSizeInBits() / 8);
1300 }
1301 
1302 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1303   switch (OperandType) {
1304   case AMDGPU::OPERAND_REG_IMM_INT32:
1305   case AMDGPU::OPERAND_REG_IMM_FP32:
1306   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1307   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1308     return &APFloat::IEEEsingle();
1309   case AMDGPU::OPERAND_REG_IMM_INT64:
1310   case AMDGPU::OPERAND_REG_IMM_FP64:
1311   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1312   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1313     return &APFloat::IEEEdouble();
1314   case AMDGPU::OPERAND_REG_IMM_INT16:
1315   case AMDGPU::OPERAND_REG_IMM_FP16:
1316   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1317   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1318   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1319   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1320   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1321   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1322     return &APFloat::IEEEhalf();
1323   default:
1324     llvm_unreachable("unsupported fp type");
1325   }
1326 }
1327 
1328 //===----------------------------------------------------------------------===//
1329 // Operand
1330 //===----------------------------------------------------------------------===//
1331 
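// Check whether the fp literal can be converted to the given type, tolerating
// a loss of precision but not overflow or underflow.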
1332 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1333   bool Lost;
1334 
1335   // Convert the literal to the given floating-point type
1336   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1337                                                APFloat::rmNearestTiesToEven,
1338                                                &Lost);
1339   // We allow precision loss but not overflow or underflow
1340   if (Status != APFloat::opOK &&
1341       Lost &&
1342       ((Status & APFloat::opOverflow)  != 0 ||
1343        (Status & APFloat::opUnderflow) != 0)) {
1344     return false;
1345   }
1346 
1347   return true;
1348 }
1349 
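// A value can be safely truncated to Size bits if it is representable as
// either a Size-bit unsigned or a Size-bit signed integer.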
1350 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1351   return isUIntN(Size, Val) || isIntN(Size, Val);
1352 }
1353 
1354 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1355 
1356   // This is a hack to enable named inline values like
1357   // shared_base with both 32-bit and 64-bit operands.
1358   // Note that these values are defined as
1359   // 32-bit operands only.
1360   if (isInlineValue()) {
1361     return true;
1362   }
1363 
1364   if (!isImmTy(ImmTyNone)) {
1365     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1366     return false;
1367   }
1368   // TODO: We should avoid using host float here. It would be better to
1369   // check the float bit values which is what a few other places do.
1370   // We've had bot failures before due to weird NaN support on mips hosts.
1371 
1372   APInt Literal(64, Imm.Val);
1373 
1374   if (Imm.IsFPImm) { // We got fp literal token
1375     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1376       return AMDGPU::isInlinableLiteral64(Imm.Val,
1377                                           AsmParser->hasInv2PiInlineImm());
1378     }
1379 
1380     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1381     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1382       return false;
1383 
1384     if (type.getScalarSizeInBits() == 16) {
1385       return AMDGPU::isInlinableLiteral16(
1386         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1387         AsmParser->hasInv2PiInlineImm());
1388     }
1389 
1390     // Check if single precision literal is inlinable
1391     return AMDGPU::isInlinableLiteral32(
1392       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1393       AsmParser->hasInv2PiInlineImm());
1394   }
1395 
1396   // We got int literal token.
1397   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1398     return AMDGPU::isInlinableLiteral64(Imm.Val,
1399                                         AsmParser->hasInv2PiInlineImm());
1400   }
1401 
1402   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1403     return false;
1404   }
1405 
1406   if (type.getScalarSizeInBits() == 16) {
1407     return AMDGPU::isInlinableLiteral16(
1408       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1409       AsmParser->hasInv2PiInlineImm());
1410   }
1411 
1412   return AMDGPU::isInlinableLiteral32(
1413     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1414     AsmParser->hasInv2PiInlineImm());
1415 }
1416 
1417 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1418   // Check that this immediate can be added as literal
1419   if (!isImmTy(ImmTyNone)) {
1420     return false;
1421   }
1422 
1423   if (!Imm.IsFPImm) {
1424     // We got int literal token.
1425 
1426     if (type == MVT::f64 && hasFPModifiers()) {
1427       // Cannot apply fp modifiers to int literals preserving the same semantics
1428       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1429       // disable these cases.
1430       return false;
1431     }
1432 
1433     unsigned Size = type.getSizeInBits();
1434     if (Size == 64)
1435       Size = 32;
1436 
1437     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1438     // types.
1439     return isSafeTruncation(Imm.Val, Size);
1440   }
1441 
1442   // We got an fp literal token.
1443   if (type == MVT::f64) { // Expected 64-bit fp operand
1444     // The low 32 bits of the literal would be set to zeroes, but we accept such literals.
1445     return true;
1446   }
1447 
1448   if (type == MVT::i64) { // Expected 64-bit int operand
1449     // We don't allow fp literals in 64-bit integer instructions. It is
1450     // unclear how we should encode them.
1451     return false;
1452   }
1453 
1454   // We allow fp literals with f16x2 operands assuming that the specified
1455   // literal goes into the lower half and the upper half is zero. We also
1456   // require that the literal may be losslessly converted to f16.
1457   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1458                      (type == MVT::v2i16)? MVT::i16 : type;
1459 
1460   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1461   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1462 }
1463 
1464 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1465   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1466 }
1467 
1468 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1469   if (AsmParser->isVI())
1470     return isVReg32();
1471   else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1472     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1473   else
1474     return false;
1475 }
1476 
1477 bool AMDGPUOperand::isSDWAFP16Operand() const {
1478   return isSDWAOperand(MVT::f16);
1479 }
1480 
1481 bool AMDGPUOperand::isSDWAFP32Operand() const {
1482   return isSDWAOperand(MVT::f32);
1483 }
1484 
1485 bool AMDGPUOperand::isSDWAInt16Operand() const {
1486   return isSDWAOperand(MVT::i16);
1487 }
1488 
1489 bool AMDGPUOperand::isSDWAInt32Operand() const {
1490   return isSDWAOperand(MVT::i32);
1491 }
1492 
1493 bool AMDGPUOperand::isBoolReg() const {
1494   return AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ?
1495     isSCSrcB64() : isSCSrcB32();
1496 }
1497 
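// Apply the parsed abs/neg modifiers directly to the bit pattern of a
// floating-point literal: abs clears the sign bit, neg flips it.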
1498 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1499 {
1500   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1501   assert(Size == 2 || Size == 4 || Size == 8);
1502 
1503   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1504 
1505   if (Imm.Mods.Abs) {
1506     Val &= ~FpSignMask;
1507   }
1508   if (Imm.Mods.Neg) {
1509     Val ^= FpSignMask;
1510   }
1511 
1512   return Val;
1513 }
1514 
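// Add this immediate to Inst. If the operand slot being filled is an SI
// source operand (i.e. one that may accept a literal), go through
// addLiteralImmOperand so FP modifiers and literal encoding rules apply;
// otherwise add the raw value.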
1515 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1516   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1517                              Inst.getNumOperands())) {
1518     addLiteralImmOperand(Inst, Imm.Val,
1519                          ApplyModifiers &&
1520                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1521   } else {
1522     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1523     Inst.addOperand(MCOperand::createImm(Imm.Val));
1524   }
1525 }
1526 
1527 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1528   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1529   auto OpNum = Inst.getNumOperands();
1530   // Check that this operand accepts literals
1531   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1532 
1533   if (ApplyModifiers) {
1534     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1535     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1536     Val = applyInputFPModifiers(Val, Size);
1537   }
1538 
1539   APInt Literal(64, Val);
1540   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1541 
1542   if (Imm.IsFPImm) { // We got fp literal token
1543     switch (OpTy) {
1544     case AMDGPU::OPERAND_REG_IMM_INT64:
1545     case AMDGPU::OPERAND_REG_IMM_FP64:
1546     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1547     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1548       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1549                                        AsmParser->hasInv2PiInlineImm())) {
1550         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1551         return;
1552       }
1553 
1554       // Non-inlineable
1555       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1556         // For fp operands we check if low 32 bits are zeros
1557         if (Literal.getLoBits(32) != 0) {
1558           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1559           "Can't encode literal as exact 64-bit floating-point operand. "
1560           "Low 32-bits will be set to zero");
1561         }
1562 
1563         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1564         return;
1565       }
1566 
1567       // We don't allow fp literals in 64-bit integer instructions. It is
1568       // unclear how we should encode them. This case should be checked earlier
1569       // in predicate methods (isLiteralImm())
1570       llvm_unreachable("fp literal in 64-bit integer instruction.");
1571 
1572     case AMDGPU::OPERAND_REG_IMM_INT32:
1573     case AMDGPU::OPERAND_REG_IMM_FP32:
1574     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1575     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1576     case AMDGPU::OPERAND_REG_IMM_INT16:
1577     case AMDGPU::OPERAND_REG_IMM_FP16:
1578     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1579     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1580     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1581     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1582     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1583     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1584       bool lost;
1585       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert the literal to the fp semantics of the operand type.
1587       FPLiteral.convert(*getOpFltSemantics(OpTy),
1588                         APFloat::rmNearestTiesToEven, &lost);
      // We allow loss of precision but not overflow or underflow. This should
      // be checked earlier in isLiteralImm().
1591 
1592       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1593       Inst.addOperand(MCOperand::createImm(ImmVal));
1594       return;
1595     }
1596     default:
1597       llvm_unreachable("invalid operand size");
1598     }
1599 
1600     return;
1601   }
1602 
1603   // We got int literal token.
1604   // Only sign extend inline immediates.
1605   switch (OpTy) {
1606   case AMDGPU::OPERAND_REG_IMM_INT32:
1607   case AMDGPU::OPERAND_REG_IMM_FP32:
1608   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1609   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1610   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1611   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1612     if (isSafeTruncation(Val, 32) &&
1613         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1614                                      AsmParser->hasInv2PiInlineImm())) {
1615       Inst.addOperand(MCOperand::createImm(Val));
1616       return;
1617     }
1618 
1619     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1620     return;
1621 
1622   case AMDGPU::OPERAND_REG_IMM_INT64:
1623   case AMDGPU::OPERAND_REG_IMM_FP64:
1624   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1625   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1626     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1627       Inst.addOperand(MCOperand::createImm(Val));
1628       return;
1629     }
1630 
1631     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1632     return;
1633 
1634   case AMDGPU::OPERAND_REG_IMM_INT16:
1635   case AMDGPU::OPERAND_REG_IMM_FP16:
1636   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1637   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1638     if (isSafeTruncation(Val, 16) &&
1639         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1640                                      AsmParser->hasInv2PiInlineImm())) {
1641       Inst.addOperand(MCOperand::createImm(Val));
1642       return;
1643     }
1644 
1645     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1646     return;
1647 
1648   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1649   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1650     assert(isSafeTruncation(Val, 16));
1651     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1652                                         AsmParser->hasInv2PiInlineImm()));
1653 
1654     Inst.addOperand(MCOperand::createImm(Val));
1655     return;
1656   }
1657   default:
1658     llvm_unreachable("invalid operand size");
1659   }
1660 }
1661 
1662 template <unsigned Bitwidth>
1663 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1664   APInt Literal(64, Imm.Val);
1665 
1666   if (!Imm.IsFPImm) {
1667     // We got int literal token.
1668     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1669     return;
1670   }
1671 
1672   bool Lost;
1673   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1674   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1675                     APFloat::rmNearestTiesToEven, &Lost);
1676   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1677 }
1678 
1679 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1680   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1681 }
1682 
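// Registers that read as inline constant values (aperture registers and wave
// state bits) rather than as ordinary register operands.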
1683 static bool isInlineValue(unsigned Reg) {
1684   switch (Reg) {
1685   case AMDGPU::SRC_SHARED_BASE:
1686   case AMDGPU::SRC_SHARED_LIMIT:
1687   case AMDGPU::SRC_PRIVATE_BASE:
1688   case AMDGPU::SRC_PRIVATE_LIMIT:
1689   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1690     return true;
1691   case AMDGPU::SRC_VCCZ:
1692   case AMDGPU::SRC_EXECZ:
1693   case AMDGPU::SRC_SCC:
1694     return true;
1695   default:
1696     return false;
1697   }
1698 }
1699 
1700 bool AMDGPUOperand::isInlineValue() const {
1701   return isRegKind() && ::isInlineValue(getReg());
1702 }
1703 
1704 //===----------------------------------------------------------------------===//
1705 // AsmParser
1706 //===----------------------------------------------------------------------===//
1707 
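// Map a register kind and width (in 32-bit registers) to the corresponding
// register class ID, or -1 if no class of that width exists.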
1708 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1709   if (Is == IS_VGPR) {
1710     switch (RegWidth) {
1711       default: return -1;
1712       case 1: return AMDGPU::VGPR_32RegClassID;
1713       case 2: return AMDGPU::VReg_64RegClassID;
1714       case 3: return AMDGPU::VReg_96RegClassID;
1715       case 4: return AMDGPU::VReg_128RegClassID;
1716       case 8: return AMDGPU::VReg_256RegClassID;
1717       case 16: return AMDGPU::VReg_512RegClassID;
1718     }
1719   } else if (Is == IS_TTMP) {
1720     switch (RegWidth) {
1721       default: return -1;
1722       case 1: return AMDGPU::TTMP_32RegClassID;
1723       case 2: return AMDGPU::TTMP_64RegClassID;
1724       case 4: return AMDGPU::TTMP_128RegClassID;
1725       case 8: return AMDGPU::TTMP_256RegClassID;
1726       case 16: return AMDGPU::TTMP_512RegClassID;
1727     }
1728   } else if (Is == IS_SGPR) {
1729     switch (RegWidth) {
1730       default: return -1;
1731       case 1: return AMDGPU::SGPR_32RegClassID;
1732       case 2: return AMDGPU::SGPR_64RegClassID;
1733       case 4: return AMDGPU::SGPR_128RegClassID;
1734       case 8: return AMDGPU::SGPR_256RegClassID;
1735       case 16: return AMDGPU::SGPR_512RegClassID;
1736     }
1737   }
1738   return -1;
1739 }
1740 
1741 static unsigned getSpecialRegForName(StringRef RegName) {
1742   return StringSwitch<unsigned>(RegName)
1743     .Case("exec", AMDGPU::EXEC)
1744     .Case("vcc", AMDGPU::VCC)
1745     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1746     .Case("xnack_mask", AMDGPU::XNACK_MASK)
1747     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
1748     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
1749     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1750     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1751     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
1752     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
1753     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1754     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1755     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1756     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1757     .Case("lds_direct", AMDGPU::LDS_DIRECT)
1758     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1759     .Case("m0", AMDGPU::M0)
1760     .Case("vccz", AMDGPU::SRC_VCCZ)
1761     .Case("src_vccz", AMDGPU::SRC_VCCZ)
1762     .Case("execz", AMDGPU::SRC_EXECZ)
1763     .Case("src_execz", AMDGPU::SRC_EXECZ)
1764     .Case("scc", AMDGPU::SRC_SCC)
1765     .Case("src_scc", AMDGPU::SRC_SCC)
1766     .Case("tba", AMDGPU::TBA)
1767     .Case("tma", AMDGPU::TMA)
1768     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1769     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1770     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1771     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1772     .Case("vcc_lo", AMDGPU::VCC_LO)
1773     .Case("vcc_hi", AMDGPU::VCC_HI)
1774     .Case("exec_lo", AMDGPU::EXEC_LO)
1775     .Case("exec_hi", AMDGPU::EXEC_HI)
1776     .Case("tma_lo", AMDGPU::TMA_LO)
1777     .Case("tma_hi", AMDGPU::TMA_HI)
1778     .Case("tba_lo", AMDGPU::TBA_LO)
1779     .Case("tba_hi", AMDGPU::TBA_HI)
1780     .Case("null", AMDGPU::SGPR_NULL)
1781     .Default(0);
1782 }
1783 
1784 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1785                                     SMLoc &EndLoc) {
1786   auto R = parseRegister();
1787   if (!R) return true;
1788   assert(R->isReg());
1789   RegNo = R->getReg();
1790   StartLoc = R->getStartLoc();
1791   EndLoc = R->getEndLoc();
1792   return false;
1793 }
1794 
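// Append the next register of a bracketed list (e.g. "[s0,s1,s2,s3]") to the
// register span being built. Known lo/hi pairs of special registers fold into
// their 64-bit counterpart; VGPR/SGPR/TTMP registers must be consecutive.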
1795 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1796                                             RegisterKind RegKind, unsigned Reg1,
1797                                             unsigned RegNum) {
1798   switch (RegKind) {
1799   case IS_SPECIAL:
1800     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1801       Reg = AMDGPU::EXEC;
1802       RegWidth = 2;
1803       return true;
1804     }
1805     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1806       Reg = AMDGPU::FLAT_SCR;
1807       RegWidth = 2;
1808       return true;
1809     }
1810     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
1811       Reg = AMDGPU::XNACK_MASK;
1812       RegWidth = 2;
1813       return true;
1814     }
1815     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1816       Reg = AMDGPU::VCC;
1817       RegWidth = 2;
1818       return true;
1819     }
1820     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1821       Reg = AMDGPU::TBA;
1822       RegWidth = 2;
1823       return true;
1824     }
1825     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
1826       Reg = AMDGPU::TMA;
1827       RegWidth = 2;
1828       return true;
1829     }
1830     return false;
1831   case IS_VGPR:
1832   case IS_SGPR:
1833   case IS_TTMP:
1834     if (Reg1 != Reg + RegWidth) {
1835       return false;
1836     }
1837     RegWidth++;
1838     return true;
1839   default:
1840     llvm_unreachable("unexpected register kind");
1841   }
1842 }
1843 
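// Prefixes of generic register names ("v0", "s[0:1]", "ttmp4", ...).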
1844 static const StringRef Registers[] = {
1845   { "v" },
1846   { "s" },
1847   { "ttmp" },
1848 };
1849 
1850 bool
1851 AMDGPUAsmParser::isRegister(const AsmToken &Token,
1852                             const AsmToken &NextToken) const {
1853 
1854   // A list of consecutive registers: [s0,s1,s2,s3]
1855   if (Token.is(AsmToken::LBrac))
1856     return true;
1857 
1858   if (!Token.is(AsmToken::Identifier))
1859     return false;
1860 
1861   // A single register like s0 or a range of registers like s[0:1]
1862 
1863   StringRef RegName = Token.getString();
1864 
1865   for (StringRef Reg : Registers) {
1866     if (RegName.startswith(Reg)) {
1867       if (Reg.size() < RegName.size()) {
1868         unsigned RegNum;
1869         // A single register with an index: rXX
1870         if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum))
1871           return true;
1872       } else {
1873         // A range of registers: r[XX:YY].
1874         if (NextToken.is(AsmToken::LBrac))
1875           return true;
1876       }
1877     }
1878   }
1879 
1880   return getSpecialRegForName(RegName);
1881 }
1882 
1883 bool
1884 AMDGPUAsmParser::isRegister()
1885 {
1886   return isRegister(getToken(), peekToken());
1887 }
1888 
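// Parse a register reference: a special register name, a single register
// such as "v0", a register range such as "s[0:3]", or a bracketed list of
// consecutive registers such as "[v0,v1]". On success, returns true and
// fills in the register kind, the MC register, its first index and its
// width in dwords.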
1889 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1890                                           unsigned &RegNum, unsigned &RegWidth,
1891                                           unsigned *DwordRegIndex) {
1892   if (DwordRegIndex) { *DwordRegIndex = 0; }
1893   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
1894   if (getLexer().is(AsmToken::Identifier)) {
1895     StringRef RegName = Parser.getTok().getString();
1896     if ((Reg = getSpecialRegForName(RegName))) {
1897       Parser.Lex();
1898       RegKind = IS_SPECIAL;
1899     } else {
1900       unsigned RegNumIndex = 0;
1901       if (RegName[0] == 'v') {
1902         RegNumIndex = 1;
1903         RegKind = IS_VGPR;
1904       } else if (RegName[0] == 's') {
1905         RegNumIndex = 1;
1906         RegKind = IS_SGPR;
1907       } else if (RegName.startswith("ttmp")) {
1908         RegNumIndex = strlen("ttmp");
1909         RegKind = IS_TTMP;
1910       } else {
1911         return false;
1912       }
1913       if (RegName.size() > RegNumIndex) {
1914         // Single 32-bit register: vXX.
1915         if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
1916           return false;
1917         Parser.Lex();
1918         RegWidth = 1;
1919       } else {
1920         // Range of registers: v[XX:YY]. ":YY" is optional.
1921         Parser.Lex();
1922         int64_t RegLo, RegHi;
1923         if (getLexer().isNot(AsmToken::LBrac))
1924           return false;
1925         Parser.Lex();
1926 
1927         if (getParser().parseAbsoluteExpression(RegLo))
1928           return false;
1929 
1930         const bool isRBrace = getLexer().is(AsmToken::RBrac);
1931         if (!isRBrace && getLexer().isNot(AsmToken::Colon))
1932           return false;
1933         Parser.Lex();
1934 
1935         if (isRBrace) {
1936           RegHi = RegLo;
1937         } else {
1938           if (getParser().parseAbsoluteExpression(RegHi))
1939             return false;
1940 
1941           if (getLexer().isNot(AsmToken::RBrac))
1942             return false;
1943           Parser.Lex();
1944         }
1945         RegNum = (unsigned) RegLo;
1946         RegWidth = (RegHi - RegLo) + 1;
1947       }
1948     }
1949   } else if (getLexer().is(AsmToken::LBrac)) {
1950     // List of consecutive registers: [s0,s1,s2,s3]
1951     Parser.Lex();
1952     if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
1953       return false;
1954     if (RegWidth != 1)
1955       return false;
1956     RegisterKind RegKind1;
1957     unsigned Reg1, RegNum1, RegWidth1;
1958     do {
1959       if (getLexer().is(AsmToken::Comma)) {
1960         Parser.Lex();
1961       } else if (getLexer().is(AsmToken::RBrac)) {
1962         Parser.Lex();
1963         break;
1964       } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
1965         if (RegWidth1 != 1) {
1966           return false;
1967         }
1968         if (RegKind1 != RegKind) {
1969           return false;
1970         }
1971         if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
1972           return false;
1973         }
1974       } else {
1975         return false;
1976       }
1977     } while (true);
1978   } else {
1979     return false;
1980   }
1981   switch (RegKind) {
1982   case IS_SPECIAL:
1983     RegNum = 0;
1984     RegWidth = 1;
1985     break;
1986   case IS_VGPR:
1987   case IS_SGPR:
1988   case IS_TTMP:
1989   {
1990     unsigned Size = 1;
1991     if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
1992       // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
1993       Size = std::min(RegWidth, 4u);
1994     }
1995     if (RegNum % Size != 0)
1996       return false;
1997     if (DwordRegIndex) { *DwordRegIndex = RegNum; }
1998     RegNum = RegNum / Size;
1999     int RCID = getRegClass(RegKind, RegWidth);
2000     if (RCID == -1)
2001       return false;
    const MCRegisterClass &RC = TRI->getRegClass(RCID);
2003     if (RegNum >= RC.getNumRegs())
2004       return false;
2005     Reg = RC.getRegister(RegNum);
2006     break;
2007   }
2008 
2009   default:
2010     llvm_unreachable("unexpected register kind");
2011   }
2012 
2013   if (!subtargetHasRegister(*TRI, Reg))
2014     return false;
2015   return true;
2016 }
2017 
2018 Optional<StringRef>
2019 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2020   switch (RegKind) {
2021   case IS_VGPR:
2022     return StringRef(".amdgcn.next_free_vgpr");
2023   case IS_SGPR:
2024     return StringRef(".amdgcn.next_free_sgpr");
2025   default:
2026     return None;
2027   }
2028 }
2029 
2030 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2031   auto SymbolName = getGprCountSymbolName(RegKind);
2032   assert(SymbolName && "initializing invalid register kind");
2033   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2034   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2035 }
2036 
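// Raise .amdgcn.next_free_{v,s}gpr so that it covers the highest register
// index used so far. Returns false (after reporting an error) if the symbol
// is not a variable with an absolute value.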
2037 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2038                                             unsigned DwordRegIndex,
2039                                             unsigned RegWidth) {
2040   // Symbols are only defined for GCN targets
2041   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2042     return true;
2043 
2044   auto SymbolName = getGprCountSymbolName(RegKind);
2045   if (!SymbolName)
2046     return true;
2047   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2048 
2049   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2050   int64_t OldCount;
2051 
2052   if (!Sym->isVariable())
2053     return !Error(getParser().getTok().getLoc(),
2054                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2055   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2056     return !Error(
2057         getParser().getTok().getLoc(),
2058         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2059 
2060   if (OldCount <= NewMax)
2061     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2062 
2063   return true;
2064 }
2065 
2066 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
2067   const auto &Tok = Parser.getTok();
2068   SMLoc StartLoc = Tok.getLoc();
2069   SMLoc EndLoc = Tok.getEndLoc();
2070   RegisterKind RegKind;
2071   unsigned Reg, RegNum, RegWidth, DwordRegIndex;
2072 
2073   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
    // FIXME: improve error messages (bug 41303).
2075     Error(StartLoc, "not a valid operand.");
2076     return nullptr;
2077   }
2078   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2079     if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
2080       return nullptr;
2081   } else
2082     KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
2083   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2084 }
2085 
2086 OperandMatchResultTy
2087 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2088   // TODO: add syntactic sugar for 1/(2*PI)
2089 
2090   assert(!isRegister());
2091   assert(!isModifier());
2092 
2093   const auto& Tok = getToken();
2094   const auto& NextTok = peekToken();
2095   bool IsReal = Tok.is(AsmToken::Real);
2096   SMLoc S = getLoc();
2097   bool Negate = false;
2098 
2099   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2100     lex();
2101     IsReal = true;
2102     Negate = true;
2103   }
2104 
2105   if (IsReal) {
    // Floating-point expressions are not supported.
    // Only floating-point literals with an optional sign are allowed.
2109 
2110     StringRef Num = getTokenStr();
2111     lex();
2112 
2113     APFloat RealVal(APFloat::IEEEdouble());
2114     auto roundMode = APFloat::rmNearestTiesToEven;
2115     if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) {
2116       return MatchOperand_ParseFail;
2117     }
2118     if (Negate)
2119       RealVal.changeSign();
2120 
2121     Operands.push_back(
2122       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2123                                AMDGPUOperand::ImmTyNone, true));
2124 
2125     return MatchOperand_Success;
2126 
2127   } else {
2128     int64_t IntVal;
2129     const MCExpr *Expr;
2130     SMLoc S = getLoc();
2131 
2132     if (HasSP3AbsModifier) {
      // This is a workaround for handling expressions
      // as arguments of the SP3 'abs' modifier, for example:
      //     |1.0|
      //     |-1|
      //     |1+x|
      // This syntax is not compatible with the syntax of standard
      // MC expressions (due to the trailing '|').
2140       SMLoc EndLoc;
2141       if (getParser().parsePrimaryExpr(Expr, EndLoc))
2142         return MatchOperand_ParseFail;
2143     } else {
2144       if (Parser.parseExpression(Expr))
2145         return MatchOperand_ParseFail;
2146     }
2147 
2148     if (Expr->evaluateAsAbsolute(IntVal)) {
2149       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2150     } else {
2151       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2152     }
2153 
2154     return MatchOperand_Success;
2155   }
2156 
2157   return MatchOperand_NoMatch;
2158 }
2159 
2160 OperandMatchResultTy
2161 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2162   if (!isRegister())
2163     return MatchOperand_NoMatch;
2164 
2165   if (auto R = parseRegister()) {
2166     assert(R->isReg());
2167     Operands.push_back(std::move(R));
2168     return MatchOperand_Success;
2169   }
2170   return MatchOperand_ParseFail;
2171 }
2172 
2173 OperandMatchResultTy
2174 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2175   auto res = parseReg(Operands);
2176   if (res != MatchOperand_NoMatch) {
2177     return res;
2178   } else if (isModifier()) {
2179     return MatchOperand_NoMatch;
2180   } else {
2181     return parseImm(Operands, HasSP3AbsMod);
2182   }
2183 }
2184 
2185 bool
2186 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2187   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2188     const auto &str = Token.getString();
2189     return str == "abs" || str == "neg" || str == "sext";
2190   }
2191   return false;
2192 }
2193 
2194 bool
2195 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2196   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2197 }
2198 
2199 bool
2200 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2201   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2202 }
2203 
2204 bool
2205 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2206   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2207 }
2208 
2209 // Check if this is an operand modifier or an opcode modifier
// which may look like an expression but is not. We should
2211 // avoid parsing these modifiers as expressions. Currently
2212 // recognized sequences are:
2213 //   |...|
2214 //   abs(...)
2215 //   neg(...)
2216 //   sext(...)
2217 //   -reg
2218 //   -|...|
2219 //   -abs(...)
2220 //   name:...
2221 // Note that simple opcode modifiers like 'gds' may be parsed as
2222 // expressions; this is a special case. See getExpressionAsToken.
2223 //
2224 bool
2225 AMDGPUAsmParser::isModifier() {
2226 
2227   AsmToken Tok = getToken();
2228   AsmToken NextToken[2];
2229   peekTokens(NextToken);
2230 
2231   return isOperandModifier(Tok, NextToken[0]) ||
2232          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2233          isOpcodeModifierWithVal(Tok, NextToken[0]);
2234 }
2235 
2236 // Check if the current token is an SP3 'neg' modifier.
// Currently this modifier is allowed in the following contexts:
2238 //
2239 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2240 // 2. Before an 'abs' modifier: -abs(...)
2241 // 3. Before an SP3 'abs' modifier: -|...|
2242 //
2243 // In all other cases "-" is handled as a part
2244 // of an expression that follows the sign.
2245 //
// Note: When "-" is followed by an integer literal,
// it is interpreted as integer negation rather
// than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of the floating-point
// NEG modifier would have given integer literals a different meaning
// with VOP1/2/C and VOP3, for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise for uniformity.
2257 //
2258 bool
2259 AMDGPUAsmParser::parseSP3NegModifier() {
2260 
2261   AsmToken NextToken[2];
2262   peekTokens(NextToken);
2263 
2264   if (isToken(AsmToken::Minus) &&
2265       (isRegister(NextToken[0], NextToken[1]) ||
2266        NextToken[0].is(AsmToken::Pipe) ||
2267        isId(NextToken[0], "abs"))) {
2268     lex();
2269     return true;
2270   }
2271 
2272   return false;
2273 }
2274 
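// Parse a register or immediate with optional FP input modifiers. Both named
// ("neg(...)", "abs(...)") and SP3 ("-...", "|...|") forms are accepted and
// may be combined, e.g. "-|v0|" or "neg(abs(v1))".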
2275 OperandMatchResultTy
2276 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2277                                               bool AllowImm) {
2278   bool Neg, SP3Neg;
2279   bool Abs, SP3Abs;
2280   SMLoc Loc;
2281 
2282   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2283   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2284     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2285     return MatchOperand_ParseFail;
2286   }
2287 
2288   SP3Neg = parseSP3NegModifier();
2289 
2290   Loc = getLoc();
2291   Neg = trySkipId("neg");
2292   if (Neg && SP3Neg) {
2293     Error(Loc, "expected register or immediate");
2294     return MatchOperand_ParseFail;
2295   }
2296   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2297     return MatchOperand_ParseFail;
2298 
2299   Abs = trySkipId("abs");
2300   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2301     return MatchOperand_ParseFail;
2302 
2303   Loc = getLoc();
2304   SP3Abs = trySkipToken(AsmToken::Pipe);
2305   if (Abs && SP3Abs) {
2306     Error(Loc, "expected register or immediate");
2307     return MatchOperand_ParseFail;
2308   }
2309 
2310   OperandMatchResultTy Res;
2311   if (AllowImm) {
2312     Res = parseRegOrImm(Operands, SP3Abs);
2313   } else {
2314     Res = parseReg(Operands);
2315   }
2316   if (Res != MatchOperand_Success) {
2317     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2318   }
2319 
2320   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2321     return MatchOperand_ParseFail;
2322   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2323     return MatchOperand_ParseFail;
2324   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2325     return MatchOperand_ParseFail;
2326 
2327   AMDGPUOperand::Modifiers Mods;
2328   Mods.Abs = Abs || SP3Abs;
2329   Mods.Neg = Neg || SP3Neg;
2330 
2331   if (Mods.hasFPModifiers()) {
2332     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2333     if (Op.isExpr()) {
2334       Error(Op.getStartLoc(), "expected an absolute expression");
2335       return MatchOperand_ParseFail;
2336     }
2337     Op.setModifiers(Mods);
2338   }
2339   return MatchOperand_Success;
2340 }
2341 
2342 OperandMatchResultTy
2343 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2344                                                bool AllowImm) {
2345   bool Sext = trySkipId("sext");
2346   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2347     return MatchOperand_ParseFail;
2348 
2349   OperandMatchResultTy Res;
2350   if (AllowImm) {
2351     Res = parseRegOrImm(Operands);
2352   } else {
2353     Res = parseReg(Operands);
2354   }
2355   if (Res != MatchOperand_Success) {
2356     return Sext? MatchOperand_ParseFail : Res;
2357   }
2358 
2359   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2360     return MatchOperand_ParseFail;
2361 
2362   AMDGPUOperand::Modifiers Mods;
2363   Mods.Sext = Sext;
2364 
2365   if (Mods.hasIntModifiers()) {
2366     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2367     if (Op.isExpr()) {
2368       Error(Op.getStartLoc(), "expected an absolute expression");
2369       return MatchOperand_ParseFail;
2370     }
2371     Op.setModifiers(Mods);
2372   }
2373 
2374   return MatchOperand_Success;
2375 }
2376 
2377 OperandMatchResultTy
2378 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2379   return parseRegOrImmWithFPInputMods(Operands, false);
2380 }
2381 
2382 OperandMatchResultTy
2383 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2384   return parseRegOrImmWithIntInputMods(Operands, false);
2385 }
2386 
2387 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2388   auto Loc = getLoc();
2389   if (trySkipId("off")) {
2390     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2391                                                 AMDGPUOperand::ImmTyOff, false));
2392     return MatchOperand_Success;
2393   }
2394 
2395   if (!isRegister())
2396     return MatchOperand_NoMatch;
2397 
2398   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2399   if (Reg) {
2400     Operands.push_back(std::move(Reg));
2401     return MatchOperand_Success;
2402   }
2403 
2404   return MatchOperand_ParseFail;
2405 
2406 }
2407 
2408 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2409   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2410 
2411   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2412       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2413       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2414       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2415     return Match_InvalidOperand;
2416 
2417   if ((TSFlags & SIInstrFlags::VOP3) &&
2418       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2419       getForcedEncodingSize() != 64)
2420     return Match_PreferE32;
2421 
2422   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2423       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2424     // v_mac_f32/16 allow only dst_sel == DWORD;
2425     auto OpNum =
2426         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2427     const auto &Op = Inst.getOperand(OpNum);
2428     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2429       return Match_InvalidOperand;
2430     }
2431   }
2432 
2433   if (TSFlags & SIInstrFlags::FLAT) {
2434     // FIXME: Produces error without correct column reported.
2435     auto Opcode = Inst.getOpcode();
2436     auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
2437 
2438     const auto &Op = Inst.getOperand(OpNum);
2439     if (!hasFlatOffsets() && Op.getImm() != 0)
2440       return Match_InvalidOperand;
2441 
    // GFX10: The address offset is a 12-bit signed byte offset. For the FLAT
    // segment it must be non-negative; the MSB is ignored and forced to zero.
2444     if (isGFX10()) {
2445       if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
2446         if (!isInt<12>(Op.getImm()))
2447           return Match_InvalidOperand;
2448       } else {
2449         if (!isUInt<11>(Op.getImm()))
2450           return Match_InvalidOperand;
2451       }
2452     }
2453   }
2454 
2455   return Match_Success;
2456 }
2457 
2458 // What asm variants we should check
2459 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2460   if (getForcedEncodingSize() == 32) {
2461     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2462     return makeArrayRef(Variants);
2463   }
2464 
2465   if (isForcedVOP3()) {
2466     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2467     return makeArrayRef(Variants);
2468   }
2469 
2470   if (isForcedSDWA()) {
2471     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2472                                         AMDGPUAsmVariants::SDWA9};
2473     return makeArrayRef(Variants);
2474   }
2475 
2476   if (isForcedDPP()) {
2477     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2478     return makeArrayRef(Variants);
2479   }
2480 
2481   static const unsigned Variants[] = {
2482     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2483     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2484   };
2485 
2486   return makeArrayRef(Variants);
2487 }
2488 
2489 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2490   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2491   const unsigned Num = Desc.getNumImplicitUses();
2492   for (unsigned i = 0; i < Num; ++i) {
2493     unsigned Reg = Desc.ImplicitUses[i];
2494     switch (Reg) {
2495     case AMDGPU::FLAT_SCR:
2496     case AMDGPU::VCC:
2497     case AMDGPU::VCC_LO:
2498     case AMDGPU::VCC_HI:
2499     case AMDGPU::M0:
2500     case AMDGPU::SGPR_NULL:
2501       return Reg;
2502     default:
2503       break;
2504     }
2505   }
2506   return AMDGPU::NoRegister;
2507 }
2508 
// NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
// Note that there are no cases in which a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
2513 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2514                                        unsigned OpIdx) const {
2515   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2516 
2517   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2518     return false;
2519   }
2520 
2521   const MCOperand &MO = Inst.getOperand(OpIdx);
2522 
2523   int64_t Val = MO.getImm();
2524   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2525 
2526   switch (OpSize) { // expected operand size
2527   case 8:
2528     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2529   case 4:
2530     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2531   case 2: {
2532     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2533     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2534         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2535         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
2536         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
2537       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2538     } else {
2539       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2540     }
2541   }
2542   default:
2543     llvm_unreachable("invalid operand size");
2544   }
2545 }
2546 
2547 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2548   const MCOperand &MO = Inst.getOperand(OpIdx);
2549   if (MO.isImm()) {
2550     return !isInlineConstant(Inst, OpIdx);
2551   }
2552   return !MO.isReg() ||
2553          isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2554 }
2555 
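// Check that the instruction does not read more scalar values through the
// constant bus than the hardware allows: at most one on pre-GFX10 targets
// and at most two on GFX10. SGPRs and literal constants both count toward
// the limit.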
2556 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2557   const unsigned Opcode = Inst.getOpcode();
2558   const MCInstrDesc &Desc = MII.get(Opcode);
2559   unsigned ConstantBusUseCount = 0;
2560   unsigned NumLiterals = 0;
2561   unsigned LiteralSize;
2562 
2563   if (Desc.TSFlags &
2564       (SIInstrFlags::VOPC |
2565        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2566        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2567        SIInstrFlags::SDWA)) {
2568     // Check special imm operands (used by madmk, etc)
2569     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2570       ++ConstantBusUseCount;
2571     }
2572 
2573     SmallDenseSet<unsigned> SGPRsUsed;
2574     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2575     if (SGPRUsed != AMDGPU::NoRegister) {
2576       SGPRsUsed.insert(SGPRUsed);
2577       ++ConstantBusUseCount;
2578     }
2579 
2580     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2581     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2582     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2583 
2584     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2585 
2586     for (int OpIdx : OpIndices) {
2587       if (OpIdx == -1) break;
2588 
2589       const MCOperand &MO = Inst.getOperand(OpIdx);
2590       if (usesConstantBus(Inst, OpIdx)) {
2591         if (MO.isReg()) {
2592           const unsigned Reg = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersection like these
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction.
2599           if (!SGPRsUsed.count(Reg)) {
2600             SGPRsUsed.insert(Reg);
2601             ++ConstantBusUseCount;
2602           }
2603           SGPRUsed = Reg;
2604         } else { // Expression or a literal
2605 
2606           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
2607             continue; // special operand like VINTERP attr_chan
2608 
          // An instruction may use only one literal.
          // This has been validated in a previous step.
          // See validateVOP3Literal.
2612           // This literal may be used as more than one operand.
2613           // If all these operands are of the same size,
2614           // this literal counts as one scalar value.
2615           // Otherwise it counts as 2 scalar values.
2616           // See "GFX10 Shader Programming", section 3.6.2.3.
2617 
2618           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
2619           if (Size < 4) Size = 4;
2620 
2621           if (NumLiterals == 0) {
2622             NumLiterals = 1;
2623             LiteralSize = Size;
2624           } else if (LiteralSize != Size) {
2625             NumLiterals = 2;
2626           }
2627         }
2628       }
2629     }
2630   }
2631   ConstantBusUseCount += NumLiterals;
2632 
2633   if (isGFX10())
2634     return ConstantBusUseCount <= 2;
2635 
2636   return ConstantBusUseCount <= 1;
2637 }
2638 
2639 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2640   const unsigned Opcode = Inst.getOpcode();
2641   const MCInstrDesc &Desc = MII.get(Opcode);
2642 
2643   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2644   if (DstIdx == -1 ||
2645       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2646     return true;
2647   }
2648 
2649   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2650 
2651   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2652   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2653   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2654 
2655   assert(DstIdx != -1);
2656   const MCOperand &Dst = Inst.getOperand(DstIdx);
2657   assert(Dst.isReg());
2658   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2659 
2660   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2661 
2662   for (int SrcIdx : SrcIndices) {
2663     if (SrcIdx == -1) break;
2664     const MCOperand &Src = Inst.getOperand(SrcIdx);
2665     if (Src.isReg()) {
2666       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2667       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2668         return false;
2669       }
2670     }
2671   }
2672 
2673   return true;
2674 }
2675 
2676 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2677 
2678   const unsigned Opc = Inst.getOpcode();
2679   const MCInstrDesc &Desc = MII.get(Opc);
2680 
2681   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2682     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2683     assert(ClampIdx != -1);
2684     return Inst.getOperand(ClampIdx).getImm() == 0;
2685   }
2686 
2687   return true;
2688 }
2689 
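// Check that the size of the vdata register matches the number of components
// selected by dmask (plus one dword when tfe is set), taking packed d16 into
// account.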
2690 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2691 
2692   const unsigned Opc = Inst.getOpcode();
2693   const MCInstrDesc &Desc = MII.get(Opc);
2694 
2695   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2696     return true;
2697 
2698   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2699   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2700   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2701 
2702   assert(VDataIdx != -1);
2703   assert(DMaskIdx != -1);
2704   assert(TFEIdx != -1);
2705 
2706   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2707   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2708   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2709   if (DMask == 0)
2710     DMask = 1;
2711 
2712   unsigned DataSize =
2713     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2714   if (hasPackedD16()) {
2715     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2716     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2717       DataSize = (DataSize + 1) / 2;
2718   }
2719 
2720   return (VDataSize / 4) == DataSize + TFESize;
2721 }
2722 
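// GFX10: check that the number of address registers (or NSA address dwords)
// matches the count implied by the dim operand and the base opcode, rounding
// the register count up to the nearest supported tuple size for non-NSA
// forms.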
2723 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
2724   const unsigned Opc = Inst.getOpcode();
2725   const MCInstrDesc &Desc = MII.get(Opc);
2726 
2727   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
2728     return true;
2729 
2730   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
2731   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
2732       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
2733   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
2734   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
2735   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2736 
2737   assert(VAddr0Idx != -1);
2738   assert(SrsrcIdx != -1);
2739   assert(DimIdx != -1);
2740   assert(SrsrcIdx > VAddr0Idx);
2741 
2742   unsigned Dim = Inst.getOperand(DimIdx).getImm();
2743   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
2744   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
2745   unsigned VAddrSize =
2746       IsNSA ? SrsrcIdx - VAddr0Idx
2747             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
2748 
2749   unsigned AddrSize = BaseOpcode->NumExtraArgs +
2750                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
2751                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
2752                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
2753   if (!IsNSA) {
2754     if (AddrSize > 8)
2755       AddrSize = 16;
2756     else if (AddrSize > 4)
2757       AddrSize = 8;
2758   }
2759 
2760   return VAddrSize == AddrSize;
2761 }
2762 
2763 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
2764 
2765   const unsigned Opc = Inst.getOpcode();
2766   const MCInstrDesc &Desc = MII.get(Opc);
2767 
2768   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2769     return true;
2770   if (!Desc.mayLoad() || !Desc.mayStore())
2771     return true; // Not atomic
2772 
2773   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2774   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2775 
2776   // This is an incomplete check because image_atomic_cmpswap
2777   // may only use 0x3 and 0xf while other atomic operations
2778   // may use 0x1 and 0x3. However these limitations are
2779   // verified when we check that dmask matches dst size.
2780   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
2781 }
2782 
2783 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
2784 
2785   const unsigned Opc = Inst.getOpcode();
2786   const MCInstrDesc &Desc = MII.get(Opc);
2787 
2788   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
2789     return true;
2790 
2791   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2792   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2793 
2794   // GATHER4 instructions use dmask in a different fashion compared to
2795   // other MIMG instructions. The only useful DMASK values are
2796   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
2797   // (red,red,red,red) etc.) The ISA document doesn't mention
2798   // this.
2799   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
2800 }
2801 
2802 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
2803 
2804   const unsigned Opc = Inst.getOpcode();
2805   const MCInstrDesc &Desc = MII.get(Opc);
2806 
2807   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2808     return true;
2809 
2810   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2811   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
2812     if (isCI() || isSI())
2813       return false;
2814   }
2815 
2816   return true;
2817 }
2818 
2819 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
2820   const unsigned Opc = Inst.getOpcode();
2821   const MCInstrDesc &Desc = MII.get(Opc);
2822 
2823   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2824     return true;
2825 
2826   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2827   if (DimIdx < 0)
2828     return true;
2829 
2830   long Imm = Inst.getOperand(DimIdx).getImm();
2831   if (Imm < 0 || Imm >= 8)
2832     return false;
2833 
2834   return true;
2835 }
2836 
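// Opcodes whose source operands are encoded in reversed order (src1 OP src0).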
2837 static bool IsRevOpcode(const unsigned Opcode)
2838 {
2839   switch (Opcode) {
2840   case AMDGPU::V_SUBREV_F32_e32:
2841   case AMDGPU::V_SUBREV_F32_e64:
2842   case AMDGPU::V_SUBREV_F32_e32_gfx10:
2843   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
2844   case AMDGPU::V_SUBREV_F32_e32_vi:
2845   case AMDGPU::V_SUBREV_F32_e64_gfx10:
2846   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
2847   case AMDGPU::V_SUBREV_F32_e64_vi:
2848 
2849   case AMDGPU::V_SUBREV_I32_e32:
2850   case AMDGPU::V_SUBREV_I32_e64:
2851   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
2852   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
2853 
2854   case AMDGPU::V_SUBBREV_U32_e32:
2855   case AMDGPU::V_SUBBREV_U32_e64:
2856   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
2857   case AMDGPU::V_SUBBREV_U32_e32_vi:
2858   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
2859   case AMDGPU::V_SUBBREV_U32_e64_vi:
2860 
2861   case AMDGPU::V_SUBREV_U32_e32:
2862   case AMDGPU::V_SUBREV_U32_e64:
2863   case AMDGPU::V_SUBREV_U32_e32_gfx9:
2864   case AMDGPU::V_SUBREV_U32_e32_vi:
2865   case AMDGPU::V_SUBREV_U32_e64_gfx9:
2866   case AMDGPU::V_SUBREV_U32_e64_vi:
2867 
2868   case AMDGPU::V_SUBREV_F16_e32:
2869   case AMDGPU::V_SUBREV_F16_e64:
2870   case AMDGPU::V_SUBREV_F16_e32_gfx10:
2871   case AMDGPU::V_SUBREV_F16_e32_vi:
2872   case AMDGPU::V_SUBREV_F16_e64_gfx10:
2873   case AMDGPU::V_SUBREV_F16_e64_vi:
2874 
2875   case AMDGPU::V_SUBREV_U16_e32:
2876   case AMDGPU::V_SUBREV_U16_e64:
2877   case AMDGPU::V_SUBREV_U16_e32_vi:
2878   case AMDGPU::V_SUBREV_U16_e64_vi:
2879 
2880   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
2881   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
2882   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
2883 
2884   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
2885   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
2886 
2887   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
2888   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
2889 
2890   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
2891   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
2892 
2893   case AMDGPU::V_LSHRREV_B32_e32:
2894   case AMDGPU::V_LSHRREV_B32_e64:
2895   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
2896   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
2897   case AMDGPU::V_LSHRREV_B32_e32_vi:
2898   case AMDGPU::V_LSHRREV_B32_e64_vi:
2899   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
2900   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
2901 
2902   case AMDGPU::V_ASHRREV_I32_e32:
2903   case AMDGPU::V_ASHRREV_I32_e64:
2904   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
2905   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
2906   case AMDGPU::V_ASHRREV_I32_e32_vi:
2907   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
2908   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
2909   case AMDGPU::V_ASHRREV_I32_e64_vi:
2910 
2911   case AMDGPU::V_LSHLREV_B32_e32:
2912   case AMDGPU::V_LSHLREV_B32_e64:
2913   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
2914   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
2915   case AMDGPU::V_LSHLREV_B32_e32_vi:
2916   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
2917   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
2918   case AMDGPU::V_LSHLREV_B32_e64_vi:
2919 
2920   case AMDGPU::V_LSHLREV_B16_e32:
2921   case AMDGPU::V_LSHLREV_B16_e64:
2922   case AMDGPU::V_LSHLREV_B16_e32_vi:
2923   case AMDGPU::V_LSHLREV_B16_e64_vi:
2924   case AMDGPU::V_LSHLREV_B16_gfx10:
2925 
2926   case AMDGPU::V_LSHRREV_B16_e32:
2927   case AMDGPU::V_LSHRREV_B16_e64:
2928   case AMDGPU::V_LSHRREV_B16_e32_vi:
2929   case AMDGPU::V_LSHRREV_B16_e64_vi:
2930   case AMDGPU::V_LSHRREV_B16_gfx10:
2931 
2932   case AMDGPU::V_ASHRREV_I16_e32:
2933   case AMDGPU::V_ASHRREV_I16_e64:
2934   case AMDGPU::V_ASHRREV_I16_e32_vi:
2935   case AMDGPU::V_ASHRREV_I16_e64_vi:
2936   case AMDGPU::V_ASHRREV_I16_gfx10:
2937 
2938   case AMDGPU::V_LSHLREV_B64:
2939   case AMDGPU::V_LSHLREV_B64_gfx10:
2940   case AMDGPU::V_LSHLREV_B64_vi:
2941 
2942   case AMDGPU::V_LSHRREV_B64:
2943   case AMDGPU::V_LSHRREV_B64_gfx10:
2944   case AMDGPU::V_LSHRREV_B64_vi:
2945 
2946   case AMDGPU::V_ASHRREV_I64:
2947   case AMDGPU::V_ASHRREV_I64_gfx10:
2948   case AMDGPU::V_ASHRREV_I64_vi:
2949 
2950   case AMDGPU::V_PK_LSHLREV_B16:
2951   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
2952   case AMDGPU::V_PK_LSHLREV_B16_vi:
2953 
2954   case AMDGPU::V_PK_LSHRREV_B16:
2955   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
2956   case AMDGPU::V_PK_LSHRREV_B16_vi:
2957   case AMDGPU::V_PK_ASHRREV_I16:
2958   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
2959   case AMDGPU::V_PK_ASHRREV_I16_vi:
2960     return true;
2961   default:
2962     return false;
2963   }
2964 }
2965 
2966 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
2967 
2968   using namespace SIInstrFlags;
2969   const unsigned Opcode = Inst.getOpcode();
2970   const MCInstrDesc &Desc = MII.get(Opcode);
2971 
  // The lds_direct register is defined so that it can be used
  // with 9-bit source operands only. Ignore encodings that do not accept
  // such operands.
2974   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
2975     return true;
2976 
2977   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2978   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2979   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2980 
2981   const int SrcIndices[] = { Src1Idx, Src2Idx };
2982 
2983   // lds_direct cannot be specified as either src1 or src2.
2984   for (int SrcIdx : SrcIndices) {
2985     if (SrcIdx == -1) break;
2986     const MCOperand &Src = Inst.getOperand(SrcIdx);
2987     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
2988       return false;
2989     }
2990   }
2991 
2992   if (Src0Idx == -1)
2993     return true;
2994 
2995   const MCOperand &Src = Inst.getOperand(Src0Idx);
2996   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
2997     return true;
2998 
2999   // lds_direct is specified as src0. Check additional limitations.
3000   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3001 }
3002 
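// SOP2/SOPC instructions may encode at most one 32-bit literal; if both
// sources are literals they must have the same value.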
3003 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3004   unsigned Opcode = Inst.getOpcode();
3005   const MCInstrDesc &Desc = MII.get(Opcode);
3006   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3007     return true;
3008 
3009   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3010   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3011 
3012   const int OpIndices[] = { Src0Idx, Src1Idx };
3013 
3014   unsigned NumLiterals = 0;
3015   uint32_t LiteralValue;
3016 
3017   for (int OpIdx : OpIndices) {
3018     if (OpIdx == -1) break;
3019 
3020     const MCOperand &MO = Inst.getOperand(OpIdx);
3021     if (MO.isImm() &&
3022         // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3023         AMDGPU::isSISrcOperand(Desc, OpIdx) &&
3024         !isInlineConstant(Inst, OpIdx)) {
3025       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3026       if (NumLiterals == 0 || LiteralValue != Value) {
3027         LiteralValue = Value;
3028         ++NumLiterals;
3029       }
3030     }
3031   }
3032 
3033   return NumLiterals <= 1;
3034 }
3035 
3036 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3037   const unsigned Opc = Inst.getOpcode();
3038   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3039       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3040     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3041     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3042 
3043     if (OpSel & ~3)
3044       return false;
3045   }
3046   return true;
3047 }
3048 
3049 // Check if VCC register matches wavefront size
3050 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3051   auto FB = getFeatureBits();
3052   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3053     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3054 }
3055 
3056 // VOP3 literal is only allowed in GFX10+ and only one can be used
3057 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3058   unsigned Opcode = Inst.getOpcode();
3059   const MCInstrDesc &Desc = MII.get(Opcode);
3060   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3061     return true;
3062 
3063   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3064   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3065   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3066 
3067   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3068 
3069   unsigned NumLiterals = 0;
3070   uint32_t LiteralValue;
3071 
3072   for (int OpIdx : OpIndices) {
3073     if (OpIdx == -1) break;
3074 
3075     const MCOperand &MO = Inst.getOperand(OpIdx);
3076     if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx))
3077       continue;
3078 
3079     if (!isInlineConstant(Inst, OpIdx)) {
3080       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3081       if (NumLiterals == 0 || LiteralValue != Value) {
3082         LiteralValue = Value;
3083         ++NumLiterals;
3084       }
3085     }
3086   }
3087 
3088   return !NumLiterals ||
3089          (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3090 }
3091 
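// Run the target-specific semantic checks that the generic instruction
// matcher cannot express, reporting an error at IDLoc on the first failure.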
3092 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3093                                           const SMLoc &IDLoc) {
3094   if (!validateLdsDirect(Inst)) {
3095     Error(IDLoc,
3096       "invalid use of lds_direct");
3097     return false;
3098   }
3099   if (!validateSOPLiteral(Inst)) {
3100     Error(IDLoc,
3101       "only one literal operand is allowed");
3102     return false;
3103   }
3104   if (!validateVOP3Literal(Inst)) {
3105     Error(IDLoc,
3106       "invalid literal operand");
3107     return false;
3108   }
3109   if (!validateConstantBusLimitations(Inst)) {
3110     Error(IDLoc,
3111       "invalid operand (violates constant bus restrictions)");
3112     return false;
3113   }
3114   if (!validateEarlyClobberLimitations(Inst)) {
3115     Error(IDLoc,
3116       "destination must be different than all sources");
3117     return false;
3118   }
3119   if (!validateIntClampSupported(Inst)) {
3120     Error(IDLoc,
3121       "integer clamping is not supported on this GPU");
3122     return false;
3123   }
3124   if (!validateOpSel(Inst)) {
3125     Error(IDLoc,
3126       "invalid op_sel operand");
3127     return false;
3128   }
  // For MUBUF/MTBUF, d16 is part of the opcode, so there is nothing to validate.
3130   if (!validateMIMGD16(Inst)) {
3131     Error(IDLoc,
3132       "d16 modifier is not supported on this GPU");
3133     return false;
3134   }
3135   if (!validateMIMGDim(Inst)) {
3136     Error(IDLoc, "dim modifier is required on this GPU");
3137     return false;
3138   }
3139   if (!validateMIMGDataSize(Inst)) {
3140     Error(IDLoc,
3141       "image data size does not match dmask and tfe");
3142     return false;
3143   }
3144   if (!validateMIMGAddrSize(Inst)) {
3145     Error(IDLoc,
3146       "image address size does not match dim and a16");
3147     return false;
3148   }
3149   if (!validateMIMGAtomicDMask(Inst)) {
3150     Error(IDLoc,
3151       "invalid atomic image dmask");
3152     return false;
3153   }
3154   if (!validateMIMGGatherDMask(Inst)) {
3155     Error(IDLoc,
3156       "invalid image_gather dmask: only one bit must be set");
3157     return false;
3158   }
3159 
3160   return true;
3161 }
3162 
3163 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3164                                             const FeatureBitset &FBS,
3165                                             unsigned VariantID = 0);
3166 
3167 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3168                                               OperandVector &Operands,
3169                                               MCStreamer &Out,
3170                                               uint64_t &ErrorInfo,
3171                                               bool MatchingInlineAsm) {
3172   MCInst Inst;
3173   unsigned Result = Match_Success;
3174   for (auto Variant : getMatchedVariants()) {
3175     uint64_t EI;
3176     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3177                                   Variant);
    // Match statuses are ordered from least to most specific; we use the most
    // specific status as the result:
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3181     if ((R == Match_Success) ||
3182         (R == Match_PreferE32) ||
3183         (R == Match_MissingFeature && Result != Match_PreferE32) ||
3184         (R == Match_InvalidOperand && Result != Match_MissingFeature
3185                                    && Result != Match_PreferE32) ||
3186         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
3187                                    && Result != Match_MissingFeature
3188                                    && Result != Match_PreferE32)) {
3189       Result = R;
3190       ErrorInfo = EI;
3191     }
3192     if (R == Match_Success)
3193       break;
3194   }
3195 
3196   switch (Result) {
3197   default: break;
3198   case Match_Success:
3199     if (!validateInstruction(Inst, IDLoc)) {
3200       return true;
3201     }
3202     Inst.setLoc(IDLoc);
3203     Out.EmitInstruction(Inst, getSTI());
3204     return false;
3205 
3206   case Match_MissingFeature:
3207     return Error(IDLoc, "instruction not supported on this GPU");
3208 
3209   case Match_MnemonicFail: {
3210     FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3211     std::string Suggestion = AMDGPUMnemonicSpellCheck(
3212         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
3213     return Error(IDLoc, "invalid instruction" + Suggestion,
3214                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
3215   }
3216 
3217   case Match_InvalidOperand: {
3218     SMLoc ErrorLoc = IDLoc;
3219     if (ErrorInfo != ~0ULL) {
3220       if (ErrorInfo >= Operands.size()) {
3221         return Error(IDLoc, "too few operands for instruction");
3222       }
3223       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3224       if (ErrorLoc == SMLoc())
3225         ErrorLoc = IDLoc;
3226     }
3227     return Error(ErrorLoc, "invalid operand for instruction");
3228   }
3229 
3230   case Match_PreferE32:
3231     return Error(IDLoc, "internal error: instruction without _e64 suffix "
3232                         "should be encoded as e32");
3233   }
3234   llvm_unreachable("Implement any new match types added!");
3235 }
3236 
3237 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3238   int64_t Tmp = -1;
3239   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3240     return true;
3241   }
3242   if (getParser().parseAbsoluteExpression(Tmp)) {
3243     return true;
3244   }
3245   Ret = static_cast<uint32_t>(Tmp);
3246   return false;
3247 }
3248 
3249 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3250                                                uint32_t &Minor) {
3251   if (ParseAsAbsoluteExpression(Major))
3252     return TokError("invalid major version");
3253 
3254   if (getLexer().isNot(AsmToken::Comma))
3255     return TokError("minor version number required, comma expected");
3256   Lex();
3257 
3258   if (ParseAsAbsoluteExpression(Minor))
3259     return TokError("invalid minor version");
3260 
3261   return false;
3262 }
3263 
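// Example of the directive this parses (the target string is illustrative;
// it must match the string IsaInfo::streamIsaVersion produces for the
// current subtarget):
//   .amdgcn_target "amdgcn-amd-amdhsa--gfx900"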
3264 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3265   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3266     return TokError("directive only supported for amdgcn architecture");
3267 
3268   std::string Target;
3269 
3270   SMLoc TargetStart = getTok().getLoc();
3271   if (getParser().parseEscapedString(Target))
3272     return true;
3273   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3274 
3275   std::string ExpectedTarget;
3276   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3277   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3278 
3279   if (Target != ExpectedTargetOS.str())
3280     return getParser().Error(TargetRange.Start, "target must match options",
3281                              TargetRange);
3282 
3283   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3284   return false;
3285 }
3286 
3287 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3288   return getParser().Error(Range.Start, "value out of range", Range);
3289 }
3290 
3291 bool AMDGPUAsmParser::calculateGPRBlocks(
3292     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3293     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
3294     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
3295     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
3296   // TODO(scott.linder): These calculations are duplicated from
3297   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3298   IsaVersion Version = getIsaVersion(getSTI().getCPU());
3299 
3300   unsigned NumVGPRs = NextFreeVGPR;
3301   unsigned NumSGPRs = NextFreeSGPR;
3302 
3303   if (Version.Major >= 10)
3304     NumSGPRs = 0;
3305   else {
3306     unsigned MaxAddressableNumSGPRs =
3307         IsaInfo::getAddressableNumSGPRs(&getSTI());
3308 
3309     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3310         NumSGPRs > MaxAddressableNumSGPRs)
3311       return OutOfRangeError(SGPRRange);
3312 
3313     NumSGPRs +=
3314         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3315 
3316     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3317         NumSGPRs > MaxAddressableNumSGPRs)
3318       return OutOfRangeError(SGPRRange);
3319 
3320     if (Features.test(FeatureSGPRInitBug))
3321       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3322   }
3323 
3324   VGPRBlocks =
3325       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
3326   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3327 
3328   return false;
3329 }
3330 
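// Parses a .amdhsa_kernel block. A minimal sketch (values are illustrative;
// the two .amdhsa_next_free_* directives are required, see the checks below):
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel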
3331 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3332   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3333     return TokError("directive only supported for amdgcn architecture");
3334 
3335   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3336     return TokError("directive only supported for amdhsa OS");
3337 
3338   StringRef KernelName;
3339   if (getParser().parseIdentifier(KernelName))
3340     return true;
3341 
3342   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3343 
3344   StringSet<> Seen;
3345 
3346   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3347 
3348   SMRange VGPRRange;
3349   uint64_t NextFreeVGPR = 0;
3350   SMRange SGPRRange;
3351   uint64_t NextFreeSGPR = 0;
3352   unsigned UserSGPRCount = 0;
3353   bool ReserveVCC = true;
3354   bool ReserveFlatScr = true;
3355   bool ReserveXNACK = hasXNACK();
3356   Optional<bool> EnableWavefrontSize32;
3357 
3358   while (true) {
3359     while (getLexer().is(AsmToken::EndOfStatement))
3360       Lex();
3361 
3362     if (getLexer().isNot(AsmToken::Identifier))
3363       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3364 
3365     StringRef ID = getTok().getIdentifier();
3366     SMRange IDRange = getTok().getLocRange();
3367     Lex();
3368 
3369     if (ID == ".end_amdhsa_kernel")
3370       break;
3371 
3372     if (Seen.find(ID) != Seen.end())
3373       return TokError(".amdhsa_ directives cannot be repeated");
3374     Seen.insert(ID);
3375 
3376     SMLoc ValStart = getTok().getLoc();
3377     int64_t IVal;
3378     if (getParser().parseAbsoluteExpression(IVal))
3379       return true;
3380     SMLoc ValEnd = getTok().getLoc();
3381     SMRange ValRange = SMRange(ValStart, ValEnd);
3382 
3383     if (IVal < 0)
3384       return OutOfRangeError(ValRange);
3385 
3386     uint64_t Val = IVal;
3387 
3388 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
3389   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
3390     return OutOfRangeError(RANGE);                                             \
3391   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3392 
3393     if (ID == ".amdhsa_group_segment_fixed_size") {
3394       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3395         return OutOfRangeError(ValRange);
3396       KD.group_segment_fixed_size = Val;
3397     } else if (ID == ".amdhsa_private_segment_fixed_size") {
3398       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3399         return OutOfRangeError(ValRange);
3400       KD.private_segment_fixed_size = Val;
3401     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3402       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3403                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3404                        Val, ValRange);
3405       UserSGPRCount += 4;
3406     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3407       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3408                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3409                        ValRange);
3410       UserSGPRCount += 2;
3411     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3412       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3413                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3414                        ValRange);
3415       UserSGPRCount += 2;
3416     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3417       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3418                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3419                        Val, ValRange);
3420       UserSGPRCount += 2;
3421     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3422       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3423                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3424                        ValRange);
3425       UserSGPRCount += 2;
3426     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3427       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3428                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3429                        ValRange);
3430       UserSGPRCount += 2;
3431     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3432       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3433                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3434                        Val, ValRange);
3435       UserSGPRCount += 1;
3436     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3437       PARSE_BITS_ENTRY(
3438           KD.compute_pgm_rsrc2,
3439           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3440           ValRange);
3441     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3442       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3443                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3444                        ValRange);
3445     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3446       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3447                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3448                        ValRange);
3449     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3450       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3451                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3452                        ValRange);
3453     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3454       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3455                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3456                        ValRange);
3457     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3458       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3459                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3460                        ValRange);
3461     } else if (ID == ".amdhsa_next_free_vgpr") {
3462       VGPRRange = ValRange;
3463       NextFreeVGPR = Val;
3464     } else if (ID == ".amdhsa_next_free_sgpr") {
3465       SGPRRange = ValRange;
3466       NextFreeSGPR = Val;
3467     } else if (ID == ".amdhsa_reserve_vcc") {
3468       if (!isUInt<1>(Val))
3469         return OutOfRangeError(ValRange);
3470       ReserveVCC = Val;
3471     } else if (ID == ".amdhsa_reserve_flat_scratch") {
3472       if (IVersion.Major < 7)
3473         return getParser().Error(IDRange.Start, "directive requires gfx7+",
3474                                  IDRange);
3475       if (!isUInt<1>(Val))
3476         return OutOfRangeError(ValRange);
3477       ReserveFlatScr = Val;
3478     } else if (ID == ".amdhsa_reserve_xnack_mask") {
3479       if (IVersion.Major < 8)
3480         return getParser().Error(IDRange.Start, "directive requires gfx8+",
3481                                  IDRange);
3482       if (!isUInt<1>(Val))
3483         return OutOfRangeError(ValRange);
3484       ReserveXNACK = Val;
3485     } else if (ID == ".amdhsa_float_round_mode_32") {
3486       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3487                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
3488     } else if (ID == ".amdhsa_float_round_mode_16_64") {
3489       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3490                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
3491     } else if (ID == ".amdhsa_float_denorm_mode_32") {
3492       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3493                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
3494     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
3495       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3496                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
3497                        ValRange);
3498     } else if (ID == ".amdhsa_dx10_clamp") {
3499       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3500                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
3501     } else if (ID == ".amdhsa_ieee_mode") {
3502       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
3503                        Val, ValRange);
3504     } else if (ID == ".amdhsa_fp16_overflow") {
3505       if (IVersion.Major < 9)
3506         return getParser().Error(IDRange.Start, "directive requires gfx9+",
3507                                  IDRange);
3508       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
3509                        ValRange);
3510     } else if (ID == ".amdhsa_workgroup_processor_mode") {
3511       if (IVersion.Major < 10)
3512         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3513                                  IDRange);
3514       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
3515                        ValRange);
3516     } else if (ID == ".amdhsa_memory_ordered") {
3517       if (IVersion.Major < 10)
3518         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3519                                  IDRange);
3520       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
3521                        ValRange);
3522     } else if (ID == ".amdhsa_forward_progress") {
3523       if (IVersion.Major < 10)
3524         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3525                                  IDRange);
3526       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
3527                        ValRange);
3528     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
3529       PARSE_BITS_ENTRY(
3530           KD.compute_pgm_rsrc2,
3531           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
3532           ValRange);
3533     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
3534       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3535                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
3536                        Val, ValRange);
3537     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
3538       PARSE_BITS_ENTRY(
3539           KD.compute_pgm_rsrc2,
3540           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
3541           ValRange);
3542     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
3543       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3544                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
3545                        Val, ValRange);
3546     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
3547       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3548                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
3549                        Val, ValRange);
3550     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
3551       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3552                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
3553                        Val, ValRange);
3554     } else if (ID == ".amdhsa_exception_int_div_zero") {
3555       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3556                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
3557                        Val, ValRange);
3558     } else {
3559       return getParser().Error(IDRange.Start,
3560                                "unknown .amdhsa_kernel directive", IDRange);
3561     }
3562 
3563 #undef PARSE_BITS_ENTRY
3564   }
3565 
3566   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
3567     return TokError(".amdhsa_next_free_vgpr directive is required");
3568 
3569   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
3570     return TokError(".amdhsa_next_free_sgpr directive is required");
3571 
3572   unsigned VGPRBlocks;
3573   unsigned SGPRBlocks;
3574   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
3575                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
3576                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
3577                          SGPRBlocks))
3578     return true;
3579 
3580   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
3581           VGPRBlocks))
3582     return OutOfRangeError(VGPRRange);
3583   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3584                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
3585 
3586   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
3587           SGPRBlocks))
3588     return OutOfRangeError(SGPRRange);
3589   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3590                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
3591                   SGPRBlocks);
3592 
3593   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
3594     return TokError("too many user SGPRs enabled");
3595   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
3596                   UserSGPRCount);
3597 
3598   getTargetStreamer().EmitAmdhsaKernelDescriptor(
3599       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
3600       ReserveFlatScr, ReserveXNACK);
3601   return false;
3602 }
3603 
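// For example (version numbers are illustrative):
//   .hsa_code_object_version 2,1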
3604 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
3605   uint32_t Major;
3606   uint32_t Minor;
3607 
3608   if (ParseDirectiveMajorMinor(Major, Minor))
3609     return true;
3610 
3611   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
3612   return false;
3613 }
3614 
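// With explicit arguments the directive looks like (values illustrative;
// "AMD" and "AMDGPU" are the defaults used when no arguments are given):
//   .hsa_code_object_isa 9,0,0,"AMD","AMDGPU"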
3615 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
3616   uint32_t Major;
3617   uint32_t Minor;
3618   uint32_t Stepping;
3619   StringRef VendorName;
3620   StringRef ArchName;
3621 
3622   // If this directive has no arguments, then use the ISA version for the
3623   // targeted GPU.
3624   if (getLexer().is(AsmToken::EndOfStatement)) {
3625     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3626     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
3627                                                       ISA.Stepping,
3628                                                       "AMD", "AMDGPU");
3629     return false;
3630   }
3631 
3632   if (ParseDirectiveMajorMinor(Major, Minor))
3633     return true;
3634 
3635   if (getLexer().isNot(AsmToken::Comma))
3636     return TokError("stepping version number required, comma expected");
3637   Lex();
3638 
3639   if (ParseAsAbsoluteExpression(Stepping))
3640     return TokError("invalid stepping version");
3641 
3642   if (getLexer().isNot(AsmToken::Comma))
3643     return TokError("vendor name required, comma expected");
3644   Lex();
3645 
3646   if (getLexer().isNot(AsmToken::String))
3647     return TokError("invalid vendor name");
3648 
3649   VendorName = getLexer().getTok().getStringContents();
3650   Lex();
3651 
3652   if (getLexer().isNot(AsmToken::Comma))
3653     return TokError("arch name required, comma expected");
3654   Lex();
3655 
3656   if (getLexer().isNot(AsmToken::String))
3657     return TokError("invalid arch name");
3658 
3659   ArchName = getLexer().getTok().getStringContents();
3660   Lex();
3661 
3662   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
3663                                                     VendorName, ArchName);
3664   return false;
3665 }
3666 
3667 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
3668                                                amd_kernel_code_t &Header) {
3669   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
3670   // assembly for backwards compatibility.
3671   if (ID == "max_scratch_backing_memory_byte_size") {
3672     Parser.eatToEndOfStatement();
3673     return false;
3674   }
3675 
3676   SmallString<40> ErrStr;
3677   raw_svector_ostream Err(ErrStr);
3678   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
3679     return TokError(Err.str());
3680   }
3681   Lex();
3682 
3683   if (ID == "enable_wgp_mode") {
3684     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
3685       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
3686   }
3687 
3688   if (ID == "enable_mem_ordered") {
3689     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
3690       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
3691   }
3692 
3693   if (ID == "enable_fwd_progress") {
3694     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
3695       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
3696   }
3697 
3698   return false;
3699 }
3700 
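// Parses an .amd_kernel_code_t block. A minimal sketch (the field shown and
// its "name = value" form are illustrative; individual fields are parsed by
// ParseAMDKernelCodeTValue below):
//   .amd_kernel_code_t
//     enable_sgpr_kernarg_segment_ptr = 1
//   .end_amd_kernel_code_t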
3701 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
3702   amd_kernel_code_t Header;
3703   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
3704 
3705   while (true) {
3706     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
3707     // will set the current token to EndOfStatement.
3708     while(getLexer().is(AsmToken::EndOfStatement))
3709       Lex();
3710 
3711     if (getLexer().isNot(AsmToken::Identifier))
3712       return TokError("expected value identifier or .end_amd_kernel_code_t");
3713 
3714     StringRef ID = getLexer().getTok().getIdentifier();
3715     Lex();
3716 
3717     if (ID == ".end_amd_kernel_code_t")
3718       break;
3719 
3720     if (ParseAMDKernelCodeTValue(ID, Header))
3721       return true;
3722   }
3723 
3724   getTargetStreamer().EmitAMDKernelCodeT(Header);
3725 
3726   return false;
3727 }
3728 
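// For example (the kernel name is illustrative):
//   .amdgpu_hsa_kernel my_kernel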
3729 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
3730   if (getLexer().isNot(AsmToken::Identifier))
3731     return TokError("expected symbol name");
3732 
3733   StringRef KernelName = Parser.getTok().getString();
3734 
3735   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
3736                                            ELF::STT_AMDGPU_HSA_KERNEL);
3737   Lex();
3738   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
3739     KernelScope.initialize(getContext());
3740   return false;
3741 }
3742 
3743 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
3744   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
3745     return Error(getParser().getTok().getLoc(),
3746                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
3747                  "architectures");
3748   }
3749 
3750   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
3751 
3752   std::string ISAVersionStringFromSTI;
3753   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
3754   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
3755 
3756   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
3757     return Error(getParser().getTok().getLoc(),
3758                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
3759                  "arguments specified through the command line");
3760   }
3761 
3762   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
3763   Lex();
3764 
3765   return false;
3766 }
3767 
3768 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
3769   const char *AssemblerDirectiveBegin;
3770   const char *AssemblerDirectiveEnd;
3771   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
3772       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
3773           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
3774                             HSAMD::V3::AssemblerDirectiveEnd)
3775           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
3776                             HSAMD::AssemblerDirectiveEnd);
3777 
3778   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
3779     return Error(getParser().getTok().getLoc(),
3780                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
3781                  "not available on non-amdhsa OSes")).str());
3782   }
3783 
3784   std::string HSAMetadataString;
3785   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
3786                           HSAMetadataString))
3787     return true;
3788 
3789   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
3790     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
3791       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3792   } else {
3793     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
3794       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3795   }
3796 
3797   return false;
3798 }
3799 
3800 /// Common code to parse out a block of text (typically YAML) between start and
3801 /// end directives.
3802 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
3803                                           const char *AssemblerDirectiveEnd,
3804                                           std::string &CollectString) {
3805 
3806   raw_string_ostream CollectStream(CollectString);
3807 
3808   getLexer().setSkipSpace(false);
3809 
3810   bool FoundEnd = false;
3811   while (!getLexer().is(AsmToken::Eof)) {
3812     while (getLexer().is(AsmToken::Space)) {
3813       CollectStream << getLexer().getTok().getString();
3814       Lex();
3815     }
3816 
3817     if (getLexer().is(AsmToken::Identifier)) {
3818       StringRef ID = getLexer().getTok().getIdentifier();
3819       if (ID == AssemblerDirectiveEnd) {
3820         Lex();
3821         FoundEnd = true;
3822         break;
3823       }
3824     }
3825 
3826     CollectStream << Parser.parseStringToEndOfStatement()
3827                   << getContext().getAsmInfo()->getSeparatorString();
3828 
3829     Parser.eatToEndOfStatement();
3830   }
3831 
3832   getLexer().setSkipSpace(true);
3833 
3834   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
3835     return TokError(Twine("expected directive ") +
3836                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
3837   }
3838 
3839   CollectStream.flush();
3840   return false;
3841 }
3842 
3843 /// Parse the assembler directive for new MsgPack-format PAL metadata.
3844 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
3845   std::string String;
3846   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
3847                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
3848     return true;
3849 
3850   auto PALMetadata = getTargetStreamer().getPALMetadata();
3851   if (!PALMetadata->setFromString(String))
3852     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
3853   return false;
3854 }
3855 
3856 /// Parse the assembler directive for old linear-format PAL metadata.
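/// The directive takes an even number of comma-separated integer values that
/// are interpreted as register/value pairs.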
3857 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
3858   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
3859     return Error(getParser().getTok().getLoc(),
3860                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
3861                  "not available on non-amdpal OSes")).str());
3862   }
3863 
3864   auto PALMetadata = getTargetStreamer().getPALMetadata();
3865   PALMetadata->setLegacy();
3866   for (;;) {
3867     uint32_t Key, Value;
3868     if (ParseAsAbsoluteExpression(Key)) {
3869       return TokError(Twine("invalid value in ") +
3870                       Twine(PALMD::AssemblerDirective));
3871     }
3872     if (getLexer().isNot(AsmToken::Comma)) {
3873       return TokError(Twine("expected an even number of values in ") +
3874                       Twine(PALMD::AssemblerDirective));
3875     }
3876     Lex();
3877     if (ParseAsAbsoluteExpression(Value)) {
3878       return TokError(Twine("invalid value in ") +
3879                       Twine(PALMD::AssemblerDirective));
3880     }
3881     PALMetadata->setRegister(Key, Value);
3882     if (getLexer().isNot(AsmToken::Comma))
3883       break;
3884     Lex();
3885   }
3886   return false;
3887 }
3888 
3889 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
3890   StringRef IDVal = DirectiveID.getString();
3891 
3892   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
3893     if (IDVal == ".amdgcn_target")
3894       return ParseDirectiveAMDGCNTarget();
3895 
3896     if (IDVal == ".amdhsa_kernel")
3897       return ParseDirectiveAMDHSAKernel();
3898 
3899     // TODO: Restructure/combine with PAL metadata directive.
3900     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
3901       return ParseDirectiveHSAMetadata();
3902   } else {
3903     if (IDVal == ".hsa_code_object_version")
3904       return ParseDirectiveHSACodeObjectVersion();
3905 
3906     if (IDVal == ".hsa_code_object_isa")
3907       return ParseDirectiveHSACodeObjectISA();
3908 
3909     if (IDVal == ".amd_kernel_code_t")
3910       return ParseDirectiveAMDKernelCodeT();
3911 
3912     if (IDVal == ".amdgpu_hsa_kernel")
3913       return ParseDirectiveAMDGPUHsaKernel();
3914 
3915     if (IDVal == ".amd_amdgpu_isa")
3916       return ParseDirectiveISAVersion();
3917 
3918     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
3919       return ParseDirectiveHSAMetadata();
3920   }
3921 
3922   if (IDVal == PALMD::AssemblerDirectiveBegin)
3923     return ParseDirectivePALMetadataBegin();
3924 
3925   if (IDVal == PALMD::AssemblerDirective)
3926     return ParseDirectivePALMetadata();
3927 
3928   return true;
3929 }
3930 
3931 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
3932                                            unsigned RegNo) const {
3933 
3934   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
3935        R.isValid(); ++R) {
3936     if (*R == RegNo)
3937       return isGFX9() || isGFX10();
3938   }
3939 
  // GFX10 has 2 more SGPRs: 104 and 105.
3941   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
3942        R.isValid(); ++R) {
3943     if (*R == RegNo)
3944       return hasSGPR104_SGPR105();
3945   }
3946 
3947   switch (RegNo) {
3948   case AMDGPU::SRC_SHARED_BASE:
3949   case AMDGPU::SRC_SHARED_LIMIT:
3950   case AMDGPU::SRC_PRIVATE_BASE:
3951   case AMDGPU::SRC_PRIVATE_LIMIT:
3952   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
3953     return !isCI() && !isSI() && !isVI();
3954   case AMDGPU::TBA:
3955   case AMDGPU::TBA_LO:
3956   case AMDGPU::TBA_HI:
3957   case AMDGPU::TMA:
3958   case AMDGPU::TMA_LO:
3959   case AMDGPU::TMA_HI:
3960     return !isGFX9() && !isGFX10();
3961   case AMDGPU::XNACK_MASK:
3962   case AMDGPU::XNACK_MASK_LO:
3963   case AMDGPU::XNACK_MASK_HI:
3964     return !isCI() && !isSI() && !isGFX10() && hasXNACK();
3965   case AMDGPU::SGPR_NULL:
3966     return isGFX10();
3967   default:
3968     break;
3969   }
3970 
3971   if (isCI())
3972     return true;
3973 
3974   if (isSI() || isGFX10()) {
3975     // No flat_scr on SI.
3976     // On GFX10 flat scratch is not a valid register operand and can only be
3977     // accessed with s_setreg/s_getreg.
3978     switch (RegNo) {
3979     case AMDGPU::FLAT_SCR:
3980     case AMDGPU::FLAT_SCR_LO:
3981     case AMDGPU::FLAT_SCR_HI:
3982       return false;
3983     default:
3984       return true;
3985     }
3986   }
3987 
3988   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
3989   // SI/CI have.
3990   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
3991        R.isValid(); ++R) {
3992     if (*R == RegNo)
3993       return hasSGPR102_SGPR103();
3994   }
3995 
3996   return true;
3997 }
3998 
3999 OperandMatchResultTy
4000 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4001                               OperandMode Mode) {
4002   // Try to parse with a custom parser
4003   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4004 
  // If we successfully parsed the operand or if there was an error parsing,
  // we are done.
4007   //
4008   // If we are parsing after we reach EndOfStatement then this means we
4009   // are appending default values to the Operands list.  This is only done
4010   // by custom parser, so we shouldn't continue on to the generic parsing.
4011   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4012       getLexer().is(AsmToken::EndOfStatement))
4013     return ResTy;
4014 
4015   if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4016     unsigned Prefix = Operands.size();
4017     SMLoc LBraceLoc = getTok().getLoc();
4018     Parser.Lex(); // eat the '['
4019 
4020     for (;;) {
4021       ResTy = parseReg(Operands);
4022       if (ResTy != MatchOperand_Success)
4023         return ResTy;
4024 
4025       if (getLexer().is(AsmToken::RBrac))
4026         break;
4027 
4028       if (getLexer().isNot(AsmToken::Comma))
4029         return MatchOperand_ParseFail;
4030       Parser.Lex();
4031     }
4032 
4033     if (Operands.size() - Prefix > 1) {
4034       Operands.insert(Operands.begin() + Prefix,
4035                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4036       Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4037                                                     getTok().getLoc()));
4038     }
4039 
4040     Parser.Lex(); // eat the ']'
4041     return MatchOperand_Success;
4042   }
4043 
4044   return parseRegOrImm(Operands);
4045 }
4046 
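// For example, a mnemonic such as "v_add_f32_e64" (illustrative) forces the
// 64-bit encoding and is matched as "v_add_f32"; the "_e32", "_dpp" and
// "_sdwa" suffixes select their encodings analogously.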
4047 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4048   // Clear any forced encodings from the previous instruction.
4049   setForcedEncodingSize(0);
4050   setForcedDPP(false);
4051   setForcedSDWA(false);
4052 
4053   if (Name.endswith("_e64")) {
4054     setForcedEncodingSize(64);
4055     return Name.substr(0, Name.size() - 4);
4056   } else if (Name.endswith("_e32")) {
4057     setForcedEncodingSize(32);
4058     return Name.substr(0, Name.size() - 4);
4059   } else if (Name.endswith("_dpp")) {
4060     setForcedDPP(true);
4061     return Name.substr(0, Name.size() - 4);
4062   } else if (Name.endswith("_sdwa")) {
4063     setForcedSDWA(true);
4064     return Name.substr(0, Name.size() - 5);
4065   }
4066   return Name;
4067 }
4068 
4069 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4070                                        StringRef Name,
4071                                        SMLoc NameLoc, OperandVector &Operands) {
4072   // Add the instruction mnemonic
4073   Name = parseMnemonicSuffix(Name);
4074   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4075 
4076   bool IsMIMG = Name.startswith("image_");
4077 
4078   while (!getLexer().is(AsmToken::EndOfStatement)) {
4079     OperandMode Mode = OperandMode_Default;
4080     if (IsMIMG && isGFX10() && Operands.size() == 2)
4081       Mode = OperandMode_NSA;
4082     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4083 
4084     // Eat the comma or space if there is one.
4085     if (getLexer().is(AsmToken::Comma))
4086       Parser.Lex();
4087 
4088     switch (Res) {
4089       case MatchOperand_Success: break;
4090       case MatchOperand_ParseFail:
4091         // FIXME: use real operand location rather than the current location.
4092         Error(getLexer().getLoc(), "failed parsing operand.");
4093         while (!getLexer().is(AsmToken::EndOfStatement)) {
4094           Parser.Lex();
4095         }
4096         return true;
4097       case MatchOperand_NoMatch:
4098         // FIXME: use real operand location rather than the current location.
4099         Error(getLexer().getLoc(), "not a valid operand.");
4100         while (!getLexer().is(AsmToken::EndOfStatement)) {
4101           Parser.Lex();
4102         }
4103         return true;
4104     }
4105   }
4106 
4107   return false;
4108 }
4109 
4110 //===----------------------------------------------------------------------===//
4111 // Utility functions
4112 //===----------------------------------------------------------------------===//
4113 
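// Parses an operand of the form "<prefix>:<integer expression>",
// e.g. "offset:16" (the value is illustrative).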
4114 OperandMatchResultTy
4115 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4116 
4117   if (!trySkipId(Prefix, AsmToken::Colon))
4118     return MatchOperand_NoMatch;
4119 
4120   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4121 }
4122 
4123 OperandMatchResultTy
4124 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4125                                     AMDGPUOperand::ImmTy ImmTy,
4126                                     bool (*ConvertResult)(int64_t&)) {
4127   SMLoc S = getLoc();
4128   int64_t Value = 0;
4129 
4130   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4131   if (Res != MatchOperand_Success)
4132     return Res;
4133 
4134   if (ConvertResult && !ConvertResult(Value)) {
4135     Error(S, "invalid " + StringRef(Prefix) + " value.");
4136   }
4137 
4138   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4139   return MatchOperand_Success;
4140 }
4141 
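// Parses an operand of the form "<prefix>:[b0,b1,...]" with at most four
// 0/1 elements, e.g. "op_sel:[0,1]" (prefix and values are illustrative).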
4142 OperandMatchResultTy
4143 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4144                                              OperandVector &Operands,
4145                                              AMDGPUOperand::ImmTy ImmTy,
4146                                              bool (*ConvertResult)(int64_t&)) {
4147   SMLoc S = getLoc();
4148   if (!trySkipId(Prefix, AsmToken::Colon))
4149     return MatchOperand_NoMatch;
4150 
4151   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4152     return MatchOperand_ParseFail;
4153 
4154   unsigned Val = 0;
4155   const unsigned MaxSize = 4;
4156 
4157   // FIXME: How to verify the number of elements matches the number of src
4158   // operands?
4159   for (int I = 0; ; ++I) {
4160     int64_t Op;
4161     SMLoc Loc = getLoc();
4162     if (!parseExpr(Op))
4163       return MatchOperand_ParseFail;
4164 
4165     if (Op != 0 && Op != 1) {
4166       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4167       return MatchOperand_ParseFail;
4168     }
4169 
4170     Val |= (Op << I);
4171 
4172     if (trySkipToken(AsmToken::RBrac))
4173       break;
4174 
4175     if (I + 1 == MaxSize) {
4176       Error(getLoc(), "expected a closing square bracket");
4177       return MatchOperand_ParseFail;
4178     }
4179 
4180     if (!skipToken(AsmToken::Comma, "expected a comma"))
4181       return MatchOperand_ParseFail;
4182   }
4183 
4184   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4185   return MatchOperand_Success;
4186 }
4187 
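// Parses a named bit modifier: the bare name sets the bit (e.g. "glc",
// illustrative) and the "no"-prefixed form clears it (e.g. "noglc").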
4188 OperandMatchResultTy
4189 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4190                                AMDGPUOperand::ImmTy ImmTy) {
4191   int64_t Bit = 0;
4192   SMLoc S = Parser.getTok().getLoc();
4193 
  // If we are at the end of the statement, this is a default argument, so
  // use the default value.
4196   if (getLexer().isNot(AsmToken::EndOfStatement)) {
4197     switch(getLexer().getKind()) {
4198       case AsmToken::Identifier: {
4199         StringRef Tok = Parser.getTok().getString();
4200         if (Tok == Name) {
4201           if (Tok == "r128" && isGFX9())
4202             Error(S, "r128 modifier is not supported on this GPU");
4203           if (Tok == "a16" && !isGFX9())
4204             Error(S, "a16 modifier is not supported on this GPU");
4205           Bit = 1;
4206           Parser.Lex();
4207         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4208           Bit = 0;
4209           Parser.Lex();
4210         } else {
4211           return MatchOperand_NoMatch;
4212         }
4213         break;
4214       }
4215       default:
4216         return MatchOperand_NoMatch;
4217     }
4218   }
4219 
4220   if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4221     return MatchOperand_ParseFail;
4222 
4223   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4224   return MatchOperand_Success;
4225 }
4226 
4227 static void addOptionalImmOperand(
4228   MCInst& Inst, const OperandVector& Operands,
4229   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4230   AMDGPUOperand::ImmTy ImmT,
4231   int64_t Default = 0) {
4232   auto i = OptionalIdx.find(ImmT);
4233   if (i != OptionalIdx.end()) {
4234     unsigned Idx = i->second;
4235     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4236   } else {
4237     Inst.addOperand(MCOperand::createImm(Default));
4238   }
4239 }
4240 
4241 OperandMatchResultTy
4242 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4243   if (getLexer().isNot(AsmToken::Identifier)) {
4244     return MatchOperand_NoMatch;
4245   }
4246   StringRef Tok = Parser.getTok().getString();
4247   if (Tok != Prefix) {
4248     return MatchOperand_NoMatch;
4249   }
4250 
4251   Parser.Lex();
4252   if (getLexer().isNot(AsmToken::Colon)) {
4253     return MatchOperand_ParseFail;
4254   }
4255 
4256   Parser.Lex();
4257   if (getLexer().isNot(AsmToken::Identifier)) {
4258     return MatchOperand_ParseFail;
4259   }
4260 
4261   Value = Parser.getTok().getString();
4262   return MatchOperand_Success;
4263 }
4264 
4265 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
4266 // values to live in a joint format operand in the MCInst encoding.
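// Both modifiers are optional and may appear in either order, e.g.
// "dfmt:1, nfmt:2" (values illustrative); they are combined below as
// Dfmt | (Nfmt << 4).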
4267 OperandMatchResultTy
4268 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
4269   SMLoc S = Parser.getTok().getLoc();
4270   int64_t Dfmt = 0, Nfmt = 0;
4271   // dfmt and nfmt can appear in either order, and each is optional.
4272   bool GotDfmt = false, GotNfmt = false;
4273   while (!GotDfmt || !GotNfmt) {
4274     if (!GotDfmt) {
4275       auto Res = parseIntWithPrefix("dfmt", Dfmt);
4276       if (Res != MatchOperand_NoMatch) {
4277         if (Res != MatchOperand_Success)
4278           return Res;
4279         if (Dfmt >= 16) {
4280           Error(Parser.getTok().getLoc(), "out of range dfmt");
4281           return MatchOperand_ParseFail;
4282         }
4283         GotDfmt = true;
4284         Parser.Lex();
4285         continue;
4286       }
4287     }
4288     if (!GotNfmt) {
4289       auto Res = parseIntWithPrefix("nfmt", Nfmt);
4290       if (Res != MatchOperand_NoMatch) {
4291         if (Res != MatchOperand_Success)
4292           return Res;
4293         if (Nfmt >= 8) {
4294           Error(Parser.getTok().getLoc(), "out of range nfmt");
4295           return MatchOperand_ParseFail;
4296         }
4297         GotNfmt = true;
4298         Parser.Lex();
4299         continue;
4300       }
4301     }
4302     break;
4303   }
4304   if (!GotDfmt && !GotNfmt)
4305     return MatchOperand_NoMatch;
4306   auto Format = Dfmt | Nfmt << 4;
4307   Operands.push_back(
4308       AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
4309   return MatchOperand_Success;
4310 }
4311 
4312 //===----------------------------------------------------------------------===//
4313 // ds
4314 //===----------------------------------------------------------------------===//
4315 
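// Converts DS instructions that take two separate 8-bit offsets, e.g.
// (syntax illustrative):
//   ds_write2_b32 v0, v1, v2 offset0:4 offset1:8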
4316 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
4317                                     const OperandVector &Operands) {
4318   OptionalImmIndexMap OptionalIdx;
4319 
4320   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4321     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4322 
4323     // Add the register arguments
4324     if (Op.isReg()) {
4325       Op.addRegOperands(Inst, 1);
4326       continue;
4327     }
4328 
4329     // Handle optional arguments
4330     OptionalIdx[Op.getImmTy()] = i;
4331   }
4332 
4333   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
4334   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
4335   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4336 
4337   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4338 }
4339 
4340 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
4341                                 bool IsGdsHardcoded) {
4342   OptionalImmIndexMap OptionalIdx;
4343 
4344   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4345     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4346 
4347     // Add the register arguments
4348     if (Op.isReg()) {
4349       Op.addRegOperands(Inst, 1);
4350       continue;
4351     }
4352 
4353     if (Op.isToken() && Op.getToken() == "gds") {
4354       IsGdsHardcoded = true;
4355       continue;
4356     }
4357 
4358     // Handle optional arguments
4359     OptionalIdx[Op.getImmTy()] = i;
4360   }
4361 
4362   AMDGPUOperand::ImmTy OffsetType =
4363     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
4364      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
4365      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
4366                                                       AMDGPUOperand::ImmTyOffset;
4367 
4368   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
4369 
4370   if (!IsGdsHardcoded) {
4371     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4372   }
4373   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4374 }
4375 
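// Converts an export instruction, e.g. (syntax illustrative):
//   exp mrt0 v0, v1, v2, v3 done
// The "en" mask is computed below from which sources are registers rather
// than "off".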
4376 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
4377   OptionalImmIndexMap OptionalIdx;
4378 
4379   unsigned OperandIdx[4];
4380   unsigned EnMask = 0;
4381   int SrcIdx = 0;
4382 
4383   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4384     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4385 
4386     // Add the register arguments
4387     if (Op.isReg()) {
4388       assert(SrcIdx < 4);
4389       OperandIdx[SrcIdx] = Inst.size();
4390       Op.addRegOperands(Inst, 1);
4391       ++SrcIdx;
4392       continue;
4393     }
4394 
4395     if (Op.isOff()) {
4396       assert(SrcIdx < 4);
4397       OperandIdx[SrcIdx] = Inst.size();
4398       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
4399       ++SrcIdx;
4400       continue;
4401     }
4402 
4403     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
4404       Op.addImmOperands(Inst, 1);
4405       continue;
4406     }
4407 
4408     if (Op.isToken() && Op.getToken() == "done")
4409       continue;
4410 
4411     // Handle optional arguments
4412     OptionalIdx[Op.getImmTy()] = i;
4413   }
4414 
4415   assert(SrcIdx == 4);
4416 
4417   bool Compr = false;
4418   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
4419     Compr = true;
4420     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
4421     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
4422     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
4423   }
4424 
4425   for (auto i = 0; i < SrcIdx; ++i) {
4426     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
4427       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
4428     }
4429   }
4430 
4431   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
4432   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
4433 
4434   Inst.addOperand(MCOperand::createImm(EnMask));
4435 }
4436 
4437 //===----------------------------------------------------------------------===//
4438 // s_waitcnt
4439 //===----------------------------------------------------------------------===//
4440 
4441 static bool
4442 encodeCnt(
4443   const AMDGPU::IsaVersion ISA,
4444   int64_t &IntVal,
4445   int64_t CntVal,
4446   bool Saturate,
4447   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
4448   unsigned (*decode)(const IsaVersion &Version, unsigned))
4449 {
4450   bool Failed = false;
4451 
4452   IntVal = encode(ISA, IntVal, CntVal);
4453   if (CntVal != decode(ISA, IntVal)) {
4454     if (Saturate) {
4455       IntVal = encode(ISA, IntVal, -1);
4456     } else {
4457       Failed = true;
4458     }
4459   }
4460   return Failed;
4461 }
4462 
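// Parses a single counter of an s_waitcnt operand, e.g. "vmcnt(0)"; counters
// may be joined with '&' or ',', as in (values illustrative):
//   s_waitcnt vmcnt(0) & expcnt(0) & lgkmcnt(0)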
4463 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
4464 
4465   SMLoc CntLoc = getLoc();
4466   StringRef CntName = getTokenStr();
4467 
4468   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
4469       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
4470     return false;
4471 
4472   int64_t CntVal;
4473   SMLoc ValLoc = getLoc();
4474   if (!parseExpr(CntVal))
4475     return false;
4476 
4477   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4478 
4479   bool Failed = true;
4480   bool Sat = CntName.endswith("_sat");
4481 
4482   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
4483     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
4484   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
4485     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
4486   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
4487     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
4488   } else {
4489     Error(CntLoc, "invalid counter name " + CntName);
4490     return false;
4491   }
4492 
4493   if (Failed) {
4494     Error(ValLoc, "too large value for " + CntName);
4495     return false;
4496   }
4497 
4498   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
4499     return false;
4500 
4501   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
4502     if (isToken(AsmToken::EndOfStatement)) {
4503       Error(getLoc(), "expected a counter name");
4504       return false;
4505     }
4506   }
4507 
4508   return true;
4509 }
4510 
4511 OperandMatchResultTy
4512 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
4513   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4514   int64_t Waitcnt = getWaitcntBitMask(ISA);
4515   SMLoc S = getLoc();
4516 
  // If parsing failed, do not return an error code
  // to avoid excessive error messages.
4519   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
4520     while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
4521   } else {
4522     parseExpr(Waitcnt);
4523   }
4524 
4525   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
4526   return MatchOperand_Success;
4527 }
4528 
4529 bool
4530 AMDGPUOperand::isSWaitCnt() const {
4531   return isImm();
4532 }
4533 
4534 //===----------------------------------------------------------------------===//
4535 // hwreg
4536 //===----------------------------------------------------------------------===//
4537 
4538 bool
4539 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
4540                                 int64_t &Offset,
4541                                 int64_t &Width) {
4542   using namespace llvm::AMDGPU::Hwreg;
4543 
4544   // The register may be specified by name or using a numeric code
4545   if (isToken(AsmToken::Identifier) &&
4546       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
4547     HwReg.IsSymbolic = true;
    lex(); // skip register name
4549   } else if (!parseExpr(HwReg.Id)) {
4550     return false;
4551   }
4552 
4553   if (trySkipToken(AsmToken::RParen))
4554     return true;
4555 
4556   // parse optional params
4557   return
4558     skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
4559     parseExpr(Offset) &&
4560     skipToken(AsmToken::Comma, "expected a comma") &&
4561     parseExpr(Width) &&
4562     skipToken(AsmToken::RParen, "expected a closing parenthesis");
4563 }
4564 
4565 void
4566 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
4567                                const int64_t Offset,
4568                                const int64_t Width,
4569                                const SMLoc Loc) {
4570 
4571   using namespace llvm::AMDGPU::Hwreg;
4572 
4573   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
4574     Error(Loc, "specified hardware register is not supported on this GPU");
4575   } else if (!isValidHwreg(HwReg.Id)) {
4576     Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
4577   } else if (!isValidHwregOffset(Offset)) {
4578     Error(Loc, "invalid bit offset: only 5-bit values are legal");
4579   } else if (!isValidHwregWidth(Width)) {
4580     Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
4581   }
4582 }
4583 
4584 OperandMatchResultTy
4585 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
4586   using namespace llvm::AMDGPU::Hwreg;
4587 
4588   int64_t ImmVal = 0;
4589   SMLoc Loc = getLoc();
4590 
4591   // If parse failed, do not return an error code
4592   // to avoid excessive error messages.
4593   if (trySkipId("hwreg", AsmToken::LParen)) {
4594     OperandInfoTy HwReg(ID_UNKNOWN_);
4595     int64_t Offset = OFFSET_DEFAULT_;
4596     int64_t Width = WIDTH_DEFAULT_;
4597     if (parseHwregBody(HwReg, Offset, Width)) {
4598       validateHwreg(HwReg, Offset, Width, Loc);
4599       ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
4600     }
4601   } else if (parseExpr(ImmVal)) {
4602     if (ImmVal < 0 || !isUInt<16>(ImmVal))
4603       Error(Loc, "invalid immediate: only 16-bit values are legal");
4604   }
4605 
4606   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
4607   return MatchOperand_Success;
4608 }
4609 
4610 bool AMDGPUOperand::isHwreg() const {
4611   return isImmTy(ImmTyHwreg);
4612 }
4613 
4614 //===----------------------------------------------------------------------===//
4615 // sendmsg
4616 //===----------------------------------------------------------------------===//
4617 
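// Parse the body of a sendmsg(...) operand: a message id (symbolic or
// numeric), an optional operation id for GS/GS_DONE/SYSMSG messages, and an
// optional stream id for GS operations, e.g. a form like
// sendmsg(MSG_GS, GS_OP_EMIT, 0). Returns true on parse failure.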
4618 bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) {
4619   using namespace llvm::AMDGPU::SendMsg;
4620 
4621   if (Parser.getTok().getString() != "sendmsg")
4622     return true;
4623   Parser.Lex();
4624 
4625   if (getLexer().isNot(AsmToken::LParen))
4626     return true;
4627   Parser.Lex();
4628 
4629   if (getLexer().is(AsmToken::Identifier)) {
4630     Msg.IsSymbolic = true;
4631     Msg.Id = ID_UNKNOWN_;
4632     const StringRef tok = Parser.getTok().getString();
4633     for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
4634       switch(i) {
4635         default: continue; // Omit gaps.
4636         case ID_GS_ALLOC_REQ:
4637           if (isSI() || isCI() || isVI())
4638             continue;
4639           break;
4640         case ID_INTERRUPT: case ID_GS: case ID_GS_DONE:
4641         case ID_SYSMSG: break;
4642       }
4643       if (tok == IdSymbolic[i]) {
4644         Msg.Id = i;
4645         break;
4646       }
4647     }
4648     Parser.Lex();
4649   } else {
4650     Msg.IsSymbolic = false;
4651     if (getLexer().isNot(AsmToken::Integer))
4652       return true;
4653     if (getParser().parseAbsoluteExpression(Msg.Id))
4654       return true;
4655     if (getLexer().is(AsmToken::Integer))
4656       if (getParser().parseAbsoluteExpression(Msg.Id))
4657         Msg.Id = ID_UNKNOWN_;
4658   }
4659   if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest.
4660     return false;
4661 
4662   if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) {
4663     if (getLexer().isNot(AsmToken::RParen))
4664       return true;
4665     Parser.Lex();
4666     return false;
4667   }
4668 
4669   if (getLexer().isNot(AsmToken::Comma))
4670     return true;
4671   Parser.Lex();
4672 
4673   assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG);
4674   Operation.Id = ID_UNKNOWN_;
4675   if (getLexer().is(AsmToken::Identifier)) {
4676     Operation.IsSymbolic = true;
4677     const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
4678     const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
4679     const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
4680     const StringRef Tok = Parser.getTok().getString();
4681     for (int i = F; i < L; ++i) {
4682       if (Tok == S[i]) {
4683         Operation.Id = i;
4684         break;
4685       }
4686     }
4687     Parser.Lex();
4688   } else {
4689     Operation.IsSymbolic = false;
4690     if (getLexer().isNot(AsmToken::Integer))
4691       return true;
4692     if (getParser().parseAbsoluteExpression(Operation.Id))
4693       return true;
4694   }
4695 
4696   if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
4697     // Stream id is optional.
4698     if (getLexer().is(AsmToken::RParen)) {
4699       Parser.Lex();
4700       return false;
4701     }
4702 
4703     if (getLexer().isNot(AsmToken::Comma))
4704       return true;
4705     Parser.Lex();
4706 
4707     if (getLexer().isNot(AsmToken::Integer))
4708       return true;
4709     if (getParser().parseAbsoluteExpression(StreamId))
4710       return true;
4711   }
4712 
4713   if (getLexer().isNot(AsmToken::RParen))
4714     return true;
4715   Parser.Lex();
4716   return false;
4717 }
4718 
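// Parse the parameter-slot operand of interpolation instructions: one of
// "p10", "p20" or "p0", encoded as 0, 1 and 2 respectively (see the switch
// below).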
4719 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
4720   if (getLexer().getKind() != AsmToken::Identifier)
4721     return MatchOperand_NoMatch;
4722 
4723   StringRef Str = Parser.getTok().getString();
4724   int Slot = StringSwitch<int>(Str)
4725     .Case("p10", 0)
4726     .Case("p20", 1)
4727     .Case("p0", 2)
4728     .Default(-1);
4729 
4730   SMLoc S = Parser.getTok().getLoc();
4731   if (Slot == -1)
4732     return MatchOperand_ParseFail;
4733 
4734   Parser.Lex();
4735   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
4736                                               AMDGPUOperand::ImmTyInterpSlot));
4737   return MatchOperand_Success;
4738 }
4739 
4740 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
4741   if (getLexer().getKind() != AsmToken::Identifier)
4742     return MatchOperand_NoMatch;
4743 
4744   StringRef Str = Parser.getTok().getString();
4745   if (!Str.startswith("attr"))
4746     return MatchOperand_NoMatch;
4747 
4748   StringRef Chan = Str.take_back(2);
4749   int AttrChan = StringSwitch<int>(Chan)
4750     .Case(".x", 0)
4751     .Case(".y", 1)
4752     .Case(".z", 2)
4753     .Case(".w", 3)
4754     .Default(-1);
4755   if (AttrChan == -1)
4756     return MatchOperand_ParseFail;
4757 
4758   Str = Str.drop_back(2).drop_front(4);
4759 
4760   uint8_t Attr;
4761   if (Str.getAsInteger(10, Attr))
4762     return MatchOperand_ParseFail;
4763 
4764   SMLoc S = Parser.getTok().getLoc();
4765   Parser.Lex();
4766   if (Attr > 63) {
4767     Error(S, "out of bounds attr");
4768     return MatchOperand_Success;
4769   }
4770 
4771   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
4772 
4773   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
4774                                               AMDGPUOperand::ImmTyInterpAttr));
4775   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
4776                                               AMDGPUOperand::ImmTyAttrChan));
4777   return MatchOperand_Success;
4778 }
4779 
4780 void AMDGPUAsmParser::errorExpTgt() {
4781   Error(Parser.getTok().getLoc(), "invalid exp target");
4782 }
4783 
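// Map an EXP target name to its encoding: "mrt0".."mrt7" -> 0..7,
// "mrtz" -> 8, "null" -> 9, "pos0".."pos3" -> 12..15 (plus "pos4" on GFX10),
// "prim" -> 20 (GFX10 only) and "param0".."param31" -> 32..63.
// Out-of-range values are diagnosed via errorExpTgt().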
4784 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
4785                                                       uint8_t &Val) {
4786   if (Str == "null") {
4787     Val = 9;
4788     return MatchOperand_Success;
4789   }
4790 
4791   if (Str.startswith("mrt")) {
4792     Str = Str.drop_front(3);
4793     if (Str == "z") { // == mrtz
4794       Val = 8;
4795       return MatchOperand_Success;
4796     }
4797 
4798     if (Str.getAsInteger(10, Val))
4799       return MatchOperand_ParseFail;
4800 
4801     if (Val > 7)
4802       errorExpTgt();
4803 
4804     return MatchOperand_Success;
4805   }
4806 
4807   if (Str.startswith("pos")) {
4808     Str = Str.drop_front(3);
4809     if (Str.getAsInteger(10, Val))
4810       return MatchOperand_ParseFail;
4811 
4812     if (Val > 4 || (Val == 4 && !isGFX10()))
4813       errorExpTgt();
4814 
4815     Val += 12;
4816     return MatchOperand_Success;
4817   }
4818 
4819   if (isGFX10() && Str == "prim") {
4820     Val = 20;
4821     return MatchOperand_Success;
4822   }
4823 
4824   if (Str.startswith("param")) {
4825     Str = Str.drop_front(5);
4826     if (Str.getAsInteger(10, Val))
4827       return MatchOperand_ParseFail;
4828 
4829     if (Val >= 32)
4830       errorExpTgt();
4831 
4832     Val += 32;
4833     return MatchOperand_Success;
4834   }
4835 
4836   if (Str.startswith("invalid_target_")) {
4837     Str = Str.drop_front(15);
4838     if (Str.getAsInteger(10, Val))
4839       return MatchOperand_ParseFail;
4840 
4841     errorExpTgt();
4842     return MatchOperand_Success;
4843   }
4844 
4845   return MatchOperand_NoMatch;
4846 }
4847 
4848 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
4849   uint8_t Val;
4850   StringRef Str = Parser.getTok().getString();
4851 
4852   auto Res = parseExpTgtImpl(Str, Val);
4853   if (Res != MatchOperand_Success)
4854     return Res;
4855 
4856   SMLoc S = Parser.getTok().getLoc();
4857   Parser.Lex();
4858 
4859   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
4860                                               AMDGPUOperand::ImmTyExpTgt));
4861   return MatchOperand_Success;
4862 }
4863 
4864 OperandMatchResultTy
4865 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
4866   using namespace llvm::AMDGPU::SendMsg;
4867 
4868   int64_t Imm16Val = 0;
4869   SMLoc S = Parser.getTok().getLoc();
4870 
4871   switch(getLexer().getKind()) {
4872   default:
4873     return MatchOperand_NoMatch;
4874   case AsmToken::Integer:
4875     // The operand can be an integer value.
4876     if (getParser().parseAbsoluteExpression(Imm16Val))
4877       return MatchOperand_NoMatch;
4878     if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
4879       Error(S, "invalid immediate: only 16-bit values are legal");
4880       // Do not return an error code, but create an imm operand anyway and proceed
4881       // to the next operand, if any. That avoids unnecessary error messages.
4882     }
4883     break;
4884   case AsmToken::Identifier: {
4885       OperandInfoTy Msg(ID_UNKNOWN_);
4886       OperandInfoTy Operation(OP_UNKNOWN_);
4887       int64_t StreamId = STREAM_ID_DEFAULT_;
4888       if (parseSendMsgConstruct(Msg, Operation, StreamId))
4889         return MatchOperand_ParseFail;
4890       do {
4891         // Validate and encode message ID.
4892         if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE)
4893                 || (Msg.Id == ID_GS_ALLOC_REQ && !isSI() && !isCI() && !isVI())
4894                 || Msg.Id == ID_SYSMSG)) {
4895           if (Msg.IsSymbolic)
4896             Error(S, "invalid/unsupported symbolic name of message");
4897           else
4898             Error(S, "invalid/unsupported code of message");
4899           break;
4900         }
4901         Imm16Val = (Msg.Id << ID_SHIFT_);
4902         // Validate and encode operation ID.
4903         if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) {
4904           if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) {
4905             if (Operation.IsSymbolic)
4906               Error(S, "invalid symbolic name of GS_OP");
4907             else
4908               Error(S, "invalid code of GS_OP: only 2-bit values are legal");
4909             break;
4910           }
4911           if (Operation.Id == OP_GS_NOP
4912               && Msg.Id != ID_GS_DONE) {
4913             Error(S, "invalid GS_OP: NOP is for GS_DONE only");
4914             break;
4915           }
4916           Imm16Val |= (Operation.Id << OP_SHIFT_);
4917         }
4918         if (Msg.Id == ID_SYSMSG) {
4919           if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) {
4920             if (Operation.IsSymbolic)
4921               Error(S, "invalid/unsupported symbolic name of SYSMSG_OP");
4922             else
4923               Error(S, "invalid/unsupported code of SYSMSG_OP");
4924             break;
4925           }
4926           Imm16Val |= (Operation.Id << OP_SHIFT_);
4927         }
4928         // Validate and encode stream ID.
4929         if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
4930           if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) {
4931             Error(S, "invalid stream id: only 2-bit values are legal");
4932             break;
4933           }
4934           Imm16Val |= (StreamId << STREAM_ID_SHIFT_);
4935         }
4936       } while (false);
4937     }
4938     break;
4939   }
4940   Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
4941   return MatchOperand_Success;
4942 }
4943 
4944 bool AMDGPUOperand::isSendMsg() const {
4945   return isImmTy(ImmTySendMsg);
4946 }
4947 
4948 //===----------------------------------------------------------------------===//
4949 // parser helpers
4950 //===----------------------------------------------------------------------===//
4951 
4952 bool
4953 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
4954   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
4955 }
4956 
4957 bool
4958 AMDGPUAsmParser::isId(const StringRef Id) const {
4959   return isId(getToken(), Id);
4960 }
4961 
4962 bool
4963 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
4964   return getTokenKind() == Kind;
4965 }
4966 
4967 bool
4968 AMDGPUAsmParser::trySkipId(const StringRef Id) {
4969   if (isId(Id)) {
4970     lex();
4971     return true;
4972   }
4973   return false;
4974 }
4975 
4976 bool
4977 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
4978   if (isId(Id) && peekToken().is(Kind)) {
4979     lex();
4980     lex();
4981     return true;
4982   }
4983   return false;
4984 }
4985 
4986 bool
4987 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
4988   if (isToken(Kind)) {
4989     lex();
4990     return true;
4991   }
4992   return false;
4993 }
4994 
4995 bool
4996 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
4997                            const StringRef ErrMsg) {
4998   if (!trySkipToken(Kind)) {
4999     Error(getLoc(), ErrMsg);
5000     return false;
5001   }
5002   return true;
5003 }
5004 
5005 bool
5006 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5007   return !getParser().parseAbsoluteExpression(Imm);
5008 }
5009 
5010 bool
5011 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5012   if (isToken(AsmToken::String)) {
5013     Val = getToken().getStringContents();
5014     lex();
5015     return true;
5016   } else {
5017     Error(getLoc(), ErrMsg);
5018     return false;
5019   }
5020 }
5021 
5022 AsmToken
5023 AMDGPUAsmParser::getToken() const {
5024   return Parser.getTok();
5025 }
5026 
5027 AsmToken
5028 AMDGPUAsmParser::peekToken() {
5029   return getLexer().peekTok();
5030 }
5031 
5032 void
5033 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
5034   auto TokCount = getLexer().peekTokens(Tokens);
5035 
5036   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
5037     Tokens[Idx] = AsmToken(AsmToken::Error, "");
5038 }
5039 
5040 AsmToken::TokenKind
5041 AMDGPUAsmParser::getTokenKind() const {
5042   return getLexer().getKind();
5043 }
5044 
5045 SMLoc
5046 AMDGPUAsmParser::getLoc() const {
5047   return getToken().getLoc();
5048 }
5049 
5050 StringRef
5051 AMDGPUAsmParser::getTokenStr() const {
5052   return getToken().getString();
5053 }
5054 
5055 void
5056 AMDGPUAsmParser::lex() {
5057   Parser.Lex();
5058 }
5059 
5060 //===----------------------------------------------------------------------===//
5061 // swizzle
5062 //===----------------------------------------------------------------------===//
5063 
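// Helpers for the ds_swizzle offset operand. The "swizzle(...)" macro forms
// (QUAD_PERM, BITMASK_PERM, BROADCAST, SWAP, REVERSE) are parsed below; all
// but QUAD_PERM are encoded through encodeBitmaskPerm, which packs an AND,
// OR and XOR mask applied to each lane id.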
5064 LLVM_READNONE
5065 static unsigned
5066 encodeBitmaskPerm(const unsigned AndMask,
5067                   const unsigned OrMask,
5068                   const unsigned XorMask) {
5069   using namespace llvm::AMDGPU::Swizzle;
5070 
5071   return BITMASK_PERM_ENC |
5072          (AndMask << BITMASK_AND_SHIFT) |
5073          (OrMask  << BITMASK_OR_SHIFT)  |
5074          (XorMask << BITMASK_XOR_SHIFT);
5075 }
5076 
5077 bool
5078 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5079                                       const unsigned MinVal,
5080                                       const unsigned MaxVal,
5081                                       const StringRef ErrMsg) {
5082   for (unsigned i = 0; i < OpNum; ++i) {
5083     if (!skipToken(AsmToken::Comma, "expected a comma")) {
5084       return false;
5085     }
5086     SMLoc ExprLoc = Parser.getTok().getLoc();
5087     if (!parseExpr(Op[i])) {
5088       return false;
5089     }
5090     if (Op[i] < MinVal || Op[i] > MaxVal) {
5091       Error(ExprLoc, ErrMsg);
5092       return false;
5093     }
5094   }
5095 
5096   return true;
5097 }
5098 
5099 bool
5100 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5101   using namespace llvm::AMDGPU::Swizzle;
5102 
5103   int64_t Lane[LANE_NUM];
5104   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5105                            "expected a 2-bit lane id")) {
5106     Imm = QUAD_PERM_ENC;
5107     for (unsigned I = 0; I < LANE_NUM; ++I) {
5108       Imm |= Lane[I] << (LANE_SHIFT * I);
5109     }
5110     return true;
5111   }
5112   return false;
5113 }
5114 
5115 bool
5116 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5117   using namespace llvm::AMDGPU::Swizzle;
5118 
5119   SMLoc S = Parser.getTok().getLoc();
5120   int64_t GroupSize;
5121   int64_t LaneIdx;
5122 
5123   if (!parseSwizzleOperands(1, &GroupSize,
5124                             2, 32,
5125                             "group size must be in the interval [2,32]")) {
5126     return false;
5127   }
5128   if (!isPowerOf2_64(GroupSize)) {
5129     Error(S, "group size must be a power of two");
5130     return false;
5131   }
5132   if (parseSwizzleOperands(1, &LaneIdx,
5133                            0, GroupSize - 1,
5134                            "lane id must be in the interval [0,group size - 1]")) {
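    // Broadcast: the AND mask keeps the group-aligned base lane and the OR
    // mask selects LaneIdx within the group, so every lane in a group reads
    // from the same source lane.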
5135     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5136     return true;
5137   }
5138   return false;
5139 }
5140 
5141 bool
5142 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
5143   using namespace llvm::AMDGPU::Swizzle;
5144 
5145   SMLoc S = Parser.getTok().getLoc();
5146   int64_t GroupSize;
5147 
5148   if (!parseSwizzleOperands(1, &GroupSize,
5149       2, 32, "group size must be in the interval [2,32]")) {
5150     return false;
5151   }
5152   if (!isPowerOf2_64(GroupSize)) {
5153     Error(S, "group size must be a power of two");
5154     return false;
5155   }
5156 
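  // Reverse: XOR each lane id with (GroupSize - 1) to mirror the lane order
  // within every group.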
5157   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
5158   return true;
5159 }
5160 
5161 bool
5162 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5163   using namespace llvm::AMDGPU::Swizzle;
5164 
5165   SMLoc S = Parser.getTok().getLoc();
5166   int64_t GroupSize;
5167 
5168   if (!parseSwizzleOperands(1, &GroupSize,
5169       1, 16, "group size must be in the interval [1,16]")) {
5170     return false;
5171   }
5172   if (!isPowerOf2_64(GroupSize)) {
5173     Error(S, "group size must be a power of two");
5174     return false;
5175   }
5176 
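  // Swap: XOR each lane id with GroupSize to exchange adjacent groups of
  // GroupSize lanes.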
5177   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5178   return true;
5179 }
5180 
5181 bool
5182 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5183   using namespace llvm::AMDGPU::Swizzle;
5184 
5185   if (!skipToken(AsmToken::Comma, "expected a comma")) {
5186     return false;
5187   }
5188 
5189   StringRef Ctl;
5190   SMLoc StrLoc = Parser.getTok().getLoc();
5191   if (!parseString(Ctl)) {
5192     return false;
5193   }
5194   if (Ctl.size() != BITMASK_WIDTH) {
5195     Error(StrLoc, "expected a 5-character mask");
5196     return false;
5197   }
5198 
5199   unsigned AndMask = 0;
5200   unsigned OrMask = 0;
5201   unsigned XorMask = 0;
5202 
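  // Each character of the mask controls one bit of the lane id (MSB first):
  // '0' forces the bit to 0, '1' forces it to 1, 'p' preserves it and
  // 'i' inverts it.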
5203   for (size_t i = 0; i < Ctl.size(); ++i) {
5204     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5205     switch(Ctl[i]) {
5206     default:
5207       Error(StrLoc, "invalid mask");
5208       return false;
5209     case '0':
5210       break;
5211     case '1':
5212       OrMask |= Mask;
5213       break;
5214     case 'p':
5215       AndMask |= Mask;
5216       break;
5217     case 'i':
5218       AndMask |= Mask;
5219       XorMask |= Mask;
5220       break;
5221     }
5222   }
5223 
5224   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
5225   return true;
5226 }
5227 
5228 bool
5229 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5230 
5231   SMLoc OffsetLoc = Parser.getTok().getLoc();
5232 
5233   if (!parseExpr(Imm)) {
5234     return false;
5235   }
5236   if (!isUInt<16>(Imm)) {
5237     Error(OffsetLoc, "expected a 16-bit offset");
5238     return false;
5239   }
5240   return true;
5241 }
5242 
5243 bool
5244 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
5245   using namespace llvm::AMDGPU::Swizzle;
5246 
5247   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
5248 
5249     SMLoc ModeLoc = Parser.getTok().getLoc();
5250     bool Ok = false;
5251 
5252     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
5253       Ok = parseSwizzleQuadPerm(Imm);
5254     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
5255       Ok = parseSwizzleBitmaskPerm(Imm);
5256     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
5257       Ok = parseSwizzleBroadcast(Imm);
5258     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
5259       Ok = parseSwizzleSwap(Imm);
5260     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
5261       Ok = parseSwizzleReverse(Imm);
5262     } else {
5263       Error(ModeLoc, "expected a swizzle mode");
5264     }
5265 
5266     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
5267   }
5268 
5269   return false;
5270 }
5271 
5272 OperandMatchResultTy
5273 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
5274   SMLoc S = Parser.getTok().getLoc();
5275   int64_t Imm = 0;
5276 
5277   if (trySkipId("offset")) {
5278 
5279     bool Ok = false;
5280     if (skipToken(AsmToken::Colon, "expected a colon")) {
5281       if (trySkipId("swizzle")) {
5282         Ok = parseSwizzleMacro(Imm);
5283       } else {
5284         Ok = parseSwizzleOffset(Imm);
5285       }
5286     }
5287 
5288     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
5289 
5290     return Ok ? MatchOperand_Success : MatchOperand_ParseFail;
5291   } else {
5292     // Swizzle "offset" operand is optional.
5293     // If it is omitted, try parsing other optional operands.
5294     return parseOptionalOpr(Operands);
5295   }
5296 }
5297 
5298 bool
5299 AMDGPUOperand::isSwizzle() const {
5300   return isImmTy(ImmTySwizzle);
5301 }
5302 
5303 //===----------------------------------------------------------------------===//
5304 // VGPR Index Mode
5305 //===----------------------------------------------------------------------===//
5306 
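// Parse the body of a gpr_idx(...) operand: a comma-separated list of VGPR
// index modes (e.g. SRC0, SRC1, SRC2, DST), each of which may appear at most
// once; s_set_gpr_idx_on is mentioned here only as the typical user of this
// operand.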
5307 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
5308 
5309   using namespace llvm::AMDGPU::VGPRIndexMode;
5310 
5311   if (trySkipToken(AsmToken::RParen)) {
5312     return OFF;
5313   }
5314 
5315   int64_t Imm = 0;
5316 
5317   while (true) {
5318     unsigned Mode = 0;
5319     SMLoc S = Parser.getTok().getLoc();
5320 
5321     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
5322       if (trySkipId(IdSymbolic[ModeId])) {
5323         Mode = 1 << ModeId;
5324         break;
5325       }
5326     }
5327 
5328     if (Mode == 0) {
5329       Error(S, (Imm == 0)?
5330                "expected a VGPR index mode or a closing parenthesis" :
5331                "expected a VGPR index mode");
5332       break;
5333     }
5334 
5335     if (Imm & Mode) {
5336       Error(S, "duplicate VGPR index mode");
5337       break;
5338     }
5339     Imm |= Mode;
5340 
5341     if (trySkipToken(AsmToken::RParen))
5342       break;
5343     if (!skipToken(AsmToken::Comma,
5344                    "expected a comma or a closing parenthesis"))
5345       break;
5346   }
5347 
5348   return Imm;
5349 }
5350 
5351 OperandMatchResultTy
5352 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
5353 
5354   int64_t Imm = 0;
5355   SMLoc S = Parser.getTok().getLoc();
5356 
5357   if (getLexer().getKind() == AsmToken::Identifier &&
5358       Parser.getTok().getString() == "gpr_idx" &&
5359       getLexer().peekTok().is(AsmToken::LParen)) {
5360 
5361     Parser.Lex();
5362     Parser.Lex();
5363 
5364     // If parse failed, trigger an error but do not return an error code
5365     // to avoid excessive error messages.
5366     Imm = parseGPRIdxMacro();
5367 
5368   } else {
5369     if (getParser().parseAbsoluteExpression(Imm))
5370       return MatchOperand_NoMatch;
5371     if (Imm < 0 || !isUInt<4>(Imm)) {
5372       Error(S, "invalid immediate: only 4-bit values are legal");
5373     }
5374   }
5375 
5376   Operands.push_back(
5377       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
5378   return MatchOperand_Success;
5379 }
5380 
5381 bool AMDGPUOperand::isGPRIdxMode() const {
5382   return isImmTy(ImmTyGprIdxMode);
5383 }
5384 
5385 //===----------------------------------------------------------------------===//
5386 // sopp branch targets
5387 //===----------------------------------------------------------------------===//
5388 
5389 OperandMatchResultTy
5390 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
5391   SMLoc S = Parser.getTok().getLoc();
5392 
5393   switch (getLexer().getKind()) {
5394     default: return MatchOperand_ParseFail;
5395     case AsmToken::Integer: {
5396       int64_t Imm;
5397       if (getParser().parseAbsoluteExpression(Imm))
5398         return MatchOperand_ParseFail;
5399       Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
5400       return MatchOperand_Success;
5401     }
5402 
5403     case AsmToken::Identifier:
5404       Operands.push_back(AMDGPUOperand::CreateExpr(this,
5405           MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
5406                                   Parser.getTok().getString()), getContext()), S));
5407       Parser.Lex();
5408       return MatchOperand_Success;
5409   }
5410 }
5411 
5412 //===----------------------------------------------------------------------===//
5413 // Boolean holding registers
5414 //===----------------------------------------------------------------------===//
5415 
5416 OperandMatchResultTy
5417 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
5418   return parseReg(Operands);
5419 }
5420 
5421 //===----------------------------------------------------------------------===//
5422 // mubuf
5423 //===----------------------------------------------------------------------===//
5424 
5425 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
5426   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
5427 }
5428 
5429 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
5430   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
5431 }
5432 
5433 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
5434   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
5435 }
5436 
5437 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
5438                                const OperandVector &Operands,
5439                                bool IsAtomic,
5440                                bool IsAtomicReturn,
5441                                bool IsLds) {
5442   bool IsLdsOpcode = IsLds;
5443   bool HasLdsModifier = false;
5444   OptionalImmIndexMap OptionalIdx;
5445   assert(!IsAtomicReturn || IsAtomic);
5446   unsigned FirstOperandIdx = 1;
5447 
5448   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
5449     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5450 
5451     // Add the register arguments
5452     if (Op.isReg()) {
5453       Op.addRegOperands(Inst, 1);
5454       // Insert a tied src for atomic return dst.
5455       // This cannot be postponed as subsequent calls to
5456       // addImmOperands rely on correct number of MC operands.
5457       if (IsAtomicReturn && i == FirstOperandIdx)
5458         Op.addRegOperands(Inst, 1);
5459       continue;
5460     }
5461 
5462     // Handle the case where soffset is an immediate
5463     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5464       Op.addImmOperands(Inst, 1);
5465       continue;
5466     }
5467 
5468     HasLdsModifier |= Op.isLDS();
5469 
5470     // Handle tokens like 'offen' which are sometimes hard-coded into the
5471     // asm string.  There are no MCInst operands for these.
5472     if (Op.isToken()) {
5473       continue;
5474     }
5475     assert(Op.isImm());
5476 
5477     // Handle optional arguments
5478     OptionalIdx[Op.getImmTy()] = i;
5479   }
5480 
5481   // This is a workaround for an llvm quirk which may result in an
5482   // incorrect instruction selection. Lds and non-lds versions of
5483   // MUBUF instructions are identical except that lds versions
5484   // have a mandatory 'lds' modifier. However, this modifier follows
5485   // optional modifiers, and the llvm asm matcher regards this 'lds'
5486   // modifier as an optional one. As a result, an lds version
5487   // of an opcode may be selected even if it has no 'lds' modifier.
5488   if (IsLdsOpcode && !HasLdsModifier) {
5489     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
5490     if (NoLdsOpcode != -1) { // Got lds version - correct it.
5491       Inst.setOpcode(NoLdsOpcode);
5492       IsLdsOpcode = false;
5493     }
5494   }
5495 
5496   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
5497   if (!IsAtomic) { // glc is hard-coded.
5498     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5499   }
5500   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5501 
5502   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
5503     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5504   }
5505 
5506   if (isGFX10())
5507     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5508 }
5509 
5510 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
5511   OptionalImmIndexMap OptionalIdx;
5512 
5513   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5514     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5515 
5516     // Add the register arguments
5517     if (Op.isReg()) {
5518       Op.addRegOperands(Inst, 1);
5519       continue;
5520     }
5521 
5522     // Handle the case where soffset is an immediate
5523     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5524       Op.addImmOperands(Inst, 1);
5525       continue;
5526     }
5527 
5528     // Handle tokens like 'offen' which are sometimes hard-coded into the
5529     // asm string.  There are no MCInst operands for these.
5530     if (Op.isToken()) {
5531       continue;
5532     }
5533     assert(Op.isImm());
5534 
5535     // Handle optional arguments
5536     OptionalIdx[Op.getImmTy()] = i;
5537   }
5538 
5539   addOptionalImmOperand(Inst, Operands, OptionalIdx,
5540                         AMDGPUOperand::ImmTyOffset);
5541   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
5542   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5543   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5544   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5545 
5546   if (isGFX10())
5547     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5548 }
5549 
5550 //===----------------------------------------------------------------------===//
5551 // mimg
5552 //===----------------------------------------------------------------------===//
5553 
5554 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
5555                               bool IsAtomic) {
5556   unsigned I = 1;
5557   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5558   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5559     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5560   }
5561 
5562   if (IsAtomic) {
5563     // Add src, same as dst
5564     assert(Desc.getNumDefs() == 1);
5565     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
5566   }
5567 
5568   OptionalImmIndexMap OptionalIdx;
5569 
5570   for (unsigned E = Operands.size(); I != E; ++I) {
5571     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5572 
5573     // Add the register arguments
5574     if (Op.isReg()) {
5575       Op.addRegOperands(Inst, 1);
5576     } else if (Op.isImmModifier()) {
5577       OptionalIdx[Op.getImmTy()] = I;
5578     } else if (!Op.isToken()) {
5579       llvm_unreachable("unexpected operand type");
5580     }
5581   }
5582 
5583   bool IsGFX10 = isGFX10();
5584 
5585   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
5586   if (IsGFX10)
5587     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
5588   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
5589   if (IsGFX10)
5590     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5591   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5592   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5593   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
5594   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5595   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
5596   if (!IsGFX10)
5597     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
5598   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
5599 }
5600 
5601 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
5602   cvtMIMG(Inst, Operands, true);
5603 }
5604 
5605 //===----------------------------------------------------------------------===//
5606 // smrd
5607 //===----------------------------------------------------------------------===//
5608 
5609 bool AMDGPUOperand::isSMRDOffset8() const {
5610   return isImm() && isUInt<8>(getImm());
5611 }
5612 
5613 bool AMDGPUOperand::isSMRDOffset20() const {
5614   return isImm() && isUInt<20>(getImm());
5615 }
5616 
5617 bool AMDGPUOperand::isSMRDLiteralOffset() const {
5618   // 32-bit literals are only supported on CI and we only want to use them
5619   // when the offset is > 8 bits.
5620   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
5621 }
5622 
5623 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
5624   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5625 }
5626 
5627 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
5628   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5629 }
5630 
5631 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
5632   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5633 }
5634 
5635 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
5636   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5637 }
5638 
5639 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
5640   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5641 }
5642 
5643 //===----------------------------------------------------------------------===//
5644 // vop3
5645 //===----------------------------------------------------------------------===//
5646 
5647 static bool ConvertOmodMul(int64_t &Mul) {
5648   if (Mul != 1 && Mul != 2 && Mul != 4)
5649     return false;
5650 
5651   Mul >>= 1;
5652   return true;
5653 }
5654 
5655 static bool ConvertOmodDiv(int64_t &Div) {
5656   if (Div == 1) {
5657     Div = 0;
5658     return true;
5659   }
5660 
5661   if (Div == 2) {
5662     Div = 3;
5663     return true;
5664   }
5665 
5666   return false;
5667 }
5668 
5669 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
5670   if (BoundCtrl == 0) {
5671     BoundCtrl = 1;
5672     return true;
5673   }
5674 
5675   if (BoundCtrl == -1) {
5676     BoundCtrl = 0;
5677     return true;
5678   }
5679 
5680   return false;
5681 }
5682 
5683 // Note: the order in this table matches the order of operands in AsmString.
5684 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
5685   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
5686   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
5687   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
5688   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
5689   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
5690   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
5691   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
5692   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
5693   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
5694   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
5695   {"format",  AMDGPUOperand::ImmTyFORMAT, false, nullptr},
5696   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
5697   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
5698   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
5699   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
5700   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
5701   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
5702   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
5703   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
5704   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
5705   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
5706   {"a16",     AMDGPUOperand::ImmTyR128A16,  true, nullptr},
5707   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
5708   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
5709   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
5710   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
5711   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
5712   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
5713   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
5714   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
5715   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
5716   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
5717   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
5718   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
5719   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
5720   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
5721   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
5722   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
5723   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
5724   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
5725 };
5726 
5727 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
5728   unsigned size = Operands.size();
5729   assert(size > 0);
5730 
5731   OperandMatchResultTy res = parseOptionalOpr(Operands);
5732 
5733   // This is a hack to enable hardcoded mandatory operands which follow
5734   // optional operands.
5735   //
5736   // Current design assumes that all operands after the first optional operand
5737   // are also optional. However, the implementation of some instructions violates
5738   // this rule (see e.g. flat/global atomics, which have hardcoded 'glc' operands).
5739   //
5740   // To alleviate this problem, we have to (implicitly) parse extra operands
5741   // to make sure the autogenerated parser of custom operands never hits hardcoded
5742   // mandatory operands.
5743 
5744   if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
5745 
5746     // We have parsed the first optional operand.
5747     // Parse as many operands as necessary to skip all mandatory operands.
5748 
5749     for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
5750       if (res != MatchOperand_Success ||
5751           getLexer().is(AsmToken::EndOfStatement)) break;
5752       if (getLexer().is(AsmToken::Comma)) Parser.Lex();
5753       res = parseOptionalOpr(Operands);
5754     }
5755   }
5756 
5757   return res;
5758 }
5759 
5760 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
5761   OperandMatchResultTy res;
5762   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
5763     // try to parse any optional operand here
5764     if (Op.IsBit) {
5765       res = parseNamedBit(Op.Name, Operands, Op.Type);
5766     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
5767       res = parseOModOperand(Operands);
5768     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
5769                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
5770                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
5771       res = parseSDWASel(Operands, Op.Name, Op.Type);
5772     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
5773       res = parseSDWADstUnused(Operands);
5774     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
5775                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
5776                Op.Type == AMDGPUOperand::ImmTyNegLo ||
5777                Op.Type == AMDGPUOperand::ImmTyNegHi) {
5778       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
5779                                         Op.ConvertResult);
5780     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
5781       res = parseDim(Operands);
5782     } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
5783       res = parseDfmtNfmt(Operands);
5784     } else {
5785       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
5786     }
5787     if (res != MatchOperand_NoMatch) {
5788       return res;
5789     }
5790   }
5791   return MatchOperand_NoMatch;
5792 }
5793 
5794 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
5795   StringRef Name = Parser.getTok().getString();
5796   if (Name == "mul") {
5797     return parseIntWithPrefix("mul", Operands,
5798                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
5799   }
5800 
5801   if (Name == "div") {
5802     return parseIntWithPrefix("div", Operands,
5803                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
5804   }
5805 
5806   return MatchOperand_NoMatch;
5807 }
5808 
5809 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
5810   cvtVOP3P(Inst, Operands);
5811 
5812   int Opc = Inst.getOpcode();
5813 
5814   int SrcNum;
5815   const int Ops[] = { AMDGPU::OpName::src0,
5816                       AMDGPU::OpName::src1,
5817                       AMDGPU::OpName::src2 };
5818   for (SrcNum = 0;
5819        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
5820        ++SrcNum);
5821   assert(SrcNum > 0);
5822 
5823   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
5824   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
5825 
5826   if ((OpSel & (1 << SrcNum)) != 0) {
5827     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
5828     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
5829     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
5830   }
5831 }
5832 
5833 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
5834       // 1. This operand is an input modifiers operand
5835   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
5836       // 2. This is not the last operand
5837       && Desc.NumOperands > (OpNum + 1)
5838       // 3. The next operand is a register class operand
5839       && Desc.OpInfo[OpNum + 1].RegClass != -1
5840       // 4. The next register is not tied to any other operand
5841       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
5842 }
5843 
5844 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
5845 {
5846   OptionalImmIndexMap OptionalIdx;
5847   unsigned Opc = Inst.getOpcode();
5848 
5849   unsigned I = 1;
5850   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5851   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5852     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5853   }
5854 
5855   for (unsigned E = Operands.size(); I != E; ++I) {
5856     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5857     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5858       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
5859     } else if (Op.isInterpSlot() ||
5860                Op.isInterpAttr() ||
5861                Op.isAttrChan()) {
5862       Inst.addOperand(MCOperand::createImm(Op.getImm()));
5863     } else if (Op.isImmModifier()) {
5864       OptionalIdx[Op.getImmTy()] = I;
5865     } else {
5866       llvm_unreachable("unhandled operand type");
5867     }
5868   }
5869 
5870   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
5871     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
5872   }
5873 
5874   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
5875     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
5876   }
5877 
5878   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
5879     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
5880   }
5881 }
5882 
5883 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
5884                               OptionalImmIndexMap &OptionalIdx) {
5885   unsigned Opc = Inst.getOpcode();
5886 
5887   unsigned I = 1;
5888   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5889   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5890     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5891   }
5892 
5893   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
5894     // This instruction has src modifiers
5895     for (unsigned E = Operands.size(); I != E; ++I) {
5896       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5897       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5898         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
5899       } else if (Op.isImmModifier()) {
5900         OptionalIdx[Op.getImmTy()] = I;
5901       } else if (Op.isRegOrImm()) {
5902         Op.addRegOrImmOperands(Inst, 1);
5903       } else {
5904         llvm_unreachable("unhandled operand type");
5905       }
5906     }
5907   } else {
5908     // No src modifiers
5909     for (unsigned E = Operands.size(); I != E; ++I) {
5910       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5911       if (Op.isMod()) {
5912         OptionalIdx[Op.getImmTy()] = I;
5913       } else {
5914         Op.addRegOrImmOperands(Inst, 1);
5915       }
5916     }
5917   }
5918 
5919   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
5920     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
5921   }
5922 
5923   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
5924     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
5925   }
5926 
5927   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
5928   // these opcodes have a src2 register operand that is tied to the dst operand.
5929   // We don't allow modifiers for this operand in the assembler, so src2_modifiers
5930   // should be 0.
5931   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
5932       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
5933       Opc == AMDGPU::V_MAC_F32_e64_vi ||
5934       Opc == AMDGPU::V_MAC_F16_e64_vi ||
5935       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
5936       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
5937       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
5938     auto it = Inst.begin();
5939     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
5940     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
5941     ++it;
5942     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
5943   }
5944 }
5945 
5946 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
5947   OptionalImmIndexMap OptionalIdx;
5948   cvtVOP3(Inst, Operands, OptionalIdx);
5949 }
5950 
5951 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
5952                                const OperandVector &Operands) {
5953   OptionalImmIndexMap OptIdx;
5954   const int Opc = Inst.getOpcode();
5955   const MCInstrDesc &Desc = MII.get(Opc);
5956 
5957   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
5958 
5959   cvtVOP3(Inst, Operands, OptIdx);
5960 
5961   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
5962     assert(!IsPacked);
5963     Inst.addOperand(Inst.getOperand(0));
5964   }
5965 
5966   // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
5967   // instruction, and then figure out where to actually put the modifiers.
5968 
5969   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
5970 
5971   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
5972   if (OpSelHiIdx != -1) {
5973     int DefaultVal = IsPacked ? -1 : 0;
5974     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
5975                           DefaultVal);
5976   }
5977 
5978   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
5979   if (NegLoIdx != -1) {
5980     assert(IsPacked);
5981     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
5982     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
5983   }
5984 
5985   const int Ops[] = { AMDGPU::OpName::src0,
5986                       AMDGPU::OpName::src1,
5987                       AMDGPU::OpName::src2 };
5988   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
5989                          AMDGPU::OpName::src1_modifiers,
5990                          AMDGPU::OpName::src2_modifiers };
5991 
5992   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
5993 
5994   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
5995   unsigned OpSelHi = 0;
5996   unsigned NegLo = 0;
5997   unsigned NegHi = 0;
5998 
5999   if (OpSelHiIdx != -1) {
6000     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
6001   }
6002 
6003   if (NegLoIdx != -1) {
6004     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
6005     NegLo = Inst.getOperand(NegLoIdx).getImm();
6006     NegHi = Inst.getOperand(NegHiIdx).getImm();
6007   }
6008 
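  // Fold the parsed op_sel / op_sel_hi / neg_lo / neg_hi bits into the
  // per-source src*_modifiers operands.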
6009   for (int J = 0; J < 3; ++J) {
6010     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
6011     if (OpIdx == -1)
6012       break;
6013 
6014     uint32_t ModVal = 0;
6015 
6016     if ((OpSel & (1 << J)) != 0)
6017       ModVal |= SISrcMods::OP_SEL_0;
6018 
6019     if ((OpSelHi & (1 << J)) != 0)
6020       ModVal |= SISrcMods::OP_SEL_1;
6021 
6022     if ((NegLo & (1 << J)) != 0)
6023       ModVal |= SISrcMods::NEG;
6024 
6025     if ((NegHi & (1 << J)) != 0)
6026       ModVal |= SISrcMods::NEG_HI;
6027 
6028     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
6029 
6030     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
6031   }
6032 }
6033 
6034 //===----------------------------------------------------------------------===//
6035 // dpp
6036 //===----------------------------------------------------------------------===//
6037 
6038 bool AMDGPUOperand::isDPP8() const {
6039   return isImmTy(ImmTyDPP8);
6040 }
6041 
6042 bool AMDGPUOperand::isDPPCtrl() const {
6043   using namespace AMDGPU::DPP;
6044 
6045   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
6046   if (result) {
6047     int64_t Imm = getImm();
6048     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
6049            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
6050            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
6051            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
6052            (Imm == DppCtrl::WAVE_SHL1) ||
6053            (Imm == DppCtrl::WAVE_ROL1) ||
6054            (Imm == DppCtrl::WAVE_SHR1) ||
6055            (Imm == DppCtrl::WAVE_ROR1) ||
6056            (Imm == DppCtrl::ROW_MIRROR) ||
6057            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
6058            (Imm == DppCtrl::BCAST15) ||
6059            (Imm == DppCtrl::BCAST31) ||
6060            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
6061            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
6062   }
6063   return false;
6064 }
6065 
6066 bool AMDGPUOperand::isS16Imm() const {
6067   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
6068 }
6069 
6070 bool AMDGPUOperand::isU16Imm() const {
6071   return isImm() && isUInt<16>(getImm());
6072 }
6073 
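// Parse the MIMG "dim:" operand (GFX10). The value is a dimension name such
// as "1D" or "2D", optionally prefixed with "SQ_RSRC_IMG_"; it is looked up
// via getMIMGDimInfoByAsmSuffix and encoded as an immediate.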
6074 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
6075   if (!isGFX10())
6076     return MatchOperand_NoMatch;
6077 
6078   SMLoc S = Parser.getTok().getLoc();
6079 
6080   if (getLexer().isNot(AsmToken::Identifier))
6081     return MatchOperand_NoMatch;
6082   if (getLexer().getTok().getString() != "dim")
6083     return MatchOperand_NoMatch;
6084 
6085   Parser.Lex();
6086   if (getLexer().isNot(AsmToken::Colon))
6087     return MatchOperand_ParseFail;
6088 
6089   Parser.Lex();
6090 
6091   // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
6092   // integer.
6093   std::string Token;
6094   if (getLexer().is(AsmToken::Integer)) {
6095     SMLoc Loc = getLexer().getTok().getEndLoc();
6096     Token = getLexer().getTok().getString();
6097     Parser.Lex();
6098     if (getLexer().getTok().getLoc() != Loc)
6099       return MatchOperand_ParseFail;
6100   }
6101   if (getLexer().isNot(AsmToken::Identifier))
6102     return MatchOperand_ParseFail;
6103   Token += getLexer().getTok().getString();
6104 
6105   StringRef DimId = Token;
6106   if (DimId.startswith("SQ_RSRC_IMG_"))
6107     DimId = DimId.substr(12);
6108 
6109   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
6110   if (!DimInfo)
6111     return MatchOperand_ParseFail;
6112 
6113   Parser.Lex();
6114 
6115   Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
6116                                               AMDGPUOperand::ImmTyDim));
6117   return MatchOperand_Success;
6118 }
6119 
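// Parse the GFX10 "dpp8:[a,b,c,d,e,f,g,h]" operand: eight lane selectors in
// the range 0..7, packed 3 bits per lane into the DPP8 immediate.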
6120 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
6121   SMLoc S = Parser.getTok().getLoc();
6122   StringRef Prefix;
6123 
6124   if (getLexer().getKind() == AsmToken::Identifier) {
6125     Prefix = Parser.getTok().getString();
6126   } else {
6127     return MatchOperand_NoMatch;
6128   }
6129 
6130   if (Prefix != "dpp8")
6131     return parseDPPCtrl(Operands);
6132   if (!isGFX10())
6133     return MatchOperand_NoMatch;
6134 
6135   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
6136 
6137   int64_t Sels[8];
6138 
6139   Parser.Lex();
6140   if (getLexer().isNot(AsmToken::Colon))
6141     return MatchOperand_ParseFail;
6142 
6143   Parser.Lex();
6144   if (getLexer().isNot(AsmToken::LBrac))
6145     return MatchOperand_ParseFail;
6146 
6147   Parser.Lex();
6148   if (getParser().parseAbsoluteExpression(Sels[0]))
6149     return MatchOperand_ParseFail;
6150   if (0 > Sels[0] || 7 < Sels[0])
6151     return MatchOperand_ParseFail;
6152 
6153   for (size_t i = 1; i < 8; ++i) {
6154     if (getLexer().isNot(AsmToken::Comma))
6155       return MatchOperand_ParseFail;
6156 
6157     Parser.Lex();
6158     if (getParser().parseAbsoluteExpression(Sels[i]))
6159       return MatchOperand_ParseFail;
6160     if (0 > Sels[i] || 7 < Sels[i])
6161       return MatchOperand_ParseFail;
6162   }
6163 
6164   if (getLexer().isNot(AsmToken::RBrac))
6165     return MatchOperand_ParseFail;
6166   Parser.Lex();
6167 
6168   unsigned DPP8 = 0;
6169   for (size_t i = 0; i < 8; ++i)
6170     DPP8 |= (Sels[i] << (i * 3));
6171 
6172   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
6173   return MatchOperand_Success;
6174 }
6175 
6176 OperandMatchResultTy
6177 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
6178   using namespace AMDGPU::DPP;
6179 
6180   SMLoc S = Parser.getTok().getLoc();
6181   StringRef Prefix;
6182   int64_t Int;
6183 
6184   if (getLexer().getKind() == AsmToken::Identifier) {
6185     Prefix = Parser.getTok().getString();
6186   } else {
6187     return MatchOperand_NoMatch;
6188   }
6189 
6190   if (Prefix == "row_mirror") {
6191     Int = DppCtrl::ROW_MIRROR;
6192     Parser.Lex();
6193   } else if (Prefix == "row_half_mirror") {
6194     Int = DppCtrl::ROW_HALF_MIRROR;
6195     Parser.Lex();
6196   } else {
6197     // Check to prevent parseDPPCtrl from eating invalid tokens
6198     if (Prefix != "quad_perm"
6199         && Prefix != "row_shl"
6200         && Prefix != "row_shr"
6201         && Prefix != "row_ror"
6202         && Prefix != "wave_shl"
6203         && Prefix != "wave_rol"
6204         && Prefix != "wave_shr"
6205         && Prefix != "wave_ror"
6206         && Prefix != "row_bcast"
6207         && Prefix != "row_share"
6208         && Prefix != "row_xmask") {
6209       return MatchOperand_NoMatch;
6210     }
6211 
6212     if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
6213       return MatchOperand_NoMatch;
6214 
6215     if (!isVI() && !isGFX9() &&
6216         (Prefix == "wave_shl" || Prefix == "wave_shr" ||
6217          Prefix == "wave_rol" || Prefix == "wave_ror" ||
6218          Prefix == "row_bcast"))
6219       return MatchOperand_NoMatch;
6220 
6221     Parser.Lex();
6222     if (getLexer().isNot(AsmToken::Colon))
6223       return MatchOperand_ParseFail;
6224 
6225     if (Prefix == "quad_perm") {
6226       // quad_perm:[%d,%d,%d,%d]
6227       Parser.Lex();
6228       if (getLexer().isNot(AsmToken::LBrac))
6229         return MatchOperand_ParseFail;
6230       Parser.Lex();
6231 
6232       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <= 3))
6233         return MatchOperand_ParseFail;
6234 
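      // Pack the remaining three 2-bit selectors into bits [3:2], [5:4] and
      // [7:6] above the first one.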
6235       for (int i = 0; i < 3; ++i) {
6236         if (getLexer().isNot(AsmToken::Comma))
6237           return MatchOperand_ParseFail;
6238         Parser.Lex();
6239 
6240         int64_t Temp;
6241         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <= 3))
6242           return MatchOperand_ParseFail;
6243         const int shift = i*2 + 2;
6244         Int += (Temp << shift);
6245       }
6246 
6247       if (getLexer().isNot(AsmToken::RBrac))
6248         return MatchOperand_ParseFail;
6249       Parser.Lex();
6250     } else {
6251       // sel:%d
6252       Parser.Lex();
6253       if (getParser().parseAbsoluteExpression(Int))
6254         return MatchOperand_ParseFail;
6255 
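      // Map the prefix and its value onto the dpp control encoding:
      // row_shl/row_shr/row_ror take a shift/rotate amount of 1..15,
      // the wave_* controls accept only 1, row_bcast accepts only 15 or 31,
      // and row_share/row_xmask (GFX10) take a value of 0..15.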
6256       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
6257         Int |= DppCtrl::ROW_SHL0;
6258       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
6259         Int |= DppCtrl::ROW_SHR0;
6260       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
6261         Int |= DppCtrl::ROW_ROR0;
6262       } else if (Prefix == "wave_shl" && 1 == Int) {
6263         Int = DppCtrl::WAVE_SHL1;
6264       } else if (Prefix == "wave_rol" && 1 == Int) {
6265         Int = DppCtrl::WAVE_ROL1;
6266       } else if (Prefix == "wave_shr" && 1 == Int) {
6267         Int = DppCtrl::WAVE_SHR1;
6268       } else if (Prefix == "wave_ror" && 1 == Int) {
6269         Int = DppCtrl::WAVE_ROR1;
6270       } else if (Prefix == "row_bcast") {
6271         if (Int == 15) {
6272           Int = DppCtrl::BCAST15;
6273         } else if (Int == 31) {
6274           Int = DppCtrl::BCAST31;
6275         } else {
6276           return MatchOperand_ParseFail;
6277         }
6278       } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
6279         Int |= DppCtrl::ROW_SHARE_FIRST;
6280       } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
6281         Int |= DppCtrl::ROW_XMASK_FIRST;
6282       } else {
6283         return MatchOperand_ParseFail;
6284       }
6285     }
6286   }
6287 
6288   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
6289   return MatchOperand_Success;
6290 }
6291 
6292 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
6293   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
6294 }
6295 
6296 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
6297   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
6298 }
6299 
6300 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
6301   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
6302 }
6303 
6304 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
6305   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
6306 }
6307 
6308 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
6309   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
6310 }
6311 
6312 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
6313   OptionalImmIndexMap OptionalIdx;
6314 
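  // Operands[0] is the instruction mnemonic token; explicit operands start at
  // index 1, beginning with the destination registers.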
6315   unsigned I = 1;
6316   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6317   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6318     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6319   }
6320 
6321   int Fi = 0;
6322   for (unsigned E = Operands.size(); I != E; ++I) {
6323     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
6324                                             MCOI::TIED_TO);
6325     if (TiedTo != -1) {
6326       assert((unsigned)TiedTo < Inst.getNumOperands());
6327       // Handle the tied 'old' or src2 operand for MAC instructions.
6328       Inst.addOperand(Inst.getOperand(TiedTo));
6329     }
6330     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6331     // Add the register arguments
6332     if (Op.isReg() && validateVccOperand(Op.getReg())) {
6333       // VOP2b (v_add_u32, v_sub_u32 ...) dpp uses the "vcc" token.
6334       // Skip it.
6335       continue;
6336     }
6337 
6338     if (IsDPP8) {
6339       if (Op.isDPP8()) {
6340         Op.addImmOperands(Inst, 1);
6341       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6342         Op.addRegWithFPInputModsOperands(Inst, 2);
6343       } else if (Op.isFI()) {
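        // Remember the fi value; it is appended as the final operand below.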
6344         Fi = Op.getImm();
6345       } else if (Op.isReg()) {
6346         Op.addRegOperands(Inst, 1);
6347       } else {
6348         llvm_unreachable("Invalid operand type");
6349       }
6350     } else {
6351       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6352         Op.addRegWithFPInputModsOperands(Inst, 2);
6353       } else if (Op.isDPPCtrl()) {
6354         Op.addImmOperands(Inst, 1);
6355       } else if (Op.isImm()) {
6356         // Handle optional arguments
6357         OptionalIdx[Op.getImmTy()] = I;
6358       } else {
6359         llvm_unreachable("Invalid operand type");
6360       }
6361     }
6362   }
6363 
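  // dpp8 appends the fi operand directly; classic dpp adds its optional
  // modifiers (row_mask, bank_mask, bound_ctrl and, if present, fi) using the
  // deferred operand indices, falling back to their defaults.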
6364   if (IsDPP8) {
6365     using namespace llvm::AMDGPU::DPP;
6366     Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
6367   } else {
6368     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
6369     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
6370     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
6371     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
6372       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
6373     }
6374   }
6375 }
6376 
6377 //===----------------------------------------------------------------------===//
6378 // sdwa
6379 //===----------------------------------------------------------------------===//
6380 
6381 OperandMatchResultTy
6382 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
6383                               AMDGPUOperand::ImmTy Type) {
6384   using namespace llvm::AMDGPU::SDWA;
6385 
6386   SMLoc S = Parser.getTok().getLoc();
6387   StringRef Value;
6388   OperandMatchResultTy res;
6389 
6390   res = parseStringWithPrefix(Prefix, Value);
6391   if (res != MatchOperand_Success) {
6392     return res;
6393   }
6394 
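  // Translate the selector name into its encoding; 0xffffffff marks an
  // unrecognized name.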
6395   int64_t Int;
6396   Int = StringSwitch<int64_t>(Value)
6397         .Case("BYTE_0", SdwaSel::BYTE_0)
6398         .Case("BYTE_1", SdwaSel::BYTE_1)
6399         .Case("BYTE_2", SdwaSel::BYTE_2)
6400         .Case("BYTE_3", SdwaSel::BYTE_3)
6401         .Case("WORD_0", SdwaSel::WORD_0)
6402         .Case("WORD_1", SdwaSel::WORD_1)
6403         .Case("DWORD", SdwaSel::DWORD)
6404         .Default(0xffffffff);
6405   Parser.Lex(); // eat last token
6406 
6407   if (Int == 0xffffffff) {
6408     return MatchOperand_ParseFail;
6409   }
6410 
6411   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
6412   return MatchOperand_Success;
6413 }
6414 
6415 OperandMatchResultTy
6416 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
6417   using namespace llvm::AMDGPU::SDWA;
6418 
6419   SMLoc S = Parser.getTok().getLoc();
6420   StringRef Value;
6421   OperandMatchResultTy res;
6422 
6423   res = parseStringWithPrefix("dst_unused", Value);
6424   if (res != MatchOperand_Success) {
6425     return res;
6426   }
6427 
6428   int64_t Int;
6429   Int = StringSwitch<int64_t>(Value)
6430         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
6431         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
6432         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
6433         .Default(0xffffffff);
6434   Parser.Lex(); // eat last token
6435 
6436   if (Int == 0xffffffff) {
6437     return MatchOperand_ParseFail;
6438   }
6439 
6440   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
6441   return MatchOperand_Success;
6442 }
6443 
6444 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
6445   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
6446 }
6447 
6448 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
6449   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
6450 }
6451 
6452 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
6453   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
6454 }
6455 
6456 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
6457   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
6458 }
6459 
6460 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
6461                               uint64_t BasicInstType, bool skipVcc) {
6462   using namespace llvm::AMDGPU::SDWA;
6463 
6464   OptionalImmIndexMap OptionalIdx;
6465   bool skippedVcc = false;
6466 
6467   unsigned I = 1;
6468   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6469   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6470     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6471   }
6472 
6473   for (unsigned E = Operands.size(); I != E; ++I) {
6474     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6475     if (skipVcc && !skippedVcc && Op.isReg() &&
6476         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
6477       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa uses the "vcc" token as dst.
6478       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
6479       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
6480       // Skip VCC only if we didn't skip it on previous iteration.
6481       if (BasicInstType == SIInstrFlags::VOP2 &&
6482           (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
6483         skippedVcc = true;
6484         continue;
6485       } else if (BasicInstType == SIInstrFlags::VOPC &&
6486                  Inst.getNumOperands() == 0) {
6487         skippedVcc = true;
6488         continue;
6489       }
6490     }
6491     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6492       Op.addRegOrImmWithInputModsOperands(Inst, 2);
6493     } else if (Op.isImm()) {
6494       // Handle optional arguments
6495       OptionalIdx[Op.getImmTy()] = I;
6496     } else {
6497       llvm_unreachable("Invalid operand type");
6498     }
6499     skippedVcc = false;
6500   }
6501 
6502   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
6503       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
6504       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
6505     // v_nop_sdwa_vi/gfx9/gfx10 have no optional sdwa arguments
6506     switch (BasicInstType) {
6507     case SIInstrFlags::VOP1:
6508       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6509       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6510         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6511       }
6512       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6513       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6514       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6515       break;
6516 
6517     case SIInstrFlags::VOP2:
6518       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6519       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6520         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6521       }
6522       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6523       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6524       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6525       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6526       break;
6527 
6528     case SIInstrFlags::VOPC:
6529       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
6530         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6531       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6532       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6533       break;
6534 
6535     default:
6536       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
6537     }
6538   }
6539 
6540   // Special case for v_mac_{f16, f32}:
6541   // the src2 register operand is tied to the dst operand.
6542   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
6543       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
6544     auto it = Inst.begin();
6545     std::advance(
6546       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
6547     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6548   }
6549 }
6550 
6551 /// Force static initialization.
6552 extern "C" void LLVMInitializeAMDGPUAsmParser() {
6553   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
6554   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
6555 }
6556 
6557 #define GET_REGISTER_MATCHER
6558 #define GET_MATCHER_IMPLEMENTATION
6559 #define GET_MNEMONIC_SPELL_CHECKER
6560 #include "AMDGPUGenAsmMatcher.inc"
6561 
6562 // This function should be defined after the auto-generated include so that
6563 // the MatchClassKind enum is defined.
6564 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
6565                                                      unsigned Kind) {
6566   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
6567   // But MatchInstructionImpl() expects a token and fails to validate the
6568   // operand. This method checks whether we were given an immediate operand
6569   // where the matcher expects the corresponding token.
6570   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
6571   switch (Kind) {
6572   case MCK_addr64:
6573     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
6574   case MCK_gds:
6575     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
6576   case MCK_lds:
6577     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
6578   case MCK_glc:
6579     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
6580   case MCK_idxen:
6581     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
6582   case MCK_offen:
6583     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
6584   case MCK_SSrcB32:
6585     // When operands have expression values, they will return true for isToken,
6586     // because it is not possible to distinguish between a token and an
6587     // expression at parse time. MatchInstructionImpl() will always try to
6588     // match an operand as a token, when isToken returns true, and when the
6589     // name of the expression is not a valid token, the match will fail,
6590     // so we need to handle it here.
6591     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
6592   case MCK_SSrcF32:
6593     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
6594   case MCK_SoppBrTarget:
6595     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
6596   case MCK_VReg32OrOff:
6597     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
6598   case MCK_InterpSlot:
6599     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
6600   case MCK_Attr:
6601     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
6602   case MCK_AttrChan:
6603     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
6604   default:
6605     return Match_InvalidOperand;
6606   }
6607 }
6608 
6609 //===----------------------------------------------------------------------===//
6610 // endpgm
6611 //===----------------------------------------------------------------------===//
6612 
6613 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
6614   SMLoc S = Parser.getTok().getLoc();
6615   int64_t Imm = 0;
6616 
6617   if (!parseExpr(Imm)) {
6618     // The operand is optional; if not present, default to 0.
6619     Imm = 0;
6620   }
6621 
6622   if (!isUInt<16>(Imm)) {
6623     Error(S, "expected a 16-bit value");
6624     return MatchOperand_ParseFail;
6625   }
6626 
6627   Operands.push_back(
6628       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
6629   return MatchOperand_Success;
6630 }
6631 
6632 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
6633