1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/ErrorHandling.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
63 
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
67 
68 namespace {
69 
70 class AMDGPUAsmParser;
71 
72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };
73 
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
77 
78 class AMDGPUOperand : public MCParsedAsmOperand {
79   enum KindTy {
80     Token,
81     Immediate,
82     Register,
83     Expression
84   } Kind;
85 
86   SMLoc StartLoc, EndLoc;
87   const AMDGPUAsmParser *AsmParser;
88 
89 public:
90   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
92 
93   using Ptr = std::unique_ptr<AMDGPUOperand>;
94 
95   struct Modifiers {
96     bool Abs = false;
97     bool Neg = false;
98     bool Sext = false;
99 
100     bool hasFPModifiers() const { return Abs || Neg; }
101     bool hasIntModifiers() const { return Sext; }
102     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103 
104     int64_t getFPModifiersOperand() const {
105       int64_t Operand = 0;
106       Operand |= Abs ? SISrcMods::ABS : 0u;
107       Operand |= Neg ? SISrcMods::NEG : 0u;
108       return Operand;
109     }
110 
111     int64_t getIntModifiersOperand() const {
112       int64_t Operand = 0;
113       Operand |= Sext ? SISrcMods::SEXT : 0u;
114       return Operand;
115     }
116 
117     int64_t getModifiersOperand() const {
118       assert(!(hasFPModifiers() && hasIntModifiers())
119            && "fp and int modifiers should not be used simultaneously");
120       if (hasFPModifiers()) {
121         return getFPModifiersOperand();
122       } else if (hasIntModifiers()) {
123         return getIntModifiersOperand();
124       } else {
125         return 0;
126       }
127     }
128 
129     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
130   };
131 
132   enum ImmTy {
133     ImmTyNone,
134     ImmTyGDS,
135     ImmTyLDS,
136     ImmTyOffen,
137     ImmTyIdxen,
138     ImmTyAddr64,
139     ImmTyOffset,
140     ImmTyInstOffset,
141     ImmTyOffset0,
142     ImmTyOffset1,
143     ImmTyDLC,
144     ImmTyGLC,
145     ImmTySLC,
146     ImmTyTFE,
147     ImmTyD16,
148     ImmTyClampSI,
149     ImmTyOModSI,
150     ImmTyDPP8,
151     ImmTyDppCtrl,
152     ImmTyDppRowMask,
153     ImmTyDppBankMask,
154     ImmTyDppBoundCtrl,
155     ImmTyDppFi,
156     ImmTySdwaDstSel,
157     ImmTySdwaSrc0Sel,
158     ImmTySdwaSrc1Sel,
159     ImmTySdwaDstUnused,
160     ImmTyDMask,
161     ImmTyDim,
162     ImmTyUNorm,
163     ImmTyDA,
164     ImmTyR128A16,
165     ImmTyLWE,
166     ImmTyExpTgt,
167     ImmTyExpCompr,
168     ImmTyExpVM,
169     ImmTyFORMAT,
170     ImmTyHwreg,
171     ImmTyOff,
172     ImmTySendMsg,
173     ImmTyInterpSlot,
174     ImmTyInterpAttr,
175     ImmTyAttrChan,
176     ImmTyOpSel,
177     ImmTyOpSelHi,
178     ImmTyNegLo,
179     ImmTyNegHi,
180     ImmTySwizzle,
181     ImmTyGprIdxMode,
182     ImmTyEndpgm,
183     ImmTyHigh
184   };
185 
186 private:
187   struct TokOp {
188     const char *Data;
189     unsigned Length;
190   };
191 
192   struct ImmOp {
193     int64_t Val;
194     ImmTy Type;
195     bool IsFPImm;
196     Modifiers Mods;
197   };
198 
199   struct RegOp {
200     unsigned RegNo;
201     Modifiers Mods;
202   };
203 
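  // Operand payload; the active union member is selected by Kind.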
204   union {
205     TokOp Tok;
206     ImmOp Imm;
207     RegOp Reg;
208     const MCExpr *Expr;
209   };
210 
211 public:
212   bool isToken() const override {
213     if (Kind == Token)
214       return true;
215 
216     if (Kind != Expression || !Expr)
217       return false;
218 
219     // When parsing operands, we can't always tell if something was meant to be
220     // a token, like 'gds', or an expression that references a global variable.
221     // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
223     return isa<MCSymbolRefExpr>(Expr);
224   }
225 
226   bool isImm() const override {
227     return Kind == Immediate;
228   }
229 
230   bool isInlinableImm(MVT type) const;
231   bool isLiteralImm(MVT type) const;
232 
233   bool isRegKind() const {
234     return Kind == Register;
235   }
236 
237   bool isReg() const override {
238     return isRegKind() && !hasModifiers();
239   }
240 
241   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
242     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
243   }
244 
245   bool isRegOrImmWithInt16InputMods() const {
246     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
247   }
248 
249   bool isRegOrImmWithInt32InputMods() const {
250     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
251   }
252 
253   bool isRegOrImmWithInt64InputMods() const {
254     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
255   }
256 
257   bool isRegOrImmWithFP16InputMods() const {
258     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
259   }
260 
261   bool isRegOrImmWithFP32InputMods() const {
262     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
263   }
264 
265   bool isRegOrImmWithFP64InputMods() const {
266     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
267   }
268 
269   bool isVReg() const {
270     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
271            isRegClass(AMDGPU::VReg_64RegClassID) ||
272            isRegClass(AMDGPU::VReg_96RegClassID) ||
273            isRegClass(AMDGPU::VReg_128RegClassID) ||
274            isRegClass(AMDGPU::VReg_256RegClassID) ||
275            isRegClass(AMDGPU::VReg_512RegClassID);
276   }
277 
278   bool isVReg32() const {
279     return isRegClass(AMDGPU::VGPR_32RegClassID);
280   }
281 
282   bool isVReg32OrOff() const {
283     return isOff() || isVReg32();
284   }
285 
286   bool isSDWAOperand(MVT type) const;
287   bool isSDWAFP16Operand() const;
288   bool isSDWAFP32Operand() const;
289   bool isSDWAInt16Operand() const;
290   bool isSDWAInt32Operand() const;
291 
292   bool isImmTy(ImmTy ImmT) const {
293     return isImm() && Imm.Type == ImmT;
294   }
295 
296   bool isImmModifier() const {
297     return isImm() && Imm.Type != ImmTyNone;
298   }
299 
300   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
301   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
302   bool isDMask() const { return isImmTy(ImmTyDMask); }
303   bool isDim() const { return isImmTy(ImmTyDim); }
304   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
305   bool isDA() const { return isImmTy(ImmTyDA); }
306   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
307   bool isLWE() const { return isImmTy(ImmTyLWE); }
308   bool isOff() const { return isImmTy(ImmTyOff); }
309   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
310   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
311   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
312   bool isOffen() const { return isImmTy(ImmTyOffen); }
313   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
314   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
315   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
316   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
317   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
318 
319   bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
320   bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
321   bool isGDS() const { return isImmTy(ImmTyGDS); }
322   bool isLDS() const { return isImmTy(ImmTyLDS); }
323   bool isDLC() const { return isImmTy(ImmTyDLC); }
324   bool isGLC() const { return isImmTy(ImmTyGLC); }
325   bool isSLC() const { return isImmTy(ImmTySLC); }
326   bool isTFE() const { return isImmTy(ImmTyTFE); }
327   bool isD16() const { return isImmTy(ImmTyD16); }
328   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
329   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
330   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
331   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
332   bool isFI() const { return isImmTy(ImmTyDppFi); }
333   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
334   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
335   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
336   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
337   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
338   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
339   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
340   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
341   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
342   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
343   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
344   bool isHigh() const { return isImmTy(ImmTyHigh); }
345 
346   bool isMod() const {
347     return isClampSI() || isOModSI();
348   }
349 
350   bool isRegOrImm() const {
351     return isReg() || isImm();
352   }
353 
354   bool isRegClass(unsigned RCID) const;
355 
356   bool isInlineValue() const;
357 
358   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
359     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
360   }
361 
362   bool isSCSrcB16() const {
363     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
364   }
365 
366   bool isSCSrcV2B16() const {
367     return isSCSrcB16();
368   }
369 
370   bool isSCSrcB32() const {
371     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
372   }
373 
374   bool isSCSrcB64() const {
375     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
376   }
377 
378   bool isBoolReg() const;
379 
380   bool isSCSrcF16() const {
381     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
382   }
383 
384   bool isSCSrcV2F16() const {
385     return isSCSrcF16();
386   }
387 
388   bool isSCSrcF32() const {
389     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
390   }
391 
392   bool isSCSrcF64() const {
393     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
394   }
395 
396   bool isSSrcB32() const {
397     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
398   }
399 
400   bool isSSrcB16() const {
401     return isSCSrcB16() || isLiteralImm(MVT::i16);
402   }
403 
404   bool isSSrcV2B16() const {
405     llvm_unreachable("cannot happen");
406     return isSSrcB16();
407   }
408 
409   bool isSSrcB64() const {
410     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
411     // See isVSrc64().
412     return isSCSrcB64() || isLiteralImm(MVT::i64);
413   }
414 
415   bool isSSrcF32() const {
416     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
417   }
418 
419   bool isSSrcF64() const {
420     return isSCSrcB64() || isLiteralImm(MVT::f64);
421   }
422 
423   bool isSSrcF16() const {
424     return isSCSrcB16() || isLiteralImm(MVT::f16);
425   }
426 
427   bool isSSrcV2F16() const {
428     llvm_unreachable("cannot happen");
429     return isSSrcF16();
430   }
431 
432   bool isSSrcOrLdsB32() const {
433     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
434            isLiteralImm(MVT::i32) || isExpr();
435   }
436 
437   bool isVCSrcB32() const {
438     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
439   }
440 
441   bool isVCSrcB64() const {
442     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
443   }
444 
445   bool isVCSrcB16() const {
446     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
447   }
448 
449   bool isVCSrcV2B16() const {
450     return isVCSrcB16();
451   }
452 
453   bool isVCSrcF32() const {
454     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
455   }
456 
457   bool isVCSrcF64() const {
458     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
459   }
460 
461   bool isVCSrcF16() const {
462     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
463   }
464 
465   bool isVCSrcV2F16() const {
466     return isVCSrcF16();
467   }
468 
469   bool isVSrcB32() const {
470     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
471   }
472 
473   bool isVSrcB64() const {
474     return isVCSrcF64() || isLiteralImm(MVT::i64);
475   }
476 
477   bool isVSrcB16() const {
478     return isVCSrcF16() || isLiteralImm(MVT::i16);
479   }
480 
481   bool isVSrcV2B16() const {
482     return isVSrcB16() || isLiteralImm(MVT::v2i16);
483   }
484 
485   bool isVSrcF32() const {
486     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
487   }
488 
489   bool isVSrcF64() const {
490     return isVCSrcF64() || isLiteralImm(MVT::f64);
491   }
492 
493   bool isVSrcF16() const {
494     return isVCSrcF16() || isLiteralImm(MVT::f16);
495   }
496 
497   bool isVSrcV2F16() const {
498     return isVSrcF16() || isLiteralImm(MVT::v2f16);
499   }
500 
501   bool isKImmFP32() const {
502     return isLiteralImm(MVT::f32);
503   }
504 
505   bool isKImmFP16() const {
506     return isLiteralImm(MVT::f16);
507   }
508 
509   bool isMem() const override {
510     return false;
511   }
512 
513   bool isExpr() const {
514     return Kind == Expression;
515   }
516 
517   bool isSoppBrTarget() const {
518     return isExpr() || isImm();
519   }
520 
521   bool isSWaitCnt() const;
522   bool isHwreg() const;
523   bool isSendMsg() const;
524   bool isSwizzle() const;
525   bool isSMRDOffset8() const;
526   bool isSMRDOffset20() const;
527   bool isSMRDLiteralOffset() const;
528   bool isDPP8() const;
529   bool isDPPCtrl() const;
530   bool isGPRIdxMode() const;
531   bool isS16Imm() const;
532   bool isU16Imm() const;
533   bool isEndpgm() const;
534 
535   StringRef getExpressionAsToken() const {
536     assert(isExpr());
537     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
538     return S->getSymbol().getName();
539   }
540 
541   StringRef getToken() const {
542     assert(isToken());
543 
544     if (Kind == Expression)
545       return getExpressionAsToken();
546 
547     return StringRef(Tok.Data, Tok.Length);
548   }
549 
550   int64_t getImm() const {
551     assert(isImm());
552     return Imm.Val;
553   }
554 
555   ImmTy getImmTy() const {
556     assert(isImm());
557     return Imm.Type;
558   }
559 
560   unsigned getReg() const override {
561     assert(isRegKind());
562     return Reg.RegNo;
563   }
564 
565   SMLoc getStartLoc() const override {
566     return StartLoc;
567   }
568 
569   SMLoc getEndLoc() const override {
570     return EndLoc;
571   }
572 
573   SMRange getLocRange() const {
574     return SMRange(StartLoc, EndLoc);
575   }
576 
577   Modifiers getModifiers() const {
578     assert(isRegKind() || isImmTy(ImmTyNone));
579     return isRegKind() ? Reg.Mods : Imm.Mods;
580   }
581 
582   void setModifiers(Modifiers Mods) {
583     assert(isRegKind() || isImmTy(ImmTyNone));
584     if (isRegKind())
585       Reg.Mods = Mods;
586     else
587       Imm.Mods = Mods;
588   }
589 
590   bool hasModifiers() const {
591     return getModifiers().hasModifiers();
592   }
593 
594   bool hasFPModifiers() const {
595     return getModifiers().hasFPModifiers();
596   }
597 
598   bool hasIntModifiers() const {
599     return getModifiers().hasIntModifiers();
600   }
601 
602   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
603 
604   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
605 
606   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
607 
608   template <unsigned Bitwidth>
609   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
610 
611   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
612     addKImmFPOperands<16>(Inst, N);
613   }
614 
615   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
616     addKImmFPOperands<32>(Inst, N);
617   }
618 
619   void addRegOperands(MCInst &Inst, unsigned N) const;
620 
621   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
622     addRegOperands(Inst, N);
623   }
624 
625   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
626     if (isRegKind())
627       addRegOperands(Inst, N);
628     else if (isExpr())
629       Inst.addOperand(MCOperand::createExpr(Expr));
630     else
631       addImmOperands(Inst, N);
632   }
633 
634   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
635     Modifiers Mods = getModifiers();
636     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
637     if (isRegKind()) {
638       addRegOperands(Inst, N);
639     } else {
640       addImmOperands(Inst, N, false);
641     }
642   }
643 
644   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
645     assert(!hasIntModifiers());
646     addRegOrImmWithInputModsOperands(Inst, N);
647   }
648 
649   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
650     assert(!hasFPModifiers());
651     addRegOrImmWithInputModsOperands(Inst, N);
652   }
653 
654   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
655     Modifiers Mods = getModifiers();
656     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
657     assert(isRegKind());
658     addRegOperands(Inst, N);
659   }
660 
661   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
662     assert(!hasIntModifiers());
663     addRegWithInputModsOperands(Inst, N);
664   }
665 
666   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
667     assert(!hasFPModifiers());
668     addRegWithInputModsOperands(Inst, N);
669   }
670 
671   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
672     if (isImm())
673       addImmOperands(Inst, N);
674     else {
675       assert(isExpr());
676       Inst.addOperand(MCOperand::createExpr(Expr));
677     }
678   }
679 
680   static void printImmTy(raw_ostream& OS, ImmTy Type) {
681     switch (Type) {
682     case ImmTyNone: OS << "None"; break;
683     case ImmTyGDS: OS << "GDS"; break;
684     case ImmTyLDS: OS << "LDS"; break;
685     case ImmTyOffen: OS << "Offen"; break;
686     case ImmTyIdxen: OS << "Idxen"; break;
687     case ImmTyAddr64: OS << "Addr64"; break;
688     case ImmTyOffset: OS << "Offset"; break;
689     case ImmTyInstOffset: OS << "InstOffset"; break;
690     case ImmTyOffset0: OS << "Offset0"; break;
691     case ImmTyOffset1: OS << "Offset1"; break;
692     case ImmTyDLC: OS << "DLC"; break;
693     case ImmTyGLC: OS << "GLC"; break;
694     case ImmTySLC: OS << "SLC"; break;
695     case ImmTyTFE: OS << "TFE"; break;
696     case ImmTyD16: OS << "D16"; break;
697     case ImmTyFORMAT: OS << "FORMAT"; break;
698     case ImmTyClampSI: OS << "ClampSI"; break;
699     case ImmTyOModSI: OS << "OModSI"; break;
700     case ImmTyDPP8: OS << "DPP8"; break;
701     case ImmTyDppCtrl: OS << "DppCtrl"; break;
702     case ImmTyDppRowMask: OS << "DppRowMask"; break;
703     case ImmTyDppBankMask: OS << "DppBankMask"; break;
704     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
705     case ImmTyDppFi: OS << "FI"; break;
706     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
707     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
708     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
709     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
710     case ImmTyDMask: OS << "DMask"; break;
711     case ImmTyDim: OS << "Dim"; break;
712     case ImmTyUNorm: OS << "UNorm"; break;
713     case ImmTyDA: OS << "DA"; break;
714     case ImmTyR128A16: OS << "R128A16"; break;
715     case ImmTyLWE: OS << "LWE"; break;
716     case ImmTyOff: OS << "Off"; break;
717     case ImmTyExpTgt: OS << "ExpTgt"; break;
718     case ImmTyExpCompr: OS << "ExpCompr"; break;
719     case ImmTyExpVM: OS << "ExpVM"; break;
720     case ImmTyHwreg: OS << "Hwreg"; break;
721     case ImmTySendMsg: OS << "SendMsg"; break;
722     case ImmTyInterpSlot: OS << "InterpSlot"; break;
723     case ImmTyInterpAttr: OS << "InterpAttr"; break;
724     case ImmTyAttrChan: OS << "AttrChan"; break;
725     case ImmTyOpSel: OS << "OpSel"; break;
726     case ImmTyOpSelHi: OS << "OpSelHi"; break;
727     case ImmTyNegLo: OS << "NegLo"; break;
728     case ImmTyNegHi: OS << "NegHi"; break;
729     case ImmTySwizzle: OS << "Swizzle"; break;
730     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
731     case ImmTyHigh: OS << "High"; break;
732     case ImmTyEndpgm:
733       OS << "Endpgm";
734       break;
735     }
736   }
737 
738   void print(raw_ostream &OS) const override {
739     switch (Kind) {
740     case Register:
741       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
742       break;
743     case Immediate:
744       OS << '<' << getImm();
745       if (getImmTy() != ImmTyNone) {
746         OS << " type: "; printImmTy(OS, getImmTy());
747       }
748       OS << " mods: " << Imm.Mods << '>';
749       break;
750     case Token:
751       OS << '\'' << getToken() << '\'';
752       break;
753     case Expression:
754       OS << "<expr " << *Expr << '>';
755       break;
756     }
757   }
758 
759   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
760                                       int64_t Val, SMLoc Loc,
761                                       ImmTy Type = ImmTyNone,
762                                       bool IsFPImm = false) {
763     auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
764     Op->Imm.Val = Val;
765     Op->Imm.IsFPImm = IsFPImm;
766     Op->Imm.Type = Type;
767     Op->Imm.Mods = Modifiers();
768     Op->StartLoc = Loc;
769     Op->EndLoc = Loc;
770     return Op;
771   }
772 
773   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
774                                         StringRef Str, SMLoc Loc,
775                                         bool HasExplicitEncodingSize = true) {
776     auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
777     Res->Tok.Data = Str.data();
778     Res->Tok.Length = Str.size();
779     Res->StartLoc = Loc;
780     Res->EndLoc = Loc;
781     return Res;
782   }
783 
784   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
785                                       unsigned RegNo, SMLoc S,
786                                       SMLoc E) {
787     auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
788     Op->Reg.RegNo = RegNo;
789     Op->Reg.Mods = Modifiers();
790     Op->StartLoc = S;
791     Op->EndLoc = E;
792     return Op;
793   }
794 
795   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
796                                        const class MCExpr *Expr, SMLoc S) {
797     auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
798     Op->Expr = Expr;
799     Op->StartLoc = S;
800     Op->EndLoc = S;
801     return Op;
802   }
803 };
804 
805 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
806   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
807   return OS;
808 }
809 
810 //===----------------------------------------------------------------------===//
811 // AsmParser
812 //===----------------------------------------------------------------------===//
813 
// Holds info related to the current kernel, e.g. the count of SGPRs used.
// A kernel scope begins at an .amdgpu_hsa_kernel directive and ends at the
// next .amdgpu_hsa_kernel directive or at EOF.
817 class KernelScopeInfo {
818   int SgprIndexUnusedMin = -1;
819   int VgprIndexUnusedMin = -1;
820   MCContext *Ctx = nullptr;
821 
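  // Record a use of the SGPR at index i and keep the .kernel.sgpr_count
  // symbol (the first unused index) up to date.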
822   void usesSgprAt(int i) {
823     if (i >= SgprIndexUnusedMin) {
824       SgprIndexUnusedMin = ++i;
825       if (Ctx) {
826         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
827         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
828       }
829     }
830   }
831 
832   void usesVgprAt(int i) {
833     if (i >= VgprIndexUnusedMin) {
834       VgprIndexUnusedMin = ++i;
835       if (Ctx) {
836         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
837         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
838       }
839     }
840   }
841 
842 public:
843   KernelScopeInfo() = default;
844 
845   void initialize(MCContext &Context) {
846     Ctx = &Context;
847     usesSgprAt(SgprIndexUnusedMin = -1);
848     usesVgprAt(VgprIndexUnusedMin = -1);
849   }
850 
851   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
852     switch (RegKind) {
853       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
854       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
855       default: break;
856     }
857   }
858 };
859 
860 class AMDGPUAsmParser : public MCTargetAsmParser {
861   MCAsmParser &Parser;
862 
863   // Number of extra operands parsed after the first optional operand.
864   // This may be necessary to skip hardcoded mandatory operands.
865   static const unsigned MAX_OPR_LOOKAHEAD = 8;
866 
867   unsigned ForcedEncodingSize = 0;
868   bool ForcedDPP = false;
869   bool ForcedSDWA = false;
870   KernelScopeInfo KernelScope;
871 
872   /// @name Auto-generated Match Functions
873   /// {
874 
875 #define GET_ASSEMBLER_HEADER
876 #include "AMDGPUGenAsmMatcher.inc"
877 
878   /// }
879 
880 private:
881   bool ParseAsAbsoluteExpression(uint32_t &Ret);
882   bool OutOfRangeError(SMRange Range);
  /// Calculate the VGPR/SGPR blocks required for the given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
885   ///
886   /// \param Features [in] Target features, used for bug corrections.
887   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
888   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
889   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
890   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
891   /// descriptor field, if valid.
892   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
893   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
894   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
895   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
896   /// \param VGPRBlocks [out] Result VGPR block count.
897   /// \param SGPRBlocks [out] Result SGPR block count.
898   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
899                           bool FlatScrUsed, bool XNACKUsed,
900                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
901                           SMRange VGPRRange, unsigned NextFreeSGPR,
902                           SMRange SGPRRange, unsigned &VGPRBlocks,
903                           unsigned &SGPRBlocks);
904   bool ParseDirectiveAMDGCNTarget();
905   bool ParseDirectiveAMDHSAKernel();
906   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
907   bool ParseDirectiveHSACodeObjectVersion();
908   bool ParseDirectiveHSACodeObjectISA();
909   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
910   bool ParseDirectiveAMDKernelCodeT();
911   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
912   bool ParseDirectiveAMDGPUHsaKernel();
913 
914   bool ParseDirectiveISAVersion();
915   bool ParseDirectiveHSAMetadata();
916   bool ParseDirectivePALMetadataBegin();
917   bool ParseDirectivePALMetadata();
918   bool ParseDirectiveAMDGPULDS();
919 
920   /// Common code to parse out a block of text (typically YAML) between start and
921   /// end directives.
922   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
923                            const char *AssemblerDirectiveEnd,
924                            std::string &CollectString);
925 
926   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
927                              RegisterKind RegKind, unsigned Reg1,
928                              unsigned RegNum);
929   bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
930                            unsigned& RegNum, unsigned& RegWidth,
931                            unsigned *DwordRegIndex);
932   bool isRegister();
933   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
934   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
935   void initializeGprCountSymbol(RegisterKind RegKind);
936   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
937                              unsigned RegWidth);
938   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
939                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
940   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
941                  bool IsGdsHardcoded);
942 
943 public:
944   enum AMDGPUMatchResultTy {
945     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
946   };
947   enum OperandMode {
948     OperandMode_Default,
949     OperandMode_NSA,
950   };
951 
952   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
953 
954   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
955                const MCInstrInfo &MII,
956                const MCTargetOptions &Options)
957       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
958     MCAsmParserExtension::Initialize(Parser);
959 
960     if (getFeatureBits().none()) {
961       // Set default features.
962       copySTI().ToggleFeature("southern-islands");
963     }
964 
965     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
966 
967     {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
972       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
973       MCContext &Ctx = getContext();
974       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
975         MCSymbol *Sym =
976             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
977         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
978         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
979         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
980         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
981         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
982       } else {
983         MCSymbol *Sym =
984             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
985         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
986         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
987         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
988         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
989         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
990       }
991       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
992         initializeGprCountSymbol(IS_VGPR);
993         initializeGprCountSymbol(IS_SGPR);
994       } else
995         KernelScope.initialize(getContext());
996     }
997   }
998 
999   bool hasXNACK() const {
1000     return AMDGPU::hasXNACK(getSTI());
1001   }
1002 
1003   bool hasMIMG_R128() const {
1004     return AMDGPU::hasMIMG_R128(getSTI());
1005   }
1006 
1007   bool hasPackedD16() const {
1008     return AMDGPU::hasPackedD16(getSTI());
1009   }
1010 
1011   bool isSI() const {
1012     return AMDGPU::isSI(getSTI());
1013   }
1014 
1015   bool isCI() const {
1016     return AMDGPU::isCI(getSTI());
1017   }
1018 
1019   bool isVI() const {
1020     return AMDGPU::isVI(getSTI());
1021   }
1022 
1023   bool isGFX9() const {
1024     return AMDGPU::isGFX9(getSTI());
1025   }
1026 
1027   bool isGFX10() const {
1028     return AMDGPU::isGFX10(getSTI());
1029   }
1030 
1031   bool hasInv2PiInlineImm() const {
1032     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1033   }
1034 
1035   bool hasFlatOffsets() const {
1036     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1037   }
1038 
1039   bool hasSGPR102_SGPR103() const {
1040     return !isVI() && !isGFX9();
1041   }
1042 
1043   bool hasSGPR104_SGPR105() const {
1044     return isGFX10();
1045   }
1046 
1047   bool hasIntClamp() const {
1048     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1049   }
1050 
1051   AMDGPUTargetStreamer &getTargetStreamer() {
1052     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1053     return static_cast<AMDGPUTargetStreamer &>(TS);
1054   }
1055 
1056   const MCRegisterInfo *getMRI() const {
1057     // We need this const_cast because for some reason getContext() is not const
1058     // in MCAsmParser.
1059     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1060   }
1061 
1062   const MCInstrInfo *getMII() const {
1063     return &MII;
1064   }
1065 
1066   const FeatureBitset &getFeatureBits() const {
1067     return getSTI().getFeatureBits();
1068   }
1069 
1070   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1071   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1072   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1073 
1074   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1075   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1076   bool isForcedDPP() const { return ForcedDPP; }
1077   bool isForcedSDWA() const { return ForcedSDWA; }
1078   ArrayRef<unsigned> getMatchedVariants() const;
1079 
1080   std::unique_ptr<AMDGPUOperand> parseRegister();
1081   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1082   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1083   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1084                                       unsigned Kind) override;
1085   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1086                                OperandVector &Operands, MCStreamer &Out,
1087                                uint64_t &ErrorInfo,
1088                                bool MatchingInlineAsm) override;
1089   bool ParseDirective(AsmToken DirectiveID) override;
1090   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1091                                     OperandMode Mode = OperandMode_Default);
1092   StringRef parseMnemonicSuffix(StringRef Name);
1093   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1094                         SMLoc NameLoc, OperandVector &Operands) override;
1095   //bool ProcessInstruction(MCInst &Inst);
1096 
1097   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1098 
1099   OperandMatchResultTy
1100   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1101                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1102                      bool (*ConvertResult)(int64_t &) = nullptr);
1103 
1104   OperandMatchResultTy
1105   parseOperandArrayWithPrefix(const char *Prefix,
1106                               OperandVector &Operands,
1107                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1108                               bool (*ConvertResult)(int64_t&) = nullptr);
1109 
1110   OperandMatchResultTy
1111   parseNamedBit(const char *Name, OperandVector &Operands,
1112                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1113   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1114                                              StringRef &Value);
1115 
1116   bool isModifier();
1117   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1118   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1119   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1120   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1121   bool parseSP3NegModifier();
1122   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1123   OperandMatchResultTy parseReg(OperandVector &Operands);
1124   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1125   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1126   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1127   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1128   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1129   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1130   OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1131 
1132   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1133   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1134   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1135   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1136 
1137   bool parseCnt(int64_t &IntVal);
1138   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1139   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1140 
1141 private:
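  // Holds an operand value parsed either as a number or as a symbolic name
  // (used for hwreg and sendmsg operands).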
1142   struct OperandInfoTy {
1143     int64_t Id;
1144     bool IsSymbolic = false;
1145 
1146     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1147   };
1148 
1149   bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
1150   bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1151   void validateHwreg(const OperandInfoTy &HwReg,
1152                      const int64_t Offset,
1153                      const int64_t Width,
1154                      const SMLoc Loc);
1155 
1156   void errorExpTgt();
1157   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1158 
1159   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
1160   bool validateSOPLiteral(const MCInst &Inst) const;
1161   bool validateConstantBusLimitations(const MCInst &Inst);
1162   bool validateEarlyClobberLimitations(const MCInst &Inst);
1163   bool validateIntClampSupported(const MCInst &Inst);
1164   bool validateMIMGAtomicDMask(const MCInst &Inst);
1165   bool validateMIMGGatherDMask(const MCInst &Inst);
1166   bool validateMIMGDataSize(const MCInst &Inst);
1167   bool validateMIMGAddrSize(const MCInst &Inst);
1168   bool validateMIMGD16(const MCInst &Inst);
1169   bool validateMIMGDim(const MCInst &Inst);
1170   bool validateLdsDirect(const MCInst &Inst);
1171   bool validateOpSel(const MCInst &Inst);
1172   bool validateVccOperand(unsigned Reg) const;
1173   bool validateVOP3Literal(const MCInst &Inst) const;
1174   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1175   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1176   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1177 
1178   bool isId(const StringRef Id) const;
1179   bool isId(const AsmToken &Token, const StringRef Id) const;
1180   bool isToken(const AsmToken::TokenKind Kind) const;
1181   bool trySkipId(const StringRef Id);
1182   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1183   bool trySkipToken(const AsmToken::TokenKind Kind);
1184   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1185   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1186   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1187   AsmToken::TokenKind getTokenKind() const;
1188   bool parseExpr(int64_t &Imm);
1189   StringRef getTokenStr() const;
1190   AsmToken peekToken();
1191   AsmToken getToken() const;
1192   SMLoc getLoc() const;
1193   void lex();
1194 
1195 public:
1196   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1197   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1198 
1199   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1200   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1201   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1202   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1203   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1204   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1205 
1206   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1207                             const unsigned MinVal,
1208                             const unsigned MaxVal,
1209                             const StringRef ErrMsg);
1210   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1211   bool parseSwizzleOffset(int64_t &Imm);
1212   bool parseSwizzleMacro(int64_t &Imm);
1213   bool parseSwizzleQuadPerm(int64_t &Imm);
1214   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1215   bool parseSwizzleBroadcast(int64_t &Imm);
1216   bool parseSwizzleSwap(int64_t &Imm);
1217   bool parseSwizzleReverse(int64_t &Imm);
1218 
1219   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1220   int64_t parseGPRIdxMacro();
1221 
1222   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1223   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1224   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1225   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1226   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1227 
1228   AMDGPUOperand::Ptr defaultDLC() const;
1229   AMDGPUOperand::Ptr defaultGLC() const;
1230   AMDGPUOperand::Ptr defaultSLC() const;
1231 
1232   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1233   AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1234   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1235   AMDGPUOperand::Ptr defaultOffsetU12() const;
1236   AMDGPUOperand::Ptr defaultOffsetS13() const;
1237 
1238   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1239 
1240   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1241                OptionalImmIndexMap &OptionalIdx);
1242   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1243   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1244   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1245 
1246   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1247 
1248   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1249                bool IsAtomic = false);
1250   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1251 
1252   OperandMatchResultTy parseDim(OperandVector &Operands);
1253   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1254   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1255   AMDGPUOperand::Ptr defaultRowMask() const;
1256   AMDGPUOperand::Ptr defaultBankMask() const;
1257   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1258   AMDGPUOperand::Ptr defaultFI() const;
1259   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1260   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1261 
1262   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1263                                     AMDGPUOperand::ImmTy Type);
1264   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1265   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1266   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1267   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1268   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1269   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1270                 uint64_t BasicInstType, bool skipVcc = false);
1271 
1272   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1273   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1274 };
1275 
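// Describes an optional operand: its assembly name, immediate type, whether it
// is a single-bit flag, and an optional value conversion callback.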
1276 struct OptionalOperand {
1277   const char *Name;
1278   AMDGPUOperand::ImmTy Type;
1279   bool IsBit;
1280   bool (*ConvertResult)(int64_t&);
1281 };
1282 
1283 } // end anonymous namespace
1284 
// May be called with an integer type of equivalent bitwidth.
1286 static const fltSemantics *getFltSemantics(unsigned Size) {
1287   switch (Size) {
1288   case 4:
1289     return &APFloat::IEEEsingle();
1290   case 8:
1291     return &APFloat::IEEEdouble();
1292   case 2:
1293     return &APFloat::IEEEhalf();
1294   default:
1295     llvm_unreachable("unsupported fp type");
1296   }
1297 }
1298 
1299 static const fltSemantics *getFltSemantics(MVT VT) {
1300   return getFltSemantics(VT.getSizeInBits() / 8);
1301 }
1302 
1303 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1304   switch (OperandType) {
1305   case AMDGPU::OPERAND_REG_IMM_INT32:
1306   case AMDGPU::OPERAND_REG_IMM_FP32:
1307   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1308   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1309     return &APFloat::IEEEsingle();
1310   case AMDGPU::OPERAND_REG_IMM_INT64:
1311   case AMDGPU::OPERAND_REG_IMM_FP64:
1312   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1313   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1314     return &APFloat::IEEEdouble();
1315   case AMDGPU::OPERAND_REG_IMM_INT16:
1316   case AMDGPU::OPERAND_REG_IMM_FP16:
1317   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1318   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1319   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1320   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1321   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1322   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1323     return &APFloat::IEEEhalf();
1324   default:
1325     llvm_unreachable("unsupported fp type");
1326   }
1327 }
1328 
1329 //===----------------------------------------------------------------------===//
1330 // Operand
1331 //===----------------------------------------------------------------------===//
1332 
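// Returns true if FPLiteral can be converted to the floating-point type VT
// without overflow or underflow; precision loss is allowed. Note that
// FPLiteral is converted in place.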
1333 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1334   bool Lost;
1335 
  // Convert the literal to the floating-point semantics of VT.
1337   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1338                                                APFloat::rmNearestTiesToEven,
1339                                                &Lost);
  // We allow precision loss but not overflow or underflow.
1341   if (Status != APFloat::opOK &&
1342       Lost &&
1343       ((Status & APFloat::opOverflow)  != 0 ||
1344        (Status & APFloat::opUnderflow) != 0)) {
1345     return false;
1346   }
1347 
1348   return true;
1349 }
1350 
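// Returns true if Val is representable in Size bits as either a signed or an
// unsigned integer, i.e. truncation to Size bits does not change its value.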
1351 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1352   return isUIntN(Size, Val) || isIntN(Size, Val);
1353 }
1354 
1355 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1356 
  // This is a hack that allows named inline values such as shared_base to be
  // used with both 32-bit and 64-bit operands. Note that these values are
  // defined as 32-bit operands only.
1361   if (isInlineValue()) {
1362     return true;
1363   }
1364 
1365   if (!isImmTy(ImmTyNone)) {
1366     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1367     return false;
1368   }
  // TODO: We should avoid using host floats here. It would be better to
  // check the float bit values, which is what a few other places do.
  // We've had bot failures before due to weird NaN support on MIPS hosts.
1372 
1373   APInt Literal(64, Imm.Val);
1374 
1375   if (Imm.IsFPImm) { // We got fp literal token
1376     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1377       return AMDGPU::isInlinableLiteral64(Imm.Val,
1378                                           AsmParser->hasInv2PiInlineImm());
1379     }
1380 
1381     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1382     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1383       return false;
1384 
1385     if (type.getScalarSizeInBits() == 16) {
1386       return AMDGPU::isInlinableLiteral16(
1387         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1388         AsmParser->hasInv2PiInlineImm());
1389     }
1390 
1391     // Check if single precision literal is inlinable
1392     return AMDGPU::isInlinableLiteral32(
1393       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1394       AsmParser->hasInv2PiInlineImm());
1395   }
1396 
1397   // We got int literal token.
1398   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1399     return AMDGPU::isInlinableLiteral64(Imm.Val,
1400                                         AsmParser->hasInv2PiInlineImm());
1401   }
1402 
1403   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1404     return false;
1405   }
1406 
1407   if (type.getScalarSizeInBits() == 16) {
1408     return AMDGPU::isInlinableLiteral16(
1409       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1410       AsmParser->hasInv2PiInlineImm());
1411   }
1412 
1413   return AMDGPU::isInlinableLiteral32(
1414     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1415     AsmParser->hasInv2PiInlineImm());
1416 }
1417 
1418 bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as a literal.
1420   if (!isImmTy(ImmTyNone)) {
1421     return false;
1422   }
1423 
1424   if (!Imm.IsFPImm) {
1425     // We got int literal token.
1426 
1427     if (type == MVT::f64 && hasFPModifiers()) {
      // FP modifiers cannot be applied to integer literals while preserving the
      // same semantics for VOP1/2/C and VOP3, because of integer truncation.
      // To avoid ambiguity, disallow these cases.
1431       return false;
1432     }
1433 
1434     unsigned Size = type.getSizeInBits();
1435     if (Size == 64)
1436       Size = 32;
1437 
1438     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1439     // types.
1440     return isSafeTruncation(Imm.Val, Size);
1441   }
1442 
1443   // We got fp literal token
1444   if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set the low 32 bits of the literal to zeroes, but we accept
    // such literals.
1446     return true;
1447   }
1448 
1449   if (type == MVT::i64) { // Expected 64-bit int operand
1450     // We don't allow fp literals in 64-bit integer instructions. It is
1451     // unclear how we should encode them.
1452     return false;
1453   }
1454 
  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal can be losslessly converted to f16.
1458   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1459                      (type == MVT::v2i16)? MVT::i16 : type;
1460 
1461   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1462   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1463 }
1464 
1465 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1466   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1467 }
1468 
1469 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1470   if (AsmParser->isVI())
1471     return isVReg32();
1472   else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1473     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1474   else
1475     return false;
1476 }
1477 
1478 bool AMDGPUOperand::isSDWAFP16Operand() const {
1479   return isSDWAOperand(MVT::f16);
1480 }
1481 
1482 bool AMDGPUOperand::isSDWAFP32Operand() const {
1483   return isSDWAOperand(MVT::f32);
1484 }
1485 
1486 bool AMDGPUOperand::isSDWAInt16Operand() const {
1487   return isSDWAOperand(MVT::i16);
1488 }
1489 
1490 bool AMDGPUOperand::isSDWAInt32Operand() const {
1491   return isSDWAOperand(MVT::i32);
1492 }
1493 
1494 bool AMDGPUOperand::isBoolReg() const {
1495   return AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ?
1496     isSCSrcB64() : isSCSrcB32();
1497 }
1498 
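// Apply abs/neg input modifiers to the raw bit pattern of a floating-point
// immediate of the given byte Size by clearing or flipping its sign bit.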
1499 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1500 {
1501   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1502   assert(Size == 2 || Size == 4 || Size == 8);
1503 
1504   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1505 
1506   if (Imm.Mods.Abs) {
1507     Val &= ~FpSignMask;
1508   }
1509   if (Imm.Mods.Neg) {
1510     Val ^= FpSignMask;
1511   }
1512 
1513   return Val;
1514 }
1515 
1516 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1517   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1518                              Inst.getNumOperands())) {
1519     addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &&
1521                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1522   } else {
1523     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1524     Inst.addOperand(MCOperand::createImm(Imm.Val));
1525   }
1526 }
1527 
1528 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1529   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1530   auto OpNum = Inst.getNumOperands();
1531   // Check that this operand accepts literals
1532   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1533 
1534   if (ApplyModifiers) {
1535     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1536     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1537     Val = applyInputFPModifiers(Val, Size);
1538   }
1539 
1540   APInt Literal(64, Val);
1541   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1542 
1543   if (Imm.IsFPImm) { // We got fp literal token
1544     switch (OpTy) {
1545     case AMDGPU::OPERAND_REG_IMM_INT64:
1546     case AMDGPU::OPERAND_REG_IMM_FP64:
1547     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1548     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1549       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1550                                        AsmParser->hasInv2PiInlineImm())) {
1551         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1552         return;
1553       }
1554 
1555       // Non-inlineable
1556       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1557         // For fp operands we check if low 32 bits are zeros
1558         if (Literal.getLoBits(32) != 0) {
1559           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1560           "Can't encode literal as exact 64-bit floating-point operand. "
1561           "Low 32-bits will be set to zero");
1562         }
1563 
1564         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1565         return;
1566       }
1567 
1568       // We don't allow fp literals in 64-bit integer instructions. It is
1569       // unclear how we should encode them. This case should be checked earlier
1570       // in predicate methods (isLiteralImm())
1571       llvm_unreachable("fp literal in 64-bit integer instruction.");
1572 
1573     case AMDGPU::OPERAND_REG_IMM_INT32:
1574     case AMDGPU::OPERAND_REG_IMM_FP32:
1575     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1576     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1577     case AMDGPU::OPERAND_REG_IMM_INT16:
1578     case AMDGPU::OPERAND_REG_IMM_FP16:
1579     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1580     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1581     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1582     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1583     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1584     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1585       bool lost;
1586       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert the literal to the floating-point format expected by the
      // operand.
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision loss but not overflow or underflow. This should
      // have been checked earlier in isLiteralImm().
1592 
1593       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1594       Inst.addOperand(MCOperand::createImm(ImmVal));
1595       return;
1596     }
1597     default:
1598       llvm_unreachable("invalid operand size");
1599     }
1600 
1601     return;
1602   }
1603 
  // We got an int literal token.
  // Only sign extend inline immediates.
1606   switch (OpTy) {
1607   case AMDGPU::OPERAND_REG_IMM_INT32:
1608   case AMDGPU::OPERAND_REG_IMM_FP32:
1609   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1610   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1611   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1612   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1613     if (isSafeTruncation(Val, 32) &&
1614         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1615                                      AsmParser->hasInv2PiInlineImm())) {
1616       Inst.addOperand(MCOperand::createImm(Val));
1617       return;
1618     }
1619 
1620     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1621     return;
1622 
1623   case AMDGPU::OPERAND_REG_IMM_INT64:
1624   case AMDGPU::OPERAND_REG_IMM_FP64:
1625   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1626   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1627     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1628       Inst.addOperand(MCOperand::createImm(Val));
1629       return;
1630     }
1631 
1632     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1633     return;
1634 
1635   case AMDGPU::OPERAND_REG_IMM_INT16:
1636   case AMDGPU::OPERAND_REG_IMM_FP16:
1637   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1638   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1639     if (isSafeTruncation(Val, 16) &&
1640         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1641                                      AsmParser->hasInv2PiInlineImm())) {
1642       Inst.addOperand(MCOperand::createImm(Val));
1643       return;
1644     }
1645 
1646     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1647     return;
1648 
1649   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1650   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1651     assert(isSafeTruncation(Val, 16));
1652     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1653                                         AsmParser->hasInv2PiInlineImm()));
1654 
1655     Inst.addOperand(MCOperand::createImm(Val));
1656     return;
1657   }
1658   default:
1659     llvm_unreachable("invalid operand size");
1660   }
1661 }
1662 
1663 template <unsigned Bitwidth>
1664 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1665   APInt Literal(64, Imm.Val);
1666 
1667   if (!Imm.IsFPImm) {
1668     // We got int literal token.
1669     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1670     return;
1671   }
1672 
1673   bool Lost;
1674   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1675   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1676                     APFloat::rmNearestTiesToEven, &Lost);
1677   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1678 }
1679 
1680 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1681   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1682 }
1683 
1684 static bool isInlineValue(unsigned Reg) {
1685   switch (Reg) {
1686   case AMDGPU::SRC_SHARED_BASE:
1687   case AMDGPU::SRC_SHARED_LIMIT:
1688   case AMDGPU::SRC_PRIVATE_BASE:
1689   case AMDGPU::SRC_PRIVATE_LIMIT:
1690   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1691     return true;
1692   case AMDGPU::SRC_VCCZ:
1693   case AMDGPU::SRC_EXECZ:
1694   case AMDGPU::SRC_SCC:
1695     return true;
1696   default:
1697     return false;
1698   }
1699 }
1700 
1701 bool AMDGPUOperand::isInlineValue() const {
1702   return isRegKind() && ::isInlineValue(getReg());
1703 }
1704 
1705 //===----------------------------------------------------------------------===//
1706 // AsmParser
1707 //===----------------------------------------------------------------------===//
1708 
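// Map a register kind and width (in 32-bit dwords) to the corresponding MC
// register class, e.g. (IS_SGPR, 2) -> SGPR_64RegClassID. Returns -1 if no
// register class exists for the requested width.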
1709 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1710   if (Is == IS_VGPR) {
1711     switch (RegWidth) {
1712       default: return -1;
1713       case 1: return AMDGPU::VGPR_32RegClassID;
1714       case 2: return AMDGPU::VReg_64RegClassID;
1715       case 3: return AMDGPU::VReg_96RegClassID;
1716       case 4: return AMDGPU::VReg_128RegClassID;
1717       case 8: return AMDGPU::VReg_256RegClassID;
1718       case 16: return AMDGPU::VReg_512RegClassID;
1719     }
1720   } else if (Is == IS_TTMP) {
1721     switch (RegWidth) {
1722       default: return -1;
1723       case 1: return AMDGPU::TTMP_32RegClassID;
1724       case 2: return AMDGPU::TTMP_64RegClassID;
1725       case 4: return AMDGPU::TTMP_128RegClassID;
1726       case 8: return AMDGPU::TTMP_256RegClassID;
1727       case 16: return AMDGPU::TTMP_512RegClassID;
1728     }
1729   } else if (Is == IS_SGPR) {
1730     switch (RegWidth) {
1731       default: return -1;
1732       case 1: return AMDGPU::SGPR_32RegClassID;
1733       case 2: return AMDGPU::SGPR_64RegClassID;
1734       case 4: return AMDGPU::SGPR_128RegClassID;
1735       case 8: return AMDGPU::SGPR_256RegClassID;
1736       case 16: return AMDGPU::SGPR_512RegClassID;
1737     }
1738   }
1739   return -1;
1740 }
1741 
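// Translate a special register name such as "vcc", "exec_lo" or "flat_scratch"
// into the corresponding MC register. Returns 0 if the name does not denote a
// special register.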
1742 static unsigned getSpecialRegForName(StringRef RegName) {
1743   return StringSwitch<unsigned>(RegName)
1744     .Case("exec", AMDGPU::EXEC)
1745     .Case("vcc", AMDGPU::VCC)
1746     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1747     .Case("xnack_mask", AMDGPU::XNACK_MASK)
1748     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
1749     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
1750     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1751     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1752     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
1753     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
1754     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1755     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1756     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1757     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1758     .Case("lds_direct", AMDGPU::LDS_DIRECT)
1759     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1760     .Case("m0", AMDGPU::M0)
1761     .Case("vccz", AMDGPU::SRC_VCCZ)
1762     .Case("src_vccz", AMDGPU::SRC_VCCZ)
1763     .Case("execz", AMDGPU::SRC_EXECZ)
1764     .Case("src_execz", AMDGPU::SRC_EXECZ)
1765     .Case("scc", AMDGPU::SRC_SCC)
1766     .Case("src_scc", AMDGPU::SRC_SCC)
1767     .Case("tba", AMDGPU::TBA)
1768     .Case("tma", AMDGPU::TMA)
1769     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1770     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1771     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1772     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1773     .Case("vcc_lo", AMDGPU::VCC_LO)
1774     .Case("vcc_hi", AMDGPU::VCC_HI)
1775     .Case("exec_lo", AMDGPU::EXEC_LO)
1776     .Case("exec_hi", AMDGPU::EXEC_HI)
1777     .Case("tma_lo", AMDGPU::TMA_LO)
1778     .Case("tma_hi", AMDGPU::TMA_HI)
1779     .Case("tba_lo", AMDGPU::TBA_LO)
1780     .Case("tba_hi", AMDGPU::TBA_HI)
1781     .Case("null", AMDGPU::SGPR_NULL)
1782     .Default(0);
1783 }
1784 
1785 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1786                                     SMLoc &EndLoc) {
1787   auto R = parseRegister();
1788   if (!R) return true;
1789   assert(R->isReg());
1790   RegNo = R->getReg();
1791   StartLoc = R->getStartLoc();
1792   EndLoc = R->getEndLoc();
1793   return false;
1794 }
1795 
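// Try to append the next parsed register Reg1 to the register group currently
// described by Reg and RegWidth. Special registers are merged from known
// lo/hi pairs (e.g. vcc_lo followed by vcc_hi becomes vcc); VGPR, SGPR and
// TTMP registers must be consecutive. Returns false if Reg1 cannot be
// appended.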
1796 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1797                                             RegisterKind RegKind, unsigned Reg1,
1798                                             unsigned RegNum) {
1799   switch (RegKind) {
1800   case IS_SPECIAL:
1801     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1802       Reg = AMDGPU::EXEC;
1803       RegWidth = 2;
1804       return true;
1805     }
1806     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1807       Reg = AMDGPU::FLAT_SCR;
1808       RegWidth = 2;
1809       return true;
1810     }
1811     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
1812       Reg = AMDGPU::XNACK_MASK;
1813       RegWidth = 2;
1814       return true;
1815     }
1816     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1817       Reg = AMDGPU::VCC;
1818       RegWidth = 2;
1819       return true;
1820     }
1821     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1822       Reg = AMDGPU::TBA;
1823       RegWidth = 2;
1824       return true;
1825     }
1826     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
1827       Reg = AMDGPU::TMA;
1828       RegWidth = 2;
1829       return true;
1830     }
1831     return false;
1832   case IS_VGPR:
1833   case IS_SGPR:
1834   case IS_TTMP:
1835     if (Reg1 != Reg + RegWidth) {
1836       return false;
1837     }
1838     RegWidth++;
1839     return true;
1840   default:
1841     llvm_unreachable("unexpected register kind");
1842   }
1843 }
1844 
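// Prefixes of generic register names recognized by isRegister().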
1845 static const StringRef Registers[] = {
1846   { "v" },
1847   { "s" },
1848   { "ttmp" },
1849 };
1850 
1851 bool
1852 AMDGPUAsmParser::isRegister(const AsmToken &Token,
1853                             const AsmToken &NextToken) const {
1854 
1855   // A list of consecutive registers: [s0,s1,s2,s3]
1856   if (Token.is(AsmToken::LBrac))
1857     return true;
1858 
1859   if (!Token.is(AsmToken::Identifier))
1860     return false;
1861 
1862   // A single register like s0 or a range of registers like s[0:1]
1863 
1864   StringRef RegName = Token.getString();
1865 
1866   for (StringRef Reg : Registers) {
1867     if (RegName.startswith(Reg)) {
1868       if (Reg.size() < RegName.size()) {
1869         unsigned RegNum;
        // A single register with an index: vXX, sXX or ttmpXX.
1871         if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum))
1872           return true;
1873       } else {
        // A range of registers: v[XX:YY], s[XX:YY] or ttmp[XX:YY].
1875         if (NextToken.is(AsmToken::LBrac))
1876           return true;
1877       }
1878     }
1879   }
1880 
1881   return getSpecialRegForName(RegName);
1882 }
1883 
1884 bool
1885 AMDGPUAsmParser::isRegister()
1886 {
1887   return isRegister(getToken(), peekToken());
1888 }
1889 
1890 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1891                                           unsigned &RegNum, unsigned &RegWidth,
1892                                           unsigned *DwordRegIndex) {
1893   if (DwordRegIndex) { *DwordRegIndex = 0; }
1894   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
1895   if (getLexer().is(AsmToken::Identifier)) {
1896     StringRef RegName = Parser.getTok().getString();
1897     if ((Reg = getSpecialRegForName(RegName))) {
1898       Parser.Lex();
1899       RegKind = IS_SPECIAL;
1900     } else {
1901       unsigned RegNumIndex = 0;
1902       if (RegName[0] == 'v') {
1903         RegNumIndex = 1;
1904         RegKind = IS_VGPR;
1905       } else if (RegName[0] == 's') {
1906         RegNumIndex = 1;
1907         RegKind = IS_SGPR;
1908       } else if (RegName.startswith("ttmp")) {
1909         RegNumIndex = strlen("ttmp");
1910         RegKind = IS_TTMP;
1911       } else {
1912         return false;
1913       }
1914       if (RegName.size() > RegNumIndex) {
1915         // Single 32-bit register: vXX.
1916         if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
1917           return false;
1918         Parser.Lex();
1919         RegWidth = 1;
1920       } else {
1921         // Range of registers: v[XX:YY]. ":YY" is optional.
1922         Parser.Lex();
1923         int64_t RegLo, RegHi;
1924         if (getLexer().isNot(AsmToken::LBrac))
1925           return false;
1926         Parser.Lex();
1927 
1928         if (getParser().parseAbsoluteExpression(RegLo))
1929           return false;
1930 
1931         const bool isRBrace = getLexer().is(AsmToken::RBrac);
1932         if (!isRBrace && getLexer().isNot(AsmToken::Colon))
1933           return false;
1934         Parser.Lex();
1935 
1936         if (isRBrace) {
1937           RegHi = RegLo;
1938         } else {
1939           if (getParser().parseAbsoluteExpression(RegHi))
1940             return false;
1941 
1942           if (getLexer().isNot(AsmToken::RBrac))
1943             return false;
1944           Parser.Lex();
1945         }
1946         RegNum = (unsigned) RegLo;
1947         RegWidth = (RegHi - RegLo) + 1;
1948       }
1949     }
1950   } else if (getLexer().is(AsmToken::LBrac)) {
1951     // List of consecutive registers: [s0,s1,s2,s3]
1952     Parser.Lex();
1953     if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
1954       return false;
1955     if (RegWidth != 1)
1956       return false;
1957     RegisterKind RegKind1;
1958     unsigned Reg1, RegNum1, RegWidth1;
1959     do {
1960       if (getLexer().is(AsmToken::Comma)) {
1961         Parser.Lex();
1962       } else if (getLexer().is(AsmToken::RBrac)) {
1963         Parser.Lex();
1964         break;
1965       } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
1966         if (RegWidth1 != 1) {
1967           return false;
1968         }
1969         if (RegKind1 != RegKind) {
1970           return false;
1971         }
1972         if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
1973           return false;
1974         }
1975       } else {
1976         return false;
1977       }
1978     } while (true);
1979   } else {
1980     return false;
1981   }
1982   switch (RegKind) {
1983   case IS_SPECIAL:
1984     RegNum = 0;
1985     RegWidth = 1;
1986     break;
1987   case IS_VGPR:
1988   case IS_SGPR:
1989   case IS_TTMP:
1990   {
1991     unsigned Size = 1;
1992     if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
1993       // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
1994       Size = std::min(RegWidth, 4u);
1995     }
1996     if (RegNum % Size != 0)
1997       return false;
1998     if (DwordRegIndex) { *DwordRegIndex = RegNum; }
1999     RegNum = RegNum / Size;
2000     int RCID = getRegClass(RegKind, RegWidth);
2001     if (RCID == -1)
2002       return false;
2003     const MCRegisterClass RC = TRI->getRegClass(RCID);
2004     if (RegNum >= RC.getNumRegs())
2005       return false;
2006     Reg = RC.getRegister(RegNum);
2007     break;
2008   }
2009 
2010   default:
2011     llvm_unreachable("unexpected register kind");
2012   }
2013 
2014   if (!subtargetHasRegister(*TRI, Reg))
2015     return false;
2016   return true;
2017 }
2018 
2019 Optional<StringRef>
2020 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2021   switch (RegKind) {
2022   case IS_VGPR:
2023     return StringRef(".amdgcn.next_free_vgpr");
2024   case IS_SGPR:
2025     return StringRef(".amdgcn.next_free_sgpr");
2026   default:
2027     return None;
2028   }
2029 }
2030 
2031 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2032   auto SymbolName = getGprCountSymbolName(RegKind);
2033   assert(SymbolName && "initializing invalid register kind");
2034   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2035   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2036 }
2037 
2038 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2039                                             unsigned DwordRegIndex,
2040                                             unsigned RegWidth) {
2041   // Symbols are only defined for GCN targets
2042   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2043     return true;
2044 
2045   auto SymbolName = getGprCountSymbolName(RegKind);
2046   if (!SymbolName)
2047     return true;
2048   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2049 
2050   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2051   int64_t OldCount;
2052 
2053   if (!Sym->isVariable())
2054     return !Error(getParser().getTok().getLoc(),
2055                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2056   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2057     return !Error(
2058         getParser().getTok().getLoc(),
2059         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2060 
2061   if (OldCount <= NewMax)
2062     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2063 
2064   return true;
2065 }
2066 
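// Parse a single register operand. On success this also updates the
// .amdgcn.next_free_{v,s}gpr symbols (when code object v3 is used) or the
// kernel scope register usage tracking, so that GPR counts reflect explicitly
// used registers.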
2067 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
2068   const auto &Tok = Parser.getTok();
2069   SMLoc StartLoc = Tok.getLoc();
2070   SMLoc EndLoc = Tok.getEndLoc();
2071   RegisterKind RegKind;
2072   unsigned Reg, RegNum, RegWidth, DwordRegIndex;
2073 
2074   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
2075     //FIXME: improve error messages (bug 41303).
2076     Error(StartLoc, "not a valid operand.");
2077     return nullptr;
2078   }
2079   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2080     if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
2081       return nullptr;
2082   } else
2083     KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
2084   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2085 }
2086 
2087 OperandMatchResultTy
2088 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2089   // TODO: add syntactic sugar for 1/(2*PI)
2090 
2091   assert(!isRegister());
2092   assert(!isModifier());
2093 
2094   const auto& Tok = getToken();
2095   const auto& NextTok = peekToken();
2096   bool IsReal = Tok.is(AsmToken::Real);
2097   SMLoc S = getLoc();
2098   bool Negate = false;
2099 
2100   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2101     lex();
2102     IsReal = true;
2103     Negate = true;
2104   }
2105 
2106   if (IsReal) {
2107     // Floating-point expressions are not supported.
2108     // Can only allow floating-point literals with an
2109     // optional sign.
2110 
2111     StringRef Num = getTokenStr();
2112     lex();
2113 
2114     APFloat RealVal(APFloat::IEEEdouble());
2115     auto roundMode = APFloat::rmNearestTiesToEven;
2116     if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) {
2117       return MatchOperand_ParseFail;
2118     }
2119     if (Negate)
2120       RealVal.changeSign();
2121 
2122     Operands.push_back(
2123       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2124                                AMDGPUOperand::ImmTyNone, true));
2125 
2126     return MatchOperand_Success;
2127 
2128   } else {
2129     int64_t IntVal;
2130     const MCExpr *Expr;
2131     SMLoc S = getLoc();
2132 
2133     if (HasSP3AbsModifier) {
2134       // This is a workaround for handling expressions
2135       // as arguments of SP3 'abs' modifier, for example:
2136       //     |1.0|
2137       //     |-1|
2138       //     |1+x|
2139       // This syntax is not compatible with syntax of standard
2140       // MC expressions (due to the trailing '|').
2141       SMLoc EndLoc;
2142       if (getParser().parsePrimaryExpr(Expr, EndLoc))
2143         return MatchOperand_ParseFail;
2144     } else {
2145       if (Parser.parseExpression(Expr))
2146         return MatchOperand_ParseFail;
2147     }
2148 
2149     if (Expr->evaluateAsAbsolute(IntVal)) {
2150       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2151     } else {
2152       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2153     }
2154 
2155     return MatchOperand_Success;
2156   }
2157 
2158   return MatchOperand_NoMatch;
2159 }
2160 
2161 OperandMatchResultTy
2162 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2163   if (!isRegister())
2164     return MatchOperand_NoMatch;
2165 
2166   if (auto R = parseRegister()) {
2167     assert(R->isReg());
2168     Operands.push_back(std::move(R));
2169     return MatchOperand_Success;
2170   }
2171   return MatchOperand_ParseFail;
2172 }
2173 
2174 OperandMatchResultTy
2175 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2176   auto res = parseReg(Operands);
2177   if (res != MatchOperand_NoMatch) {
2178     return res;
2179   } else if (isModifier()) {
2180     return MatchOperand_NoMatch;
2181   } else {
2182     return parseImm(Operands, HasSP3AbsMod);
2183   }
2184 }
2185 
2186 bool
2187 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2188   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2189     const auto &str = Token.getString();
2190     return str == "abs" || str == "neg" || str == "sext";
2191   }
2192   return false;
2193 }
2194 
2195 bool
2196 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2197   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2198 }
2199 
2200 bool
2201 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2202   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2203 }
2204 
2205 bool
2206 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2207   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2208 }
2209 
// Check if this is an operand modifier or an opcode modifier
// which may look like an expression but is not. We should
// avoid parsing these modifiers as expressions. Currently
2213 // recognized sequences are:
2214 //   |...|
2215 //   abs(...)
2216 //   neg(...)
2217 //   sext(...)
2218 //   -reg
2219 //   -|...|
2220 //   -abs(...)
2221 //   name:...
2222 // Note that simple opcode modifiers like 'gds' may be parsed as
2223 // expressions; this is a special case. See getExpressionAsToken.
2224 //
2225 bool
2226 AMDGPUAsmParser::isModifier() {
2227 
2228   AsmToken Tok = getToken();
2229   AsmToken NextToken[2];
2230   peekTokens(NextToken);
2231 
2232   return isOperandModifier(Tok, NextToken[0]) ||
2233          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2234          isOpcodeModifierWithVal(Tok, NextToken[0]);
2235 }
2236 
2237 // Check if the current token is an SP3 'neg' modifier.
2238 // Currently this modifier is allowed in the following context:
2239 //
2240 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2241 // 2. Before an 'abs' modifier: -abs(...)
2242 // 3. Before an SP3 'abs' modifier: -|...|
2243 //
// In all other cases "-" is handled as part
// of an expression that follows the sign.
//
// Note: When "-" is followed by an integer literal,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to the literal.
// Besides being counter-intuitive, such use of the floating-point
// NEG modifier would result in different meanings
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise for uniformity.
2258 //
2259 bool
2260 AMDGPUAsmParser::parseSP3NegModifier() {
2261 
2262   AsmToken NextToken[2];
2263   peekTokens(NextToken);
2264 
2265   if (isToken(AsmToken::Minus) &&
2266       (isRegister(NextToken[0], NextToken[1]) ||
2267        NextToken[0].is(AsmToken::Pipe) ||
2268        isId(NextToken[0], "abs"))) {
2269     lex();
2270     return true;
2271   }
2272 
2273   return false;
2274 }
2275 
2276 OperandMatchResultTy
2277 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2278                                               bool AllowImm) {
2279   bool Neg, SP3Neg;
2280   bool Abs, SP3Abs;
2281   SMLoc Loc;
2282 
2283   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2284   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2285     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2286     return MatchOperand_ParseFail;
2287   }
2288 
2289   SP3Neg = parseSP3NegModifier();
2290 
2291   Loc = getLoc();
2292   Neg = trySkipId("neg");
2293   if (Neg && SP3Neg) {
2294     Error(Loc, "expected register or immediate");
2295     return MatchOperand_ParseFail;
2296   }
2297   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2298     return MatchOperand_ParseFail;
2299 
2300   Abs = trySkipId("abs");
2301   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2302     return MatchOperand_ParseFail;
2303 
2304   Loc = getLoc();
2305   SP3Abs = trySkipToken(AsmToken::Pipe);
2306   if (Abs && SP3Abs) {
2307     Error(Loc, "expected register or immediate");
2308     return MatchOperand_ParseFail;
2309   }
2310 
2311   OperandMatchResultTy Res;
2312   if (AllowImm) {
2313     Res = parseRegOrImm(Operands, SP3Abs);
2314   } else {
2315     Res = parseReg(Operands);
2316   }
2317   if (Res != MatchOperand_Success) {
2318     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2319   }
2320 
2321   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2322     return MatchOperand_ParseFail;
2323   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2324     return MatchOperand_ParseFail;
2325   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2326     return MatchOperand_ParseFail;
2327 
2328   AMDGPUOperand::Modifiers Mods;
2329   Mods.Abs = Abs || SP3Abs;
2330   Mods.Neg = Neg || SP3Neg;
2331 
2332   if (Mods.hasFPModifiers()) {
2333     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2334     if (Op.isExpr()) {
2335       Error(Op.getStartLoc(), "expected an absolute expression");
2336       return MatchOperand_ParseFail;
2337     }
2338     Op.setModifiers(Mods);
2339   }
2340   return MatchOperand_Success;
2341 }
2342 
2343 OperandMatchResultTy
2344 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2345                                                bool AllowImm) {
2346   bool Sext = trySkipId("sext");
2347   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2348     return MatchOperand_ParseFail;
2349 
2350   OperandMatchResultTy Res;
2351   if (AllowImm) {
2352     Res = parseRegOrImm(Operands);
2353   } else {
2354     Res = parseReg(Operands);
2355   }
2356   if (Res != MatchOperand_Success) {
2357     return Sext? MatchOperand_ParseFail : Res;
2358   }
2359 
2360   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2361     return MatchOperand_ParseFail;
2362 
2363   AMDGPUOperand::Modifiers Mods;
2364   Mods.Sext = Sext;
2365 
2366   if (Mods.hasIntModifiers()) {
2367     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2368     if (Op.isExpr()) {
2369       Error(Op.getStartLoc(), "expected an absolute expression");
2370       return MatchOperand_ParseFail;
2371     }
2372     Op.setModifiers(Mods);
2373   }
2374 
2375   return MatchOperand_Success;
2376 }
2377 
2378 OperandMatchResultTy
2379 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2380   return parseRegOrImmWithFPInputMods(Operands, false);
2381 }
2382 
2383 OperandMatchResultTy
2384 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2385   return parseRegOrImmWithIntInputMods(Operands, false);
2386 }
2387 
2388 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2389   auto Loc = getLoc();
2390   if (trySkipId("off")) {
2391     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2392                                                 AMDGPUOperand::ImmTyOff, false));
2393     return MatchOperand_Success;
2394   }
2395 
2396   if (!isRegister())
2397     return MatchOperand_NoMatch;
2398 
2399   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2400   if (Reg) {
2401     Operands.push_back(std::move(Reg));
2402     return MatchOperand_Success;
2403   }
2404 
2405   return MatchOperand_ParseFail;
2406 
2407 }
2408 
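// Reject matches that contradict an encoding explicitly forced by the
// mnemonic (e.g. an _e32, _e64, _dpp or _sdwa suffix) and enforce a few
// encoding-specific operand restrictions that are not expressed in the
// instruction definitions.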
2409 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2410   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2411 
2412   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2413       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2414       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2415       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2416     return Match_InvalidOperand;
2417 
2418   if ((TSFlags & SIInstrFlags::VOP3) &&
2419       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2420       getForcedEncodingSize() != 64)
2421     return Match_PreferE32;
2422 
2423   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2424       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2425     // v_mac_f32/16 allow only dst_sel == DWORD;
2426     auto OpNum =
2427         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2428     const auto &Op = Inst.getOperand(OpNum);
2429     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2430       return Match_InvalidOperand;
2431     }
2432   }
2433 
2434   if (TSFlags & SIInstrFlags::FLAT) {
2435     // FIXME: Produces error without correct column reported.
2436     auto Opcode = Inst.getOpcode();
2437     auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
2438 
2439     const auto &Op = Inst.getOperand(OpNum);
2440     if (!hasFlatOffsets() && Op.getImm() != 0)
2441       return Match_InvalidOperand;
2442 
2443     // GFX10: Address offset is 12-bit signed byte offset. Must be positive for
2444     // FLAT segment. For FLAT segment MSB is ignored and forced to zero.
2445     if (isGFX10()) {
2446       if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
2447         if (!isInt<12>(Op.getImm()))
2448           return Match_InvalidOperand;
2449       } else {
2450         if (!isUInt<11>(Op.getImm()))
2451           return Match_InvalidOperand;
2452       }
2453     }
2454   }
2455 
2456   return Match_Success;
2457 }
2458 
2459 // What asm variants we should check
2460 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2461   if (getForcedEncodingSize() == 32) {
2462     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2463     return makeArrayRef(Variants);
2464   }
2465 
2466   if (isForcedVOP3()) {
2467     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2468     return makeArrayRef(Variants);
2469   }
2470 
2471   if (isForcedSDWA()) {
2472     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2473                                         AMDGPUAsmVariants::SDWA9};
2474     return makeArrayRef(Variants);
2475   }
2476 
2477   if (isForcedDPP()) {
2478     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2479     return makeArrayRef(Variants);
2480   }
2481 
2482   static const unsigned Variants[] = {
2483     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2484     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2485   };
2486 
2487   return makeArrayRef(Variants);
2488 }
2489 
2490 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2491   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2492   const unsigned Num = Desc.getNumImplicitUses();
2493   for (unsigned i = 0; i < Num; ++i) {
2494     unsigned Reg = Desc.ImplicitUses[i];
2495     switch (Reg) {
2496     case AMDGPU::FLAT_SCR:
2497     case AMDGPU::VCC:
2498     case AMDGPU::VCC_LO:
2499     case AMDGPU::VCC_HI:
2500     case AMDGPU::M0:
2501     case AMDGPU::SGPR_NULL:
2502       return Reg;
2503     default:
2504       break;
2505     }
2506   }
2507   return AMDGPU::NoRegister;
2508 }
2509 
// NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
2514 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2515                                        unsigned OpIdx) const {
2516   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2517 
2518   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2519     return false;
2520   }
2521 
2522   const MCOperand &MO = Inst.getOperand(OpIdx);
2523 
2524   int64_t Val = MO.getImm();
2525   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2526 
2527   switch (OpSize) { // expected operand size
2528   case 8:
2529     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2530   case 4:
2531     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2532   case 2: {
2533     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2534     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2535         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2536         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
2537         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
2538       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2539     } else {
2540       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2541     }
2542   }
2543   default:
2544     llvm_unreachable("invalid operand size");
2545   }
2546 }
2547 
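// An operand uses the constant bus if it is a non-inline immediate (literal
// or expression) or an SGPR-like register; plain VGPR operands do not use the
// constant bus.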
2548 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2549   const MCOperand &MO = Inst.getOperand(OpIdx);
2550   if (MO.isImm()) {
2551     return !isInlineConstant(Inst, OpIdx);
2552   }
2553   return !MO.isReg() ||
2554          isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2555 }
2556 
2557 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2558   const unsigned Opcode = Inst.getOpcode();
2559   const MCInstrDesc &Desc = MII.get(Opcode);
2560   unsigned ConstantBusUseCount = 0;
2561   unsigned NumLiterals = 0;
2562   unsigned LiteralSize;
2563 
2564   if (Desc.TSFlags &
2565       (SIInstrFlags::VOPC |
2566        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2567        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2568        SIInstrFlags::SDWA)) {
2569     // Check special imm operands (used by madmk, etc)
2570     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2571       ++ConstantBusUseCount;
2572     }
2573 
2574     SmallDenseSet<unsigned> SGPRsUsed;
2575     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2576     if (SGPRUsed != AMDGPU::NoRegister) {
2577       SGPRsUsed.insert(SGPRUsed);
2578       ++ConstantBusUseCount;
2579     }
2580 
2581     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2582     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2583     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2584 
2585     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2586 
2587     for (int OpIdx : OpIndices) {
2588       if (OpIdx == -1) break;
2589 
2590       const MCOperand &MO = Inst.getOperand(OpIdx);
2591       if (usesConstantBus(Inst, OpIdx)) {
2592         if (MO.isReg()) {
2593           const unsigned Reg = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersection like these
2595           //   s0, s[0:1]
2596           //   flat_scratch_lo, flat_scratch
2597           //   flat_scratch_lo, flat_scratch_hi
2598           // are theoretically valid but they are disabled anyway.
2599           // Note that this code mimics SIInstrInfo::verifyInstruction
2600           if (!SGPRsUsed.count(Reg)) {
2601             SGPRsUsed.insert(Reg);
2602             ++ConstantBusUseCount;
2603           }
2604           SGPRUsed = Reg;
2605         } else { // Expression or a literal
2606 
2607           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
2608             continue; // special operand like VINTERP attr_chan
2609 
2610           // An instruction may use only one literal.
          // This has been validated in the previous step.
2612           // See validateVOP3Literal.
2613           // This literal may be used as more than one operand.
2614           // If all these operands are of the same size,
2615           // this literal counts as one scalar value.
2616           // Otherwise it counts as 2 scalar values.
2617           // See "GFX10 Shader Programming", section 3.6.2.3.
2618 
2619           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
2620           if (Size < 4) Size = 4;
2621 
2622           if (NumLiterals == 0) {
2623             NumLiterals = 1;
2624             LiteralSize = Size;
2625           } else if (LiteralSize != Size) {
2626             NumLiterals = 2;
2627           }
2628         }
2629       }
2630     }
2631   }
2632   ConstantBusUseCount += NumLiterals;
2633 
2634   if (isGFX10())
2635     return ConstantBusUseCount <= 2;
2636 
2637   return ConstantBusUseCount <= 1;
2638 }
2639 
2640 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2641   const unsigned Opcode = Inst.getOpcode();
2642   const MCInstrDesc &Desc = MII.get(Opcode);
2643 
2644   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2645   if (DstIdx == -1 ||
2646       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2647     return true;
2648   }
2649 
2650   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2651 
2652   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2653   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2654   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2655 
2656   assert(DstIdx != -1);
2657   const MCOperand &Dst = Inst.getOperand(DstIdx);
2658   assert(Dst.isReg());
2659   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2660 
2661   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2662 
2663   for (int SrcIdx : SrcIndices) {
2664     if (SrcIdx == -1) break;
2665     const MCOperand &Src = Inst.getOperand(SrcIdx);
2666     if (Src.isReg()) {
2667       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2668       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2669         return false;
2670       }
2671     }
2672   }
2673 
2674   return true;
2675 }
2676 
2677 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2678 
2679   const unsigned Opc = Inst.getOpcode();
2680   const MCInstrDesc &Desc = MII.get(Opc);
2681 
2682   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2683     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2684     assert(ClampIdx != -1);
2685     return Inst.getOperand(ClampIdx).getImm() == 0;
2686   }
2687 
2688   return true;
2689 }
2690 
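// Check that the vdata register size (in dwords) matches the number of
// components selected by dmask (always 4 for gather4) plus one extra dword
// when tfe is set; with packed d16, two components share a dword.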
2691 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2692 
2693   const unsigned Opc = Inst.getOpcode();
2694   const MCInstrDesc &Desc = MII.get(Opc);
2695 
2696   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2697     return true;
2698 
2699   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2700   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2701   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2702 
2703   assert(VDataIdx != -1);
2704   assert(DMaskIdx != -1);
2705   assert(TFEIdx != -1);
2706 
2707   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2708   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2709   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2710   if (DMask == 0)
2711     DMask = 1;
2712 
2713   unsigned DataSize =
2714     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2715   if (hasPackedD16()) {
2716     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2717     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2718       DataSize = (DataSize + 1) / 2;
2719   }
2720 
2721   return (VDataSize / 4) == DataSize + TFESize;
2722 }
2723 
2724 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
2725   const unsigned Opc = Inst.getOpcode();
2726   const MCInstrDesc &Desc = MII.get(Opc);
2727 
2728   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
2729     return true;
2730 
2731   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
2732   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
2733       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
2734   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
2735   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
2736   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2737 
2738   assert(VAddr0Idx != -1);
2739   assert(SrsrcIdx != -1);
2740   assert(DimIdx != -1);
2741   assert(SrsrcIdx > VAddr0Idx);
2742 
2743   unsigned Dim = Inst.getOperand(DimIdx).getImm();
2744   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
2745   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
2746   unsigned VAddrSize =
2747       IsNSA ? SrsrcIdx - VAddr0Idx
2748             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
2749 
2750   unsigned AddrSize = BaseOpcode->NumExtraArgs +
2751                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
2752                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
2753                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
2754   if (!IsNSA) {
2755     if (AddrSize > 8)
2756       AddrSize = 16;
2757     else if (AddrSize > 4)
2758       AddrSize = 8;
2759   }
2760 
2761   return VAddrSize == AddrSize;
2762 }
2763 
2764 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
2765 
2766   const unsigned Opc = Inst.getOpcode();
2767   const MCInstrDesc &Desc = MII.get(Opc);
2768 
2769   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2770     return true;
2771   if (!Desc.mayLoad() || !Desc.mayStore())
2772     return true; // Not atomic
2773 
2774   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2775   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2776 
2777   // This is an incomplete check because image_atomic_cmpswap
2778   // may only use 0x3 and 0xf while other atomic operations
2779   // may use 0x1 and 0x3. However these limitations are
2780   // verified when we check that dmask matches dst size.
2781   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
2782 }
2783 
2784 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
2785 
2786   const unsigned Opc = Inst.getOpcode();
2787   const MCInstrDesc &Desc = MII.get(Opc);
2788 
2789   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
2790     return true;
2791 
2792   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2793   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2794 
2795   // GATHER4 instructions use dmask in a different fashion compared to
2796   // other MIMG instructions. The only useful DMASK values are
2797   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
2798   // (red,red,red,red) etc.) The ISA document doesn't mention
2799   // this.
2800   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
2801 }
2802 
2803 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
2804 
2805   const unsigned Opc = Inst.getOpcode();
2806   const MCInstrDesc &Desc = MII.get(Opc);
2807 
2808   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2809     return true;
2810 
2811   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2812   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
2813     if (isCI() || isSI())
2814       return false;
2815   }
2816 
2817   return true;
2818 }
2819 
2820 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
2821   const unsigned Opc = Inst.getOpcode();
2822   const MCInstrDesc &Desc = MII.get(Opc);
2823 
2824   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2825     return true;
2826 
2827   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2828   if (DimIdx < 0)
2829     return true;
2830 
2831   long Imm = Inst.getOperand(DimIdx).getImm();
2832   if (Imm < 0 || Imm >= 8)
2833     return false;
2834 
2835   return true;
2836 }
2837 
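// Opcodes with reversed source operands (v_subrev_*, v_lshlrev_*, etc).
// validateLdsDirect() disallows lds_direct as src0 for these opcodes.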
2838 static bool IsRevOpcode(const unsigned Opcode)
2839 {
2840   switch (Opcode) {
2841   case AMDGPU::V_SUBREV_F32_e32:
2842   case AMDGPU::V_SUBREV_F32_e64:
2843   case AMDGPU::V_SUBREV_F32_e32_gfx10:
2844   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
2845   case AMDGPU::V_SUBREV_F32_e32_vi:
2846   case AMDGPU::V_SUBREV_F32_e64_gfx10:
2847   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
2848   case AMDGPU::V_SUBREV_F32_e64_vi:
2849 
2850   case AMDGPU::V_SUBREV_I32_e32:
2851   case AMDGPU::V_SUBREV_I32_e64:
2852   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
2853   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
2854 
2855   case AMDGPU::V_SUBBREV_U32_e32:
2856   case AMDGPU::V_SUBBREV_U32_e64:
2857   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
2858   case AMDGPU::V_SUBBREV_U32_e32_vi:
2859   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
2860   case AMDGPU::V_SUBBREV_U32_e64_vi:
2861 
2862   case AMDGPU::V_SUBREV_U32_e32:
2863   case AMDGPU::V_SUBREV_U32_e64:
2864   case AMDGPU::V_SUBREV_U32_e32_gfx9:
2865   case AMDGPU::V_SUBREV_U32_e32_vi:
2866   case AMDGPU::V_SUBREV_U32_e64_gfx9:
2867   case AMDGPU::V_SUBREV_U32_e64_vi:
2868 
2869   case AMDGPU::V_SUBREV_F16_e32:
2870   case AMDGPU::V_SUBREV_F16_e64:
2871   case AMDGPU::V_SUBREV_F16_e32_gfx10:
2872   case AMDGPU::V_SUBREV_F16_e32_vi:
2873   case AMDGPU::V_SUBREV_F16_e64_gfx10:
2874   case AMDGPU::V_SUBREV_F16_e64_vi:
2875 
2876   case AMDGPU::V_SUBREV_U16_e32:
2877   case AMDGPU::V_SUBREV_U16_e64:
2878   case AMDGPU::V_SUBREV_U16_e32_vi:
2879   case AMDGPU::V_SUBREV_U16_e64_vi:
2880 
2881   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
2882   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
2883   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
2884 
2885   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
2886   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
2887 
2888   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
2889   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
2890 
2891   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
2892   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
2893 
2894   case AMDGPU::V_LSHRREV_B32_e32:
2895   case AMDGPU::V_LSHRREV_B32_e64:
2896   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
2897   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
2898   case AMDGPU::V_LSHRREV_B32_e32_vi:
2899   case AMDGPU::V_LSHRREV_B32_e64_vi:
2900   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
2901   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
2902 
2903   case AMDGPU::V_ASHRREV_I32_e32:
2904   case AMDGPU::V_ASHRREV_I32_e64:
2905   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
2906   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
2907   case AMDGPU::V_ASHRREV_I32_e32_vi:
2908   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
2909   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
2910   case AMDGPU::V_ASHRREV_I32_e64_vi:
2911 
2912   case AMDGPU::V_LSHLREV_B32_e32:
2913   case AMDGPU::V_LSHLREV_B32_e64:
2914   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
2915   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
2916   case AMDGPU::V_LSHLREV_B32_e32_vi:
2917   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
2918   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
2919   case AMDGPU::V_LSHLREV_B32_e64_vi:
2920 
2921   case AMDGPU::V_LSHLREV_B16_e32:
2922   case AMDGPU::V_LSHLREV_B16_e64:
2923   case AMDGPU::V_LSHLREV_B16_e32_vi:
2924   case AMDGPU::V_LSHLREV_B16_e64_vi:
2925   case AMDGPU::V_LSHLREV_B16_gfx10:
2926 
2927   case AMDGPU::V_LSHRREV_B16_e32:
2928   case AMDGPU::V_LSHRREV_B16_e64:
2929   case AMDGPU::V_LSHRREV_B16_e32_vi:
2930   case AMDGPU::V_LSHRREV_B16_e64_vi:
2931   case AMDGPU::V_LSHRREV_B16_gfx10:
2932 
2933   case AMDGPU::V_ASHRREV_I16_e32:
2934   case AMDGPU::V_ASHRREV_I16_e64:
2935   case AMDGPU::V_ASHRREV_I16_e32_vi:
2936   case AMDGPU::V_ASHRREV_I16_e64_vi:
2937   case AMDGPU::V_ASHRREV_I16_gfx10:
2938 
2939   case AMDGPU::V_LSHLREV_B64:
2940   case AMDGPU::V_LSHLREV_B64_gfx10:
2941   case AMDGPU::V_LSHLREV_B64_vi:
2942 
2943   case AMDGPU::V_LSHRREV_B64:
2944   case AMDGPU::V_LSHRREV_B64_gfx10:
2945   case AMDGPU::V_LSHRREV_B64_vi:
2946 
2947   case AMDGPU::V_ASHRREV_I64:
2948   case AMDGPU::V_ASHRREV_I64_gfx10:
2949   case AMDGPU::V_ASHRREV_I64_vi:
2950 
2951   case AMDGPU::V_PK_LSHLREV_B16:
2952   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
2953   case AMDGPU::V_PK_LSHLREV_B16_vi:
2954 
2955   case AMDGPU::V_PK_LSHRREV_B16:
2956   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
2957   case AMDGPU::V_PK_LSHRREV_B16_vi:
2958   case AMDGPU::V_PK_ASHRREV_I16:
2959   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
2960   case AMDGPU::V_PK_ASHRREV_I16_vi:
2961     return true;
2962   default:
2963     return false;
2964   }
2965 }
2966 
2967 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
2968 
2969   using namespace SIInstrFlags;
2970   const unsigned Opcode = Inst.getOpcode();
2971   const MCInstrDesc &Desc = MII.get(Opcode);
2972 
  // The lds_direct register is defined so that it can be used
  // with 9-bit source operands only. Ignore encodings which do not accept these.
2975   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
2976     return true;
2977 
2978   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2979   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2980   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2981 
2982   const int SrcIndices[] = { Src1Idx, Src2Idx };
2983 
2984   // lds_direct cannot be specified as either src1 or src2.
2985   for (int SrcIdx : SrcIndices) {
2986     if (SrcIdx == -1) break;
2987     const MCOperand &Src = Inst.getOperand(SrcIdx);
2988     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
2989       return false;
2990     }
2991   }
2992 
2993   if (Src0Idx == -1)
2994     return true;
2995 
2996   const MCOperand &Src = Inst.getOperand(Src0Idx);
2997   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
2998     return true;
2999 
3000   // lds_direct is specified as src0. Check additional limitations.
3001   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3002 }
3003 
3004 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3005   unsigned Opcode = Inst.getOpcode();
3006   const MCInstrDesc &Desc = MII.get(Opcode);
3007   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3008     return true;
3009 
3010   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3011   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3012 
3013   const int OpIndices[] = { Src0Idx, Src1Idx };
3014 
3015   unsigned NumLiterals = 0;
3016   uint32_t LiteralValue;
3017 
3018   for (int OpIdx : OpIndices) {
3019     if (OpIdx == -1) break;
3020 
3021     const MCOperand &MO = Inst.getOperand(OpIdx);
3022     if (MO.isImm() &&
        // Exclude special imm operands (like the one used by s_set_gpr_idx_on)
3024         AMDGPU::isSISrcOperand(Desc, OpIdx) &&
3025         !isInlineConstant(Inst, OpIdx)) {
3026       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3027       if (NumLiterals == 0 || LiteralValue != Value) {
3028         LiteralValue = Value;
3029         ++NumLiterals;
3030       }
3031     }
3032   }
3033 
3034   return NumLiterals <= 1;
3035 }
3036 
3037 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3038   const unsigned Opc = Inst.getOpcode();
3039   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3040       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3041     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3042     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3043 
3044     if (OpSel & ~3)
3045       return false;
3046   }
3047   return true;
3048 }
3049 
3050 // Check if VCC register matches wavefront size
3051 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3052   auto FB = getFeatureBits();
3053   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3054     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3055 }
3056 
3057 // VOP3 literal is only allowed in GFX10+ and only one can be used
3058 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3059   unsigned Opcode = Inst.getOpcode();
3060   const MCInstrDesc &Desc = MII.get(Opcode);
3061   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3062     return true;
3063 
3064   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3065   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3066   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3067 
3068   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3069 
3070   unsigned NumLiterals = 0;
3071   uint32_t LiteralValue;
3072 
3073   for (int OpIdx : OpIndices) {
3074     if (OpIdx == -1) break;
3075 
3076     const MCOperand &MO = Inst.getOperand(OpIdx);
3077     if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx))
3078       continue;
3079 
3080     if (!isInlineConstant(Inst, OpIdx)) {
3081       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3082       if (NumLiterals == 0 || LiteralValue != Value) {
3083         LiteralValue = Value;
3084         ++NumLiterals;
3085       }
3086     }
3087   }
3088 
3089   return !NumLiterals ||
3090          (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3091 }
3092 
3093 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3094                                           const SMLoc &IDLoc) {
3095   if (!validateLdsDirect(Inst)) {
3096     Error(IDLoc,
3097       "invalid use of lds_direct");
3098     return false;
3099   }
3100   if (!validateSOPLiteral(Inst)) {
3101     Error(IDLoc,
3102       "only one literal operand is allowed");
3103     return false;
3104   }
3105   if (!validateVOP3Literal(Inst)) {
3106     Error(IDLoc,
3107       "invalid literal operand");
3108     return false;
3109   }
3110   if (!validateConstantBusLimitations(Inst)) {
3111     Error(IDLoc,
3112       "invalid operand (violates constant bus restrictions)");
3113     return false;
3114   }
3115   if (!validateEarlyClobberLimitations(Inst)) {
3116     Error(IDLoc,
3117       "destination must be different than all sources");
3118     return false;
3119   }
3120   if (!validateIntClampSupported(Inst)) {
3121     Error(IDLoc,
3122       "integer clamping is not supported on this GPU");
3123     return false;
3124   }
3125   if (!validateOpSel(Inst)) {
3126     Error(IDLoc,
3127       "invalid op_sel operand");
3128     return false;
3129   }
3130   // For MUBUF/MTBUF, d16 is a part of the opcode, so there is nothing to validate.
3131   if (!validateMIMGD16(Inst)) {
3132     Error(IDLoc,
3133       "d16 modifier is not supported on this GPU");
3134     return false;
3135   }
3136   if (!validateMIMGDim(Inst)) {
3137     Error(IDLoc, "dim modifier is required on this GPU");
3138     return false;
3139   }
3140   if (!validateMIMGDataSize(Inst)) {
3141     Error(IDLoc,
3142       "image data size does not match dmask and tfe");
3143     return false;
3144   }
3145   if (!validateMIMGAddrSize(Inst)) {
3146     Error(IDLoc,
3147       "image address size does not match dim and a16");
3148     return false;
3149   }
3150   if (!validateMIMGAtomicDMask(Inst)) {
3151     Error(IDLoc,
3152       "invalid atomic image dmask");
3153     return false;
3154   }
3155   if (!validateMIMGGatherDMask(Inst)) {
3156     Error(IDLoc,
3157       "invalid image_gather dmask: only one bit must be set");
3158     return false;
3159   }
3160 
3161   return true;
3162 }
3163 
3164 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3165                                             const FeatureBitset &FBS,
3166                                             unsigned VariantID = 0);
3167 
3168 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3169                                               OperandVector &Operands,
3170                                               MCStreamer &Out,
3171                                               uint64_t &ErrorInfo,
3172                                               bool MatchingInlineAsm) {
3173   MCInst Inst;
3174   unsigned Result = Match_Success;
3175   for (auto Variant : getMatchedVariants()) {
3176     uint64_t EI;
3177     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3178                                   Variant);
3179     // We order match statuses from least to most specific; keep the most
3180     // specific status seen so far as the result:
3181     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
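    // For example, if one encoding variant fails with Match_MnemonicFail and
    // another fails with Match_InvalidOperand, the more specific
    // invalid-operand diagnostic is the one reported.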
3182     if ((R == Match_Success) ||
3183         (R == Match_PreferE32) ||
3184         (R == Match_MissingFeature && Result != Match_PreferE32) ||
3185         (R == Match_InvalidOperand && Result != Match_MissingFeature
3186                                    && Result != Match_PreferE32) ||
3187         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
3188                                    && Result != Match_MissingFeature
3189                                    && Result != Match_PreferE32)) {
3190       Result = R;
3191       ErrorInfo = EI;
3192     }
3193     if (R == Match_Success)
3194       break;
3195   }
3196 
3197   switch (Result) {
3198   default: break;
3199   case Match_Success:
3200     if (!validateInstruction(Inst, IDLoc)) {
3201       return true;
3202     }
3203     Inst.setLoc(IDLoc);
3204     Out.EmitInstruction(Inst, getSTI());
3205     return false;
3206 
3207   case Match_MissingFeature:
3208     return Error(IDLoc, "instruction not supported on this GPU");
3209 
3210   case Match_MnemonicFail: {
3211     FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3212     std::string Suggestion = AMDGPUMnemonicSpellCheck(
3213         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
3214     return Error(IDLoc, "invalid instruction" + Suggestion,
3215                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
3216   }
3217 
3218   case Match_InvalidOperand: {
3219     SMLoc ErrorLoc = IDLoc;
3220     if (ErrorInfo != ~0ULL) {
3221       if (ErrorInfo >= Operands.size()) {
3222         return Error(IDLoc, "too few operands for instruction");
3223       }
3224       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3225       if (ErrorLoc == SMLoc())
3226         ErrorLoc = IDLoc;
3227     }
3228     return Error(ErrorLoc, "invalid operand for instruction");
3229   }
3230 
3231   case Match_PreferE32:
3232     return Error(IDLoc, "internal error: instruction without _e64 suffix "
3233                         "should be encoded as e32");
3234   }
3235   llvm_unreachable("Implement any new match types added!");
3236 }
3237 
3238 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3239   int64_t Tmp = -1;
3240   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3241     return true;
3242   }
3243   if (getParser().parseAbsoluteExpression(Tmp)) {
3244     return true;
3245   }
3246   Ret = static_cast<uint32_t>(Tmp);
3247   return false;
3248 }
3249 
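// Parses the "<major>, <minor>" pair shared by the HSA code object
// directives, e.g. the "2, 1" in ".hsa_code_object_version 2,1"
// (illustrative values).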
3250 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3251                                                uint32_t &Minor) {
3252   if (ParseAsAbsoluteExpression(Major))
3253     return TokError("invalid major version");
3254 
3255   if (getLexer().isNot(AsmToken::Comma))
3256     return TokError("minor version number required, comma expected");
3257   Lex();
3258 
3259   if (ParseAsAbsoluteExpression(Minor))
3260     return TokError("invalid minor version");
3261 
3262   return false;
3263 }
3264 
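// Example (illustrative; the string must match the configured target):
//   .amdgcn_target "amdgcn-amd-amdhsa--gfx906"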
3265 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3266   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3267     return TokError("directive only supported for amdgcn architecture");
3268 
3269   std::string Target;
3270 
3271   SMLoc TargetStart = getTok().getLoc();
3272   if (getParser().parseEscapedString(Target))
3273     return true;
3274   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3275 
3276   std::string ExpectedTarget;
3277   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3278   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3279 
3280   if (Target != ExpectedTargetOS.str())
3281     return getParser().Error(TargetRange.Start, "target must match options",
3282                              TargetRange);
3283 
3284   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3285   return false;
3286 }
3287 
3288 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3289   return getParser().Error(Range.Start, "value out of range", Range);
3290 }
3291 
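// Convert raw register counts into the granulated "block" encodings stored in
// COMPUTE_PGM_RSRC1. The granule sizes are target-dependent and are handled by
// the IsaInfo helpers used below.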
3292 bool AMDGPUAsmParser::calculateGPRBlocks(
3293     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3294     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
3295     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
3296     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
3297   // TODO(scott.linder): These calculations are duplicated from
3298   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3299   IsaVersion Version = getIsaVersion(getSTI().getCPU());
3300 
3301   unsigned NumVGPRs = NextFreeVGPR;
3302   unsigned NumSGPRs = NextFreeSGPR;
3303 
3304   if (Version.Major >= 10)
3305     NumSGPRs = 0;
3306   else {
3307     unsigned MaxAddressableNumSGPRs =
3308         IsaInfo::getAddressableNumSGPRs(&getSTI());
3309 
3310     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3311         NumSGPRs > MaxAddressableNumSGPRs)
3312       return OutOfRangeError(SGPRRange);
3313 
3314     NumSGPRs +=
3315         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3316 
3317     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3318         NumSGPRs > MaxAddressableNumSGPRs)
3319       return OutOfRangeError(SGPRRange);
3320 
3321     if (Features.test(FeatureSGPRInitBug))
3322       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3323   }
3324 
3325   VGPRBlocks =
3326       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
3327   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3328 
3329   return false;
3330 }
3331 
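// Example (illustrative only):
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel
// The two .amdhsa_next_free_* directives are mandatory; all other fields
// default to the values from getDefaultAmdhsaKernelDescriptor().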
3332 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3333   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3334     return TokError("directive only supported for amdgcn architecture");
3335 
3336   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3337     return TokError("directive only supported for amdhsa OS");
3338 
3339   StringRef KernelName;
3340   if (getParser().parseIdentifier(KernelName))
3341     return true;
3342 
3343   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3344 
3345   StringSet<> Seen;
3346 
3347   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3348 
3349   SMRange VGPRRange;
3350   uint64_t NextFreeVGPR = 0;
3351   SMRange SGPRRange;
3352   uint64_t NextFreeSGPR = 0;
3353   unsigned UserSGPRCount = 0;
3354   bool ReserveVCC = true;
3355   bool ReserveFlatScr = true;
3356   bool ReserveXNACK = hasXNACK();
3357   Optional<bool> EnableWavefrontSize32;
3358 
3359   while (true) {
3360     while (getLexer().is(AsmToken::EndOfStatement))
3361       Lex();
3362 
3363     if (getLexer().isNot(AsmToken::Identifier))
3364       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3365 
3366     StringRef ID = getTok().getIdentifier();
3367     SMRange IDRange = getTok().getLocRange();
3368     Lex();
3369 
3370     if (ID == ".end_amdhsa_kernel")
3371       break;
3372 
3373     if (Seen.find(ID) != Seen.end())
3374       return TokError(".amdhsa_ directives cannot be repeated");
3375     Seen.insert(ID);
3376 
3377     SMLoc ValStart = getTok().getLoc();
3378     int64_t IVal;
3379     if (getParser().parseAbsoluteExpression(IVal))
3380       return true;
3381     SMLoc ValEnd = getTok().getLoc();
3382     SMRange ValRange = SMRange(ValStart, ValEnd);
3383 
3384     if (IVal < 0)
3385       return OutOfRangeError(ValRange);
3386 
3387     uint64_t Val = IVal;
3388 
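// Reject VALUE if it does not fit into the ENTRY bit-field, otherwise pack it
// into FIELD of the kernel descriptor.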
3389 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
3390   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
3391     return OutOfRangeError(RANGE);                                             \
3392   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3393 
3394     if (ID == ".amdhsa_group_segment_fixed_size") {
3395       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3396         return OutOfRangeError(ValRange);
3397       KD.group_segment_fixed_size = Val;
3398     } else if (ID == ".amdhsa_private_segment_fixed_size") {
3399       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3400         return OutOfRangeError(ValRange);
3401       KD.private_segment_fixed_size = Val;
3402     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3403       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3404                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3405                        Val, ValRange);
3406       UserSGPRCount += 4;
3407     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3408       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3409                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3410                        ValRange);
3411       UserSGPRCount += 2;
3412     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3413       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3414                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3415                        ValRange);
3416       UserSGPRCount += 2;
3417     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3418       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3419                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3420                        Val, ValRange);
3421       UserSGPRCount += 2;
3422     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3423       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3424                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3425                        ValRange);
3426       UserSGPRCount += 2;
3427     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3428       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3429                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3430                        ValRange);
3431       UserSGPRCount += 2;
3432     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3433       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3434                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3435                        Val, ValRange);
3436       UserSGPRCount += 1;
3437     } else if (ID == ".amdhsa_wavefront_size32") {
3438       if (IVersion.Major < 10)
3439         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3440                                  IDRange);
3441       EnableWavefrontSize32 = Val;
3442       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3443                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
3444                        Val, ValRange);
3445     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3446       PARSE_BITS_ENTRY(
3447           KD.compute_pgm_rsrc2,
3448           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3449           ValRange);
3450     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3451       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3452                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3453                        ValRange);
3454     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3455       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3456                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3457                        ValRange);
3458     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3459       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3460                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3461                        ValRange);
3462     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3463       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3464                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3465                        ValRange);
3466     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3467       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3468                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3469                        ValRange);
3470     } else if (ID == ".amdhsa_next_free_vgpr") {
3471       VGPRRange = ValRange;
3472       NextFreeVGPR = Val;
3473     } else if (ID == ".amdhsa_next_free_sgpr") {
3474       SGPRRange = ValRange;
3475       NextFreeSGPR = Val;
3476     } else if (ID == ".amdhsa_reserve_vcc") {
3477       if (!isUInt<1>(Val))
3478         return OutOfRangeError(ValRange);
3479       ReserveVCC = Val;
3480     } else if (ID == ".amdhsa_reserve_flat_scratch") {
3481       if (IVersion.Major < 7)
3482         return getParser().Error(IDRange.Start, "directive requires gfx7+",
3483                                  IDRange);
3484       if (!isUInt<1>(Val))
3485         return OutOfRangeError(ValRange);
3486       ReserveFlatScr = Val;
3487     } else if (ID == ".amdhsa_reserve_xnack_mask") {
3488       if (IVersion.Major < 8)
3489         return getParser().Error(IDRange.Start, "directive requires gfx8+",
3490                                  IDRange);
3491       if (!isUInt<1>(Val))
3492         return OutOfRangeError(ValRange);
3493       ReserveXNACK = Val;
3494     } else if (ID == ".amdhsa_float_round_mode_32") {
3495       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3496                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
3497     } else if (ID == ".amdhsa_float_round_mode_16_64") {
3498       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3499                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
3500     } else if (ID == ".amdhsa_float_denorm_mode_32") {
3501       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3502                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
3503     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
3504       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3505                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
3506                        ValRange);
3507     } else if (ID == ".amdhsa_dx10_clamp") {
3508       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3509                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
3510     } else if (ID == ".amdhsa_ieee_mode") {
3511       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
3512                        Val, ValRange);
3513     } else if (ID == ".amdhsa_fp16_overflow") {
3514       if (IVersion.Major < 9)
3515         return getParser().Error(IDRange.Start, "directive requires gfx9+",
3516                                  IDRange);
3517       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
3518                        ValRange);
3519     } else if (ID == ".amdhsa_workgroup_processor_mode") {
3520       if (IVersion.Major < 10)
3521         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3522                                  IDRange);
3523       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
3524                        ValRange);
3525     } else if (ID == ".amdhsa_memory_ordered") {
3526       if (IVersion.Major < 10)
3527         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3528                                  IDRange);
3529       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
3530                        ValRange);
3531     } else if (ID == ".amdhsa_forward_progress") {
3532       if (IVersion.Major < 10)
3533         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3534                                  IDRange);
3535       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
3536                        ValRange);
3537     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
3538       PARSE_BITS_ENTRY(
3539           KD.compute_pgm_rsrc2,
3540           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
3541           ValRange);
3542     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
3543       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3544                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
3545                        Val, ValRange);
3546     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
3547       PARSE_BITS_ENTRY(
3548           KD.compute_pgm_rsrc2,
3549           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
3550           ValRange);
3551     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
3552       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3553                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
3554                        Val, ValRange);
3555     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
3556       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3557                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
3558                        Val, ValRange);
3559     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
3560       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3561                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
3562                        Val, ValRange);
3563     } else if (ID == ".amdhsa_exception_int_div_zero") {
3564       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3565                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
3566                        Val, ValRange);
3567     } else {
3568       return getParser().Error(IDRange.Start,
3569                                "unknown .amdhsa_kernel directive", IDRange);
3570     }
3571 
3572 #undef PARSE_BITS_ENTRY
3573   }
3574 
3575   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
3576     return TokError(".amdhsa_next_free_vgpr directive is required");
3577 
3578   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
3579     return TokError(".amdhsa_next_free_sgpr directive is required");
3580 
3581   unsigned VGPRBlocks;
3582   unsigned SGPRBlocks;
3583   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
3584                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
3585                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
3586                          SGPRBlocks))
3587     return true;
3588 
3589   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
3590           VGPRBlocks))
3591     return OutOfRangeError(VGPRRange);
3592   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3593                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
3594 
3595   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
3596           SGPRBlocks))
3597     return OutOfRangeError(SGPRRange);
3598   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3599                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
3600                   SGPRBlocks);
3601 
3602   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
3603     return TokError("too many user SGPRs enabled");
3604   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
3605                   UserSGPRCount);
3606 
3607   getTargetStreamer().EmitAmdhsaKernelDescriptor(
3608       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
3609       ReserveFlatScr, ReserveXNACK);
3610   return false;
3611 }
3612 
3613 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
3614   uint32_t Major;
3615   uint32_t Minor;
3616 
3617   if (ParseDirectiveMajorMinor(Major, Minor))
3618     return true;
3619 
3620   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
3621   return false;
3622 }
3623 
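// Example (illustrative only):
//   .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
// With no arguments, the ISA version of the targeted GPU is emitted instead.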
3624 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
3625   uint32_t Major;
3626   uint32_t Minor;
3627   uint32_t Stepping;
3628   StringRef VendorName;
3629   StringRef ArchName;
3630 
3631   // If this directive has no arguments, then use the ISA version for the
3632   // targeted GPU.
3633   if (getLexer().is(AsmToken::EndOfStatement)) {
3634     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3635     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
3636                                                       ISA.Stepping,
3637                                                       "AMD", "AMDGPU");
3638     return false;
3639   }
3640 
3641   if (ParseDirectiveMajorMinor(Major, Minor))
3642     return true;
3643 
3644   if (getLexer().isNot(AsmToken::Comma))
3645     return TokError("stepping version number required, comma expected");
3646   Lex();
3647 
3648   if (ParseAsAbsoluteExpression(Stepping))
3649     return TokError("invalid stepping version");
3650 
3651   if (getLexer().isNot(AsmToken::Comma))
3652     return TokError("vendor name required, comma expected");
3653   Lex();
3654 
3655   if (getLexer().isNot(AsmToken::String))
3656     return TokError("invalid vendor name");
3657 
3658   VendorName = getLexer().getTok().getStringContents();
3659   Lex();
3660 
3661   if (getLexer().isNot(AsmToken::Comma))
3662     return TokError("arch name required, comma expected");
3663   Lex();
3664 
3665   if (getLexer().isNot(AsmToken::String))
3666     return TokError("invalid arch name");
3667 
3668   ArchName = getLexer().getTok().getStringContents();
3669   Lex();
3670 
3671   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
3672                                                     VendorName, ArchName);
3673   return false;
3674 }
3675 
3676 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
3677                                                amd_kernel_code_t &Header) {
3678   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
3679   // assembly for backwards compatibility.
3680   if (ID == "max_scratch_backing_memory_byte_size") {
3681     Parser.eatToEndOfStatement();
3682     return false;
3683   }
3684 
3685   SmallString<40> ErrStr;
3686   raw_svector_ostream Err(ErrStr);
3687   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
3688     return TokError(Err.str());
3689   }
3690   Lex();
3691 
3692   if (ID == "enable_wavefront_size32") {
3693     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
3694       if (!isGFX10())
3695         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
3696       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
3697         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
3698     } else {
3699       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
3700         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
3701     }
3702   }
3703 
3704   if (ID == "wavefront_size") {
3705     if (Header.wavefront_size == 5) {
3706       if (!isGFX10())
3707         return TokError("wavefront_size=5 is only allowed on GFX10+");
3708       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
3709         return TokError("wavefront_size=5 requires +WavefrontSize32");
3710     } else if (Header.wavefront_size == 6) {
3711       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
3712         return TokError("wavefront_size=6 requires +WavefrontSize64");
3713     }
3714   }
3715 
3716   if (ID == "enable_wgp_mode") {
3717     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
3718       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
3719   }
3720 
3721   if (ID == "enable_mem_ordered") {
3722     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
3723       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
3724   }
3725 
3726   if (ID == "enable_fwd_progress") {
3727     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
3728       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
3729   }
3730 
3731   return false;
3732 }
3733 
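// Example (illustrative; only a few of the many amd_kernel_code_t fields are
// shown):
//   .amd_kernel_code_t
//     enable_sgpr_kernarg_segment_ptr = 1
//     wavefront_size = 6
//   .end_amd_kernel_code_t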
3734 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
3735   amd_kernel_code_t Header;
3736   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
3737 
3738   while (true) {
3739     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
3740     // will set the current token to EndOfStatement.
3741     while(getLexer().is(AsmToken::EndOfStatement))
3742       Lex();
3743 
3744     if (getLexer().isNot(AsmToken::Identifier))
3745       return TokError("expected value identifier or .end_amd_kernel_code_t");
3746 
3747     StringRef ID = getLexer().getTok().getIdentifier();
3748     Lex();
3749 
3750     if (ID == ".end_amd_kernel_code_t")
3751       break;
3752 
3753     if (ParseAMDKernelCodeTValue(ID, Header))
3754       return true;
3755   }
3756 
3757   getTargetStreamer().EmitAMDKernelCodeT(Header);
3758 
3759   return false;
3760 }
3761 
3762 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
3763   if (getLexer().isNot(AsmToken::Identifier))
3764     return TokError("expected symbol name");
3765 
3766   StringRef KernelName = Parser.getTok().getString();
3767 
3768   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
3769                                            ELF::STT_AMDGPU_HSA_KERNEL);
3770   Lex();
3771   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
3772     KernelScope.initialize(getContext());
3773   return false;
3774 }
3775 
3776 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
3777   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
3778     return Error(getParser().getTok().getLoc(),
3779                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
3780                  "architectures");
3781   }
3782 
3783   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
3784 
3785   std::string ISAVersionStringFromSTI;
3786   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
3787   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
3788 
3789   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
3790     return Error(getParser().getTok().getLoc(),
3791                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
3792                  "arguments specified through the command line");
3793   }
3794 
3795   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
3796   Lex();
3797 
3798   return false;
3799 }
3800 
3801 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
3802   const char *AssemblerDirectiveBegin;
3803   const char *AssemblerDirectiveEnd;
3804   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
3805       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
3806           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
3807                             HSAMD::V3::AssemblerDirectiveEnd)
3808           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
3809                             HSAMD::AssemblerDirectiveEnd);
3810 
3811   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
3812     return Error(getParser().getTok().getLoc(),
3813                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
3814                  "not available on non-amdhsa OSes")).str());
3815   }
3816 
3817   std::string HSAMetadataString;
3818   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
3819                           HSAMetadataString))
3820     return true;
3821 
3822   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
3823     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
3824       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3825   } else {
3826     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
3827       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3828   }
3829 
3830   return false;
3831 }
3832 
3833 /// Common code to parse out a block of text (typically YAML) between start and
3834 /// end directives.
3835 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
3836                                           const char *AssemblerDirectiveEnd,
3837                                           std::string &CollectString) {
3838 
3839   raw_string_ostream CollectStream(CollectString);
3840 
3841   getLexer().setSkipSpace(false);
3842 
3843   bool FoundEnd = false;
3844   while (!getLexer().is(AsmToken::Eof)) {
3845     while (getLexer().is(AsmToken::Space)) {
3846       CollectStream << getLexer().getTok().getString();
3847       Lex();
3848     }
3849 
3850     if (getLexer().is(AsmToken::Identifier)) {
3851       StringRef ID = getLexer().getTok().getIdentifier();
3852       if (ID == AssemblerDirectiveEnd) {
3853         Lex();
3854         FoundEnd = true;
3855         break;
3856       }
3857     }
3858 
3859     CollectStream << Parser.parseStringToEndOfStatement()
3860                   << getContext().getAsmInfo()->getSeparatorString();
3861 
3862     Parser.eatToEndOfStatement();
3863   }
3864 
3865   getLexer().setSkipSpace(true);
3866 
3867   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
3868     return TokError(Twine("expected directive ") +
3869                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
3870   }
3871 
3872   CollectStream.flush();
3873   return false;
3874 }
3875 
3876 /// Parse the assembler directive for new MsgPack-format PAL metadata.
3877 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
3878   std::string String;
3879   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
3880                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
3881     return true;
3882 
3883   auto PALMetadata = getTargetStreamer().getPALMetadata();
3884   if (!PALMetadata->setFromString(String))
3885     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
3886   return false;
3887 }
3888 
3889 /// Parse the assembler directive for old linear-format PAL metadata.
3890 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
3891   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
3892     return Error(getParser().getTok().getLoc(),
3893                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
3894                  "not available on non-amdpal OSes")).str());
3895   }
3896 
3897   auto PALMetadata = getTargetStreamer().getPALMetadata();
3898   PALMetadata->setLegacy();
3899   for (;;) {
3900     uint32_t Key, Value;
3901     if (ParseAsAbsoluteExpression(Key)) {
3902       return TokError(Twine("invalid value in ") +
3903                       Twine(PALMD::AssemblerDirective));
3904     }
3905     if (getLexer().isNot(AsmToken::Comma)) {
3906       return TokError(Twine("expected an even number of values in ") +
3907                       Twine(PALMD::AssemblerDirective));
3908     }
3909     Lex();
3910     if (ParseAsAbsoluteExpression(Value)) {
3911       return TokError(Twine("invalid value in ") +
3912                       Twine(PALMD::AssemblerDirective));
3913     }
3914     PALMetadata->setRegister(Key, Value);
3915     if (getLexer().isNot(AsmToken::Comma))
3916       break;
3917     Lex();
3918   }
3919   return false;
3920 }
3921 
3922 /// ParseDirectiveAMDGPULDS
3923 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
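///  e.g. ".amdgpu_lds lds_var, 512, 16" (illustrative) declares a 512-byte LDS
///  symbol aligned to 16 bytes; the alignment defaults to 4 when omitted.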
3924 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
3925   if (getParser().checkForValidSection())
3926     return true;
3927 
3928   StringRef Name;
3929   SMLoc NameLoc = getLexer().getLoc();
3930   if (getParser().parseIdentifier(Name))
3931     return TokError("expected identifier in directive");
3932 
3933   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
3934   if (parseToken(AsmToken::Comma, "expected ','"))
3935     return true;
3936 
3937   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
3938 
3939   int64_t Size;
3940   SMLoc SizeLoc = getLexer().getLoc();
3941   if (getParser().parseAbsoluteExpression(Size))
3942     return true;
3943   if (Size < 0)
3944     return Error(SizeLoc, "size must be non-negative");
3945   if (Size > LocalMemorySize)
3946     return Error(SizeLoc, "size is too large");
3947 
3948   int64_t Align = 4;
3949   if (getLexer().is(AsmToken::Comma)) {
3950     Lex();
3951     SMLoc AlignLoc = getLexer().getLoc();
3952     if (getParser().parseAbsoluteExpression(Align))
3953       return true;
3954     if (Align < 0 || !isPowerOf2_64(Align))
3955       return Error(AlignLoc, "alignment must be a power of two");
3956 
3957     // Alignment larger than the size of LDS is possible in theory, as long
3958     // as the linker manages to place the symbol at address 0, but we do want
3959     // to make sure the alignment fits nicely into a 32-bit integer.
3960     if (Align >= 1u << 31)
3961       return Error(AlignLoc, "alignment is too large");
3962   }
3963 
3964   if (parseToken(AsmToken::EndOfStatement,
3965                  "unexpected token in '.amdgpu_lds' directive"))
3966     return true;
3967 
3968   Symbol->redefineIfPossible();
3969   if (!Symbol->isUndefined())
3970     return Error(NameLoc, "invalid symbol redefinition");
3971 
3972   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align);
3973   return false;
3974 }
3975 
3976 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
3977   StringRef IDVal = DirectiveID.getString();
3978 
3979   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
3980     if (IDVal == ".amdgcn_target")
3981       return ParseDirectiveAMDGCNTarget();
3982 
3983     if (IDVal == ".amdhsa_kernel")
3984       return ParseDirectiveAMDHSAKernel();
3985 
3986     // TODO: Restructure/combine with PAL metadata directive.
3987     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
3988       return ParseDirectiveHSAMetadata();
3989   } else {
3990     if (IDVal == ".hsa_code_object_version")
3991       return ParseDirectiveHSACodeObjectVersion();
3992 
3993     if (IDVal == ".hsa_code_object_isa")
3994       return ParseDirectiveHSACodeObjectISA();
3995 
3996     if (IDVal == ".amd_kernel_code_t")
3997       return ParseDirectiveAMDKernelCodeT();
3998 
3999     if (IDVal == ".amdgpu_hsa_kernel")
4000       return ParseDirectiveAMDGPUHsaKernel();
4001 
4002     if (IDVal == ".amd_amdgpu_isa")
4003       return ParseDirectiveISAVersion();
4004 
4005     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4006       return ParseDirectiveHSAMetadata();
4007   }
4008 
4009   if (IDVal == ".amdgpu_lds")
4010     return ParseDirectiveAMDGPULDS();
4011 
4012   if (IDVal == PALMD::AssemblerDirectiveBegin)
4013     return ParseDirectivePALMetadataBegin();
4014 
4015   if (IDVal == PALMD::AssemblerDirective)
4016     return ParseDirectivePALMetadata();
4017 
4018   return true;
4019 }
4020 
4021 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4022                                            unsigned RegNo) const {
4023 
4024   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4025        R.isValid(); ++R) {
4026     if (*R == RegNo)
4027       return isGFX9() || isGFX10();
4028   }
4029 
4030   // GFX10 has 2 more SGPRs 104 and 105.
4031   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4032        R.isValid(); ++R) {
4033     if (*R == RegNo)
4034       return hasSGPR104_SGPR105();
4035   }
4036 
4037   switch (RegNo) {
4038   case AMDGPU::SRC_SHARED_BASE:
4039   case AMDGPU::SRC_SHARED_LIMIT:
4040   case AMDGPU::SRC_PRIVATE_BASE:
4041   case AMDGPU::SRC_PRIVATE_LIMIT:
4042   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4043     return !isCI() && !isSI() && !isVI();
4044   case AMDGPU::TBA:
4045   case AMDGPU::TBA_LO:
4046   case AMDGPU::TBA_HI:
4047   case AMDGPU::TMA:
4048   case AMDGPU::TMA_LO:
4049   case AMDGPU::TMA_HI:
4050     return !isGFX9() && !isGFX10();
4051   case AMDGPU::XNACK_MASK:
4052   case AMDGPU::XNACK_MASK_LO:
4053   case AMDGPU::XNACK_MASK_HI:
4054     return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4055   case AMDGPU::SGPR_NULL:
4056     return isGFX10();
4057   default:
4058     break;
4059   }
4060 
4061   if (isCI())
4062     return true;
4063 
4064   if (isSI() || isGFX10()) {
4065     // No flat_scr on SI.
4066     // On GFX10 flat scratch is not a valid register operand and can only be
4067     // accessed with s_setreg/s_getreg.
4068     switch (RegNo) {
4069     case AMDGPU::FLAT_SCR:
4070     case AMDGPU::FLAT_SCR_LO:
4071     case AMDGPU::FLAT_SCR_HI:
4072       return false;
4073     default:
4074       return true;
4075     }
4076   }
4077 
4078   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4079   // SI/CI have.
4080   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4081        R.isValid(); ++R) {
4082     if (*R == RegNo)
4083       return hasSGPR102_SGPR103();
4084   }
4085 
4086   return true;
4087 }
4088 
4089 OperandMatchResultTy
4090 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4091                               OperandMode Mode) {
4092   // Try to parse with a custom parser
4093   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4094 
4095   // If we successfully parsed the operand or if there was an error parsing,
4096   // we are done.
4097   //
4098   // If we are parsing after we reach EndOfStatement then this means we
4099   // are appending default values to the Operands list.  This is only done
4100   // by custom parsers, so we shouldn't continue on to the generic parsing.
4101   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4102       getLexer().is(AsmToken::EndOfStatement))
4103     return ResTy;
4104 
4105   if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4106     unsigned Prefix = Operands.size();
4107     SMLoc LBraceLoc = getTok().getLoc();
4108     Parser.Lex(); // eat the '['
4109 
4110     for (;;) {
4111       ResTy = parseReg(Operands);
4112       if (ResTy != MatchOperand_Success)
4113         return ResTy;
4114 
4115       if (getLexer().is(AsmToken::RBrac))
4116         break;
4117 
4118       if (getLexer().isNot(AsmToken::Comma))
4119         return MatchOperand_ParseFail;
4120       Parser.Lex();
4121     }
4122 
4123     if (Operands.size() - Prefix > 1) {
4124       Operands.insert(Operands.begin() + Prefix,
4125                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4126       Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4127                                                     getTok().getLoc()));
4128     }
4129 
4130     Parser.Lex(); // eat the ']'
4131     return MatchOperand_Success;
4132   }
4133 
4134   return parseRegOrImm(Operands);
4135 }
4136 
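// For example, "v_add_f32_e64" forces the 64-bit (VOP3) encoding and yields
// the mnemonic "v_add_f32"; "_e32", "_dpp" and "_sdwa" likewise force their
// respective encodings.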
4137 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4138   // Clear any forced encodings from the previous instruction.
4139   setForcedEncodingSize(0);
4140   setForcedDPP(false);
4141   setForcedSDWA(false);
4142 
4143   if (Name.endswith("_e64")) {
4144     setForcedEncodingSize(64);
4145     return Name.substr(0, Name.size() - 4);
4146   } else if (Name.endswith("_e32")) {
4147     setForcedEncodingSize(32);
4148     return Name.substr(0, Name.size() - 4);
4149   } else if (Name.endswith("_dpp")) {
4150     setForcedDPP(true);
4151     return Name.substr(0, Name.size() - 4);
4152   } else if (Name.endswith("_sdwa")) {
4153     setForcedSDWA(true);
4154     return Name.substr(0, Name.size() - 5);
4155   }
4156   return Name;
4157 }
4158 
4159 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4160                                        StringRef Name,
4161                                        SMLoc NameLoc, OperandVector &Operands) {
4162   // Add the instruction mnemonic
4163   Name = parseMnemonicSuffix(Name);
4164   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4165 
4166   bool IsMIMG = Name.startswith("image_");
4167 
4168   while (!getLexer().is(AsmToken::EndOfStatement)) {
4169     OperandMode Mode = OperandMode_Default;
4170     if (IsMIMG && isGFX10() && Operands.size() == 2)
4171       Mode = OperandMode_NSA;
4172     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4173 
4174     // Eat the comma or space if there is one.
4175     if (getLexer().is(AsmToken::Comma))
4176       Parser.Lex();
4177 
4178     switch (Res) {
4179       case MatchOperand_Success: break;
4180       case MatchOperand_ParseFail:
4181         // FIXME: use real operand location rather than the current location.
4182         Error(getLexer().getLoc(), "failed parsing operand.");
4183         while (!getLexer().is(AsmToken::EndOfStatement)) {
4184           Parser.Lex();
4185         }
4186         return true;
4187       case MatchOperand_NoMatch:
4188         // FIXME: use real operand location rather than the current location.
4189         Error(getLexer().getLoc(), "not a valid operand.");
4190         while (!getLexer().is(AsmToken::EndOfStatement)) {
4191           Parser.Lex();
4192         }
4193         return true;
4194     }
4195   }
4196 
4197   return false;
4198 }
4199 
4200 //===----------------------------------------------------------------------===//
4201 // Utility functions
4202 //===----------------------------------------------------------------------===//
4203 
4204 OperandMatchResultTy
4205 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4206 
4207   if (!trySkipId(Prefix, AsmToken::Colon))
4208     return MatchOperand_NoMatch;
4209 
4210   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4211 }
4212 
4213 OperandMatchResultTy
4214 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4215                                     AMDGPUOperand::ImmTy ImmTy,
4216                                     bool (*ConvertResult)(int64_t&)) {
4217   SMLoc S = getLoc();
4218   int64_t Value = 0;
4219 
4220   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4221   if (Res != MatchOperand_Success)
4222     return Res;
4223 
4224   if (ConvertResult && !ConvertResult(Value)) {
4225     Error(S, "invalid " + StringRef(Prefix) + " value.");
4226   }
4227 
4228   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4229   return MatchOperand_Success;
4230 }
4231 
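// Parses a small bit array such as "op_sel:[0,1,1,0]" (illustrative); each
// element must be 0 or 1, and element I sets bit I of the resulting immediate.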
4232 OperandMatchResultTy
4233 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4234                                              OperandVector &Operands,
4235                                              AMDGPUOperand::ImmTy ImmTy,
4236                                              bool (*ConvertResult)(int64_t&)) {
4237   SMLoc S = getLoc();
4238   if (!trySkipId(Prefix, AsmToken::Colon))
4239     return MatchOperand_NoMatch;
4240 
4241   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4242     return MatchOperand_ParseFail;
4243 
4244   unsigned Val = 0;
4245   const unsigned MaxSize = 4;
4246 
4247   // FIXME: How to verify the number of elements matches the number of src
4248   // operands?
4249   for (int I = 0; ; ++I) {
4250     int64_t Op;
4251     SMLoc Loc = getLoc();
4252     if (!parseExpr(Op))
4253       return MatchOperand_ParseFail;
4254 
4255     if (Op != 0 && Op != 1) {
4256       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4257       return MatchOperand_ParseFail;
4258     }
4259 
4260     Val |= (Op << I);
4261 
4262     if (trySkipToken(AsmToken::RBrac))
4263       break;
4264 
4265     if (I + 1 == MaxSize) {
4266       Error(getLoc(), "expected a closing square bracket");
4267       return MatchOperand_ParseFail;
4268     }
4269 
4270     if (!skipToken(AsmToken::Comma, "expected a comma"))
4271       return MatchOperand_ParseFail;
4272   }
4273 
4274   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4275   return MatchOperand_Success;
4276 }
4277 
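// Parses a named boolean modifier, e.g. "glc" sets the bit and "noglc" clears
// it (illustrative names); if the token is absent the default value 0 is used.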
4278 OperandMatchResultTy
4279 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4280                                AMDGPUOperand::ImmTy ImmTy) {
4281   int64_t Bit = 0;
4282   SMLoc S = Parser.getTok().getLoc();
4283 
4284   // If we are already at the end of the statement, this is an omitted
4285   // optional argument, so keep the default value.
4286   if (getLexer().isNot(AsmToken::EndOfStatement)) {
4287     switch(getLexer().getKind()) {
4288       case AsmToken::Identifier: {
4289         StringRef Tok = Parser.getTok().getString();
4290         if (Tok == Name) {
4291           if (Tok == "r128" && isGFX9())
4292             Error(S, "r128 modifier is not supported on this GPU");
4293           if (Tok == "a16" && !isGFX9())
4294             Error(S, "a16 modifier is not supported on this GPU");
4295           Bit = 1;
4296           Parser.Lex();
4297         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4298           Bit = 0;
4299           Parser.Lex();
4300         } else {
4301           return MatchOperand_NoMatch;
4302         }
4303         break;
4304       }
4305       default:
4306         return MatchOperand_NoMatch;
4307     }
4308   }
4309 
4310   if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4311     return MatchOperand_ParseFail;
4312 
4313   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4314   return MatchOperand_Success;
4315 }
4316 
4317 static void addOptionalImmOperand(
4318   MCInst& Inst, const OperandVector& Operands,
4319   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4320   AMDGPUOperand::ImmTy ImmT,
4321   int64_t Default = 0) {
4322   auto i = OptionalIdx.find(ImmT);
4323   if (i != OptionalIdx.end()) {
4324     unsigned Idx = i->second;
4325     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4326   } else {
4327     Inst.addOperand(MCOperand::createImm(Default));
4328   }
4329 }
4330 
4331 OperandMatchResultTy
4332 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4333   if (getLexer().isNot(AsmToken::Identifier)) {
4334     return MatchOperand_NoMatch;
4335   }
4336   StringRef Tok = Parser.getTok().getString();
4337   if (Tok != Prefix) {
4338     return MatchOperand_NoMatch;
4339   }
4340 
4341   Parser.Lex();
4342   if (getLexer().isNot(AsmToken::Colon)) {
4343     return MatchOperand_ParseFail;
4344   }
4345 
4346   Parser.Lex();
4347   if (getLexer().isNot(AsmToken::Identifier)) {
4348     return MatchOperand_ParseFail;
4349   }
4350 
4351   Value = Parser.getTok().getString();
4352   return MatchOperand_Success;
4353 }
4354 
4355 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
4356 // values to live in a joint format operand in the MCInst encoding.
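// For example, "dfmt:15, nfmt:2" (illustrative values) is encoded as
// 15 | (2 << 4).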
4357 OperandMatchResultTy
4358 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
4359   SMLoc S = Parser.getTok().getLoc();
4360   int64_t Dfmt = 0, Nfmt = 0;
4361   // dfmt and nfmt can appear in either order, and each is optional.
4362   bool GotDfmt = false, GotNfmt = false;
4363   while (!GotDfmt || !GotNfmt) {
4364     if (!GotDfmt) {
4365       auto Res = parseIntWithPrefix("dfmt", Dfmt);
4366       if (Res != MatchOperand_NoMatch) {
4367         if (Res != MatchOperand_Success)
4368           return Res;
4369         if (Dfmt >= 16) {
4370           Error(Parser.getTok().getLoc(), "out of range dfmt");
4371           return MatchOperand_ParseFail;
4372         }
4373         GotDfmt = true;
4374         Parser.Lex();
4375         continue;
4376       }
4377     }
4378     if (!GotNfmt) {
4379       auto Res = parseIntWithPrefix("nfmt", Nfmt);
4380       if (Res != MatchOperand_NoMatch) {
4381         if (Res != MatchOperand_Success)
4382           return Res;
4383         if (Nfmt >= 8) {
4384           Error(Parser.getTok().getLoc(), "out of range nfmt");
4385           return MatchOperand_ParseFail;
4386         }
4387         GotNfmt = true;
4388         Parser.Lex();
4389         continue;
4390       }
4391     }
4392     break;
4393   }
4394   if (!GotDfmt && !GotNfmt)
4395     return MatchOperand_NoMatch;
4396   auto Format = Dfmt | Nfmt << 4;
4397   Operands.push_back(
4398       AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
4399   return MatchOperand_Success;
4400 }
4401 
4402 //===----------------------------------------------------------------------===//
4403 // ds
4404 //===----------------------------------------------------------------------===//
4405 
4406 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
4407                                     const OperandVector &Operands) {
4408   OptionalImmIndexMap OptionalIdx;
4409 
4410   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4411     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4412 
4413     // Add the register arguments
4414     if (Op.isReg()) {
4415       Op.addRegOperands(Inst, 1);
4416       continue;
4417     }
4418 
4419     // Handle optional arguments
4420     OptionalIdx[Op.getImmTy()] = i;
4421   }
4422 
4423   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
4424   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
4425   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4426 
4427   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4428 }
4429 
4430 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
4431                                 bool IsGdsHardcoded) {
4432   OptionalImmIndexMap OptionalIdx;
4433 
4434   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4435     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4436 
4437     // Add the register arguments
4438     if (Op.isReg()) {
4439       Op.addRegOperands(Inst, 1);
4440       continue;
4441     }
4442 
4443     if (Op.isToken() && Op.getToken() == "gds") {
4444       IsGdsHardcoded = true;
4445       continue;
4446     }
4447 
4448     // Handle optional arguments
4449     OptionalIdx[Op.getImmTy()] = i;
4450   }
4451 
4452   AMDGPUOperand::ImmTy OffsetType =
4453     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
4454      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
4455      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
4456                                                       AMDGPUOperand::ImmTyOffset;
4457 
4458   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
4459 
4460   if (!IsGdsHardcoded) {
4461     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4462   }
4463   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4464 }
4465 
4466 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
4467   OptionalImmIndexMap OptionalIdx;
4468 
4469   unsigned OperandIdx[4];
4470   unsigned EnMask = 0;
4471   int SrcIdx = 0;
4472 
4473   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4474     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4475 
4476     // Add the register arguments
4477     if (Op.isReg()) {
4478       assert(SrcIdx < 4);
4479       OperandIdx[SrcIdx] = Inst.size();
4480       Op.addRegOperands(Inst, 1);
4481       ++SrcIdx;
4482       continue;
4483     }
4484 
4485     if (Op.isOff()) {
4486       assert(SrcIdx < 4);
4487       OperandIdx[SrcIdx] = Inst.size();
4488       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
4489       ++SrcIdx;
4490       continue;
4491     }
4492 
4493     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
4494       Op.addImmOperands(Inst, 1);
4495       continue;
4496     }
4497 
4498     if (Op.isToken() && Op.getToken() == "done")
4499       continue;
4500 
4501     // Handle optional arguments
4502     OptionalIdx[Op.getImmTy()] = i;
4503   }
4504 
4505   assert(SrcIdx == 4);
4506 
4507   bool Compr = false;
4508   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
4509     Compr = true;
4510     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
4511     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
4512     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
4513   }
4514 
4515   for (auto i = 0; i < SrcIdx; ++i) {
4516     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
4517       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
4518     }
4519   }
4520 
4521   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
4522   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
4523 
4524   Inst.addOperand(MCOperand::createImm(EnMask));
4525 }
4526 
4527 //===----------------------------------------------------------------------===//
4528 // s_waitcnt
4529 //===----------------------------------------------------------------------===//
4530 
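// An s_waitcnt operand is either a raw immediate or a list of named counters,
// e.g. "s_waitcnt vmcnt(0) lgkmcnt(0)" or "s_waitcnt lgkmcnt(1)"
// (illustrative); counters that are not mentioned keep their "no wait" bits.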
4531 static bool
4532 encodeCnt(
4533   const AMDGPU::IsaVersion ISA,
4534   int64_t &IntVal,
4535   int64_t CntVal,
4536   bool Saturate,
4537   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
4538   unsigned (*decode)(const IsaVersion &Version, unsigned))
4539 {
4540   bool Failed = false;
4541 
4542   IntVal = encode(ISA, IntVal, CntVal);
4543   if (CntVal != decode(ISA, IntVal)) {
4544     if (Saturate) {
4545       IntVal = encode(ISA, IntVal, -1);
4546     } else {
4547       Failed = true;
4548     }
4549   }
4550   return Failed;
4551 }
4552 
4553 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
4554 
4555   SMLoc CntLoc = getLoc();
4556   StringRef CntName = getTokenStr();
4557 
4558   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
4559       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
4560     return false;
4561 
4562   int64_t CntVal;
4563   SMLoc ValLoc = getLoc();
4564   if (!parseExpr(CntVal))
4565     return false;
4566 
4567   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4568 
4569   bool Failed = true;
4570   bool Sat = CntName.endswith("_sat");
4571 
4572   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
4573     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
4574   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
4575     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
4576   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
4577     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
4578   } else {
4579     Error(CntLoc, "invalid counter name " + CntName);
4580     return false;
4581   }
4582 
4583   if (Failed) {
4584     Error(ValLoc, "value is too large for " + CntName);
4585     return false;
4586   }
4587 
4588   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
4589     return false;
4590 
4591   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
4592     if (isToken(AsmToken::EndOfStatement)) {
4593       Error(getLoc(), "expected a counter name");
4594       return false;
4595     }
4596   }
4597 
4598   return true;
4599 }
4600 
4601 OperandMatchResultTy
4602 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
4603   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4604   int64_t Waitcnt = getWaitcntBitMask(ISA);
4605   SMLoc S = getLoc();
4606 
4607   // If parsing failed, do not return an error code
4608   // to avoid excessive error messages.
4609   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
4610     while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
4611   } else {
4612     parseExpr(Waitcnt);
4613   }
4614 
4615   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
4616   return MatchOperand_Success;
4617 }
4618 
4619 bool
4620 AMDGPUOperand::isSWaitCnt() const {
4621   return isImm();
4622 }
4623 
4624 //===----------------------------------------------------------------------===//
4625 // hwreg
4626 //===----------------------------------------------------------------------===//
4627 
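// Parse the body of a hwreg(...) operand: a register specified either by
// symbolic name or by a numeric code, optionally followed by a bit offset and
// a bitfield width, e.g. "hwreg(HW_REG_MODE, 0, 32)".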
4628 bool
4629 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
4630                                 int64_t &Offset,
4631                                 int64_t &Width) {
4632   using namespace llvm::AMDGPU::Hwreg;
4633 
4634   // The register may be specified by name or using a numeric code
4635   if (isToken(AsmToken::Identifier) &&
4636       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
4637     HwReg.IsSymbolic = true;
4638     lex(); // skip register name
4639   } else if (!parseExpr(HwReg.Id)) {
4640     return false;
4641   }
4642 
4643   if (trySkipToken(AsmToken::RParen))
4644     return true;
4645 
4646   // parse optional params
4647   return
4648     skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
4649     parseExpr(Offset) &&
4650     skipToken(AsmToken::Comma, "expected a comma") &&
4651     parseExpr(Width) &&
4652     skipToken(AsmToken::RParen, "expected a closing parenthesis");
4653 }
4654 
4655 void
4656 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
4657                                const int64_t Offset,
4658                                const int64_t Width,
4659                                const SMLoc Loc) {
4660 
4661   using namespace llvm::AMDGPU::Hwreg;
4662 
4663   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
4664     Error(Loc, "specified hardware register is not supported on this GPU");
4665   } else if (!isValidHwreg(HwReg.Id)) {
4666     Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
4667   } else if (!isValidHwregOffset(Offset)) {
4668     Error(Loc, "invalid bit offset: only 5-bit values are legal");
4669   } else if (!isValidHwregWidth(Width)) {
4670     Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
4671   }
4672 }
4673 
4674 OperandMatchResultTy
4675 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
4676   using namespace llvm::AMDGPU::Hwreg;
4677 
4678   int64_t ImmVal = 0;
4679   SMLoc Loc = getLoc();
4680 
4681   // If parsing failed, do not return an error code
4682   // to avoid excessive error messages.
4683   if (trySkipId("hwreg", AsmToken::LParen)) {
4684     OperandInfoTy HwReg(ID_UNKNOWN_);
4685     int64_t Offset = OFFSET_DEFAULT_;
4686     int64_t Width = WIDTH_DEFAULT_;
4687     if (parseHwregBody(HwReg, Offset, Width)) {
4688       validateHwreg(HwReg, Offset, Width, Loc);
4689       ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
4690     }
4691   } else if (parseExpr(ImmVal)) {
4692     if (ImmVal < 0 || !isUInt<16>(ImmVal))
4693       Error(Loc, "invalid immediate: only 16-bit values are legal");
4694   }
4695 
4696   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
4697   return MatchOperand_Success;
4698 }
4699 
4700 bool AMDGPUOperand::isHwreg() const {
4701   return isImmTy(ImmTyHwreg);
4702 }
4703 
4704 //===----------------------------------------------------------------------===//
4705 // sendmsg
4706 //===----------------------------------------------------------------------===//
4707 
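// Parse a sendmsg(...) construct: a message id (symbolic or numeric),
// optionally followed by an operation id and a stream id, e.g.
// "sendmsg(MSG_GS, GS_OP_EMIT, 0)". Returns true on parse failure.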
4708 bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) {
4709   using namespace llvm::AMDGPU::SendMsg;
4710 
4711   if (Parser.getTok().getString() != "sendmsg")
4712     return true;
4713   Parser.Lex();
4714 
4715   if (getLexer().isNot(AsmToken::LParen))
4716     return true;
4717   Parser.Lex();
4718 
4719   if (getLexer().is(AsmToken::Identifier)) {
4720     Msg.IsSymbolic = true;
4721     Msg.Id = ID_UNKNOWN_;
4722     const StringRef Tok = Parser.getTok().getString();
4723     for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
4724       switch(i) {
4725         default: continue; // Omit gaps.
4726         case ID_GS_ALLOC_REQ:
4727           if (isSI() || isCI() || isVI())
4728             continue;
4729           break;
4730         case ID_INTERRUPT: case ID_GS: case ID_GS_DONE:
4731         case ID_SYSMSG: break;
4732       }
4733       if (Tok == IdSymbolic[i]) {
4734         Msg.Id = i;
4735         break;
4736       }
4737     }
4738     Parser.Lex();
4739   } else {
4740     Msg.IsSymbolic = false;
4741     if (getLexer().isNot(AsmToken::Integer))
4742       return true;
4743     if (getParser().parseAbsoluteExpression(Msg.Id))
4744       return true;
4745     if (getLexer().is(AsmToken::Integer))
4746       if (getParser().parseAbsoluteExpression(Msg.Id))
4747         Msg.Id = ID_UNKNOWN_;
4748   }
4749   if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest.
4750     return false;
4751 
4752   if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) {
4753     if (getLexer().isNot(AsmToken::RParen))
4754       return true;
4755     Parser.Lex();
4756     return false;
4757   }
4758 
4759   if (getLexer().isNot(AsmToken::Comma))
4760     return true;
4761   Parser.Lex();
4762 
4763   assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG);
4764   Operation.Id = ID_UNKNOWN_;
4765   if (getLexer().is(AsmToken::Identifier)) {
4766     Operation.IsSymbolic = true;
4767     const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
4768     const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
4769     const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
4770     const StringRef Tok = Parser.getTok().getString();
4771     for (int i = F; i < L; ++i) {
4772       if (Tok == S[i]) {
4773         Operation.Id = i;
4774         break;
4775       }
4776     }
4777     Parser.Lex();
4778   } else {
4779     Operation.IsSymbolic = false;
4780     if (getLexer().isNot(AsmToken::Integer))
4781       return true;
4782     if (getParser().parseAbsoluteExpression(Operation.Id))
4783       return true;
4784   }
4785 
4786   if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
4787     // Stream id is optional.
4788     if (getLexer().is(AsmToken::RParen)) {
4789       Parser.Lex();
4790       return false;
4791     }
4792 
4793     if (getLexer().isNot(AsmToken::Comma))
4794       return true;
4795     Parser.Lex();
4796 
4797     if (getLexer().isNot(AsmToken::Integer))
4798       return true;
4799     if (getParser().parseAbsoluteExpression(StreamId))
4800       return true;
4801   }
4802 
4803   if (getLexer().isNot(AsmToken::RParen))
4804     return true;
4805   Parser.Lex();
4806   return false;
4807 }
4808 
4809 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
4810   if (getLexer().getKind() != AsmToken::Identifier)
4811     return MatchOperand_NoMatch;
4812 
4813   StringRef Str = Parser.getTok().getString();
4814   int Slot = StringSwitch<int>(Str)
4815     .Case("p10", 0)
4816     .Case("p20", 1)
4817     .Case("p0", 2)
4818     .Default(-1);
4819 
4820   SMLoc S = Parser.getTok().getLoc();
4821   if (Slot == -1)
4822     return MatchOperand_ParseFail;
4823 
4824   Parser.Lex();
4825   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
4826                                               AMDGPUOperand::ImmTyInterpSlot));
4827   return MatchOperand_Success;
4828 }
4829 
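// Parse an interpolation attribute operand of the form "attr<N>.<chan>", e.g.
// "attr0.x", where N is an attribute index in the range [0, 63] and the
// channel is one of x, y, z or w. Two immediate operands are produced: the
// attribute index and the channel.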
4830 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
4831   if (getLexer().getKind() != AsmToken::Identifier)
4832     return MatchOperand_NoMatch;
4833 
4834   StringRef Str = Parser.getTok().getString();
4835   if (!Str.startswith("attr"))
4836     return MatchOperand_NoMatch;
4837 
4838   StringRef Chan = Str.take_back(2);
4839   int AttrChan = StringSwitch<int>(Chan)
4840     .Case(".x", 0)
4841     .Case(".y", 1)
4842     .Case(".z", 2)
4843     .Case(".w", 3)
4844     .Default(-1);
4845   if (AttrChan == -1)
4846     return MatchOperand_ParseFail;
4847 
4848   Str = Str.drop_back(2).drop_front(4);
4849 
4850   uint8_t Attr;
4851   if (Str.getAsInteger(10, Attr))
4852     return MatchOperand_ParseFail;
4853 
4854   SMLoc S = Parser.getTok().getLoc();
4855   Parser.Lex();
4856   if (Attr > 63) {
4857     Error(S, "out of bounds attr");
4858     return MatchOperand_Success;
4859   }
4860 
4861   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
4862 
4863   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
4864                                               AMDGPUOperand::ImmTyInterpAttr));
4865   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
4866                                               AMDGPUOperand::ImmTyAttrChan));
4867   return MatchOperand_Success;
4868 }
4869 
4870 void AMDGPUAsmParser::errorExpTgt() {
4871   Error(Parser.getTok().getLoc(), "invalid exp target");
4872 }
4873 
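// Map an EXP target name to its encoded value:
//   mrt0..mrt7 -> 0..7, mrtz -> 8, null -> 9,
//   pos0..pos4 -> 12..16 (pos4 is GFX10 only), prim -> 20 (GFX10 only),
//   param0..param31 -> 32..63.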
4874 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
4875                                                       uint8_t &Val) {
4876   if (Str == "null") {
4877     Val = 9;
4878     return MatchOperand_Success;
4879   }
4880 
4881   if (Str.startswith("mrt")) {
4882     Str = Str.drop_front(3);
4883     if (Str == "z") { // == mrtz
4884       Val = 8;
4885       return MatchOperand_Success;
4886     }
4887 
4888     if (Str.getAsInteger(10, Val))
4889       return MatchOperand_ParseFail;
4890 
4891     if (Val > 7)
4892       errorExpTgt();
4893 
4894     return MatchOperand_Success;
4895   }
4896 
4897   if (Str.startswith("pos")) {
4898     Str = Str.drop_front(3);
4899     if (Str.getAsInteger(10, Val))
4900       return MatchOperand_ParseFail;
4901 
4902     if (Val > 4 || (Val == 4 && !isGFX10()))
4903       errorExpTgt();
4904 
4905     Val += 12;
4906     return MatchOperand_Success;
4907   }
4908 
4909   if (isGFX10() && Str == "prim") {
4910     Val = 20;
4911     return MatchOperand_Success;
4912   }
4913 
4914   if (Str.startswith("param")) {
4915     Str = Str.drop_front(5);
4916     if (Str.getAsInteger(10, Val))
4917       return MatchOperand_ParseFail;
4918 
4919     if (Val >= 32)
4920       errorExpTgt();
4921 
4922     Val += 32;
4923     return MatchOperand_Success;
4924   }
4925 
4926   if (Str.startswith("invalid_target_")) {
4927     Str = Str.drop_front(15);
4928     if (Str.getAsInteger(10, Val))
4929       return MatchOperand_ParseFail;
4930 
4931     errorExpTgt();
4932     return MatchOperand_Success;
4933   }
4934 
4935   return MatchOperand_NoMatch;
4936 }
4937 
4938 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
4939   uint8_t Val;
4940   StringRef Str = Parser.getTok().getString();
4941 
4942   auto Res = parseExpTgtImpl(Str, Val);
4943   if (Res != MatchOperand_Success)
4944     return Res;
4945 
4946   SMLoc S = Parser.getTok().getLoc();
4947   Parser.Lex();
4948 
4949   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
4950                                               AMDGPUOperand::ImmTyExpTgt));
4951   return MatchOperand_Success;
4952 }
4953 
4954 OperandMatchResultTy
4955 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
4956   using namespace llvm::AMDGPU::SendMsg;
4957 
4958   int64_t Imm16Val = 0;
4959   SMLoc S = Parser.getTok().getLoc();
4960 
4961   switch(getLexer().getKind()) {
4962   default:
4963     return MatchOperand_NoMatch;
4964   case AsmToken::Integer:
4965     // The operand can be an integer value.
4966     if (getParser().parseAbsoluteExpression(Imm16Val))
4967       return MatchOperand_NoMatch;
4968     if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
4969       Error(S, "invalid immediate: only 16-bit values are legal");
4970     // Do not return an error code, but create an imm operand anyway and proceed
4971     // to the next operand, if any. That avoids unnecessary error messages.
4972     }
4973     break;
4974   case AsmToken::Identifier: {
4975       OperandInfoTy Msg(ID_UNKNOWN_);
4976       OperandInfoTy Operation(OP_UNKNOWN_);
4977       int64_t StreamId = STREAM_ID_DEFAULT_;
4978       if (parseSendMsgConstruct(Msg, Operation, StreamId))
4979         return MatchOperand_ParseFail;
4980       do {
4981         // Validate and encode message ID.
4982         if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE)
4983                 || (Msg.Id == ID_GS_ALLOC_REQ && !isSI() && !isCI() && !isVI())
4984                 || Msg.Id == ID_SYSMSG)) {
4985           if (Msg.IsSymbolic)
4986             Error(S, "invalid/unsupported symbolic name of message");
4987           else
4988             Error(S, "invalid/unsupported code of message");
4989           break;
4990         }
4991         Imm16Val = (Msg.Id << ID_SHIFT_);
4992         // Validate and encode operation ID.
4993         if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) {
4994           if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) {
4995             if (Operation.IsSymbolic)
4996               Error(S, "invalid symbolic name of GS_OP");
4997             else
4998               Error(S, "invalid code of GS_OP: only 2-bit values are legal");
4999             break;
5000           }
5001           if (Operation.Id == OP_GS_NOP
5002               && Msg.Id != ID_GS_DONE) {
5003             Error(S, "invalid GS_OP: NOP is for GS_DONE only");
5004             break;
5005           }
5006           Imm16Val |= (Operation.Id << OP_SHIFT_);
5007         }
5008         if (Msg.Id == ID_SYSMSG) {
5009           if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) {
5010             if (Operation.IsSymbolic)
5011               Error(S, "invalid/unsupported symbolic name of SYSMSG_OP");
5012             else
5013               Error(S, "invalid/unsupported code of SYSMSG_OP");
5014             break;
5015           }
5016           Imm16Val |= (Operation.Id << OP_SHIFT_);
5017         }
5018         // Validate and encode stream ID.
5019         if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
5020           if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) {
5021             Error(S, "invalid stream id: only 2-bit values are legal");
5022             break;
5023           }
5024           Imm16Val |= (StreamId << STREAM_ID_SHIFT_);
5025         }
5026       } while (false);
5027     }
5028     break;
5029   }
5030   Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
5031   return MatchOperand_Success;
5032 }
5033 
5034 bool AMDGPUOperand::isSendMsg() const {
5035   return isImmTy(ImmTySendMsg);
5036 }
5037 
5038 //===----------------------------------------------------------------------===//
5039 // parser helpers
5040 //===----------------------------------------------------------------------===//
5041 
5042 bool
5043 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5044   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5045 }
5046 
5047 bool
5048 AMDGPUAsmParser::isId(const StringRef Id) const {
5049   return isId(getToken(), Id);
5050 }
5051 
5052 bool
5053 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5054   return getTokenKind() == Kind;
5055 }
5056 
5057 bool
5058 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5059   if (isId(Id)) {
5060     lex();
5061     return true;
5062   }
5063   return false;
5064 }
5065 
5066 bool
5067 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5068   if (isId(Id) && peekToken().is(Kind)) {
5069     lex();
5070     lex();
5071     return true;
5072   }
5073   return false;
5074 }
5075 
5076 bool
5077 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5078   if (isToken(Kind)) {
5079     lex();
5080     return true;
5081   }
5082   return false;
5083 }
5084 
5085 bool
5086 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5087                            const StringRef ErrMsg) {
5088   if (!trySkipToken(Kind)) {
5089     Error(getLoc(), ErrMsg);
5090     return false;
5091   }
5092   return true;
5093 }
5094 
5095 bool
5096 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5097   return !getParser().parseAbsoluteExpression(Imm);
5098 }
5099 
5100 bool
5101 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5102   if (isToken(AsmToken::String)) {
5103     Val = getToken().getStringContents();
5104     lex();
5105     return true;
5106   } else {
5107     Error(getLoc(), ErrMsg);
5108     return false;
5109   }
5110 }
5111 
5112 AsmToken
5113 AMDGPUAsmParser::getToken() const {
5114   return Parser.getTok();
5115 }
5116 
5117 AsmToken
5118 AMDGPUAsmParser::peekToken() {
5119   return getLexer().peekTok();
5120 }
5121 
5122 void
5123 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
5124   auto TokCount = getLexer().peekTokens(Tokens);
5125 
5126   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
5127     Tokens[Idx] = AsmToken(AsmToken::Error, "");
5128 }
5129 
5130 AsmToken::TokenKind
5131 AMDGPUAsmParser::getTokenKind() const {
5132   return getLexer().getKind();
5133 }
5134 
5135 SMLoc
5136 AMDGPUAsmParser::getLoc() const {
5137   return getToken().getLoc();
5138 }
5139 
5140 StringRef
5141 AMDGPUAsmParser::getTokenStr() const {
5142   return getToken().getString();
5143 }
5144 
5145 void
5146 AMDGPUAsmParser::lex() {
5147   Parser.Lex();
5148 }
5149 
5150 //===----------------------------------------------------------------------===//
5151 // swizzle
5152 //===----------------------------------------------------------------------===//
5153 
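// Pack the AND, OR and XOR lane masks of a BITMASK_PERM swizzle into its
// immediate encoding. The other swizzle macros (broadcast, swap, reverse) are
// also expressed in terms of this bitmask encoding below.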
5154 LLVM_READNONE
5155 static unsigned
5156 encodeBitmaskPerm(const unsigned AndMask,
5157                   const unsigned OrMask,
5158                   const unsigned XorMask) {
5159   using namespace llvm::AMDGPU::Swizzle;
5160 
5161   return BITMASK_PERM_ENC |
5162          (AndMask << BITMASK_AND_SHIFT) |
5163          (OrMask  << BITMASK_OR_SHIFT)  |
5164          (XorMask << BITMASK_XOR_SHIFT);
5165 }
5166 
5167 bool
5168 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5169                                       const unsigned MinVal,
5170                                       const unsigned MaxVal,
5171                                       const StringRef ErrMsg) {
5172   for (unsigned i = 0; i < OpNum; ++i) {
5173     if (!skipToken(AsmToken::Comma, "expected a comma")){
5174       return false;
5175     }
5176     SMLoc ExprLoc = Parser.getTok().getLoc();
5177     if (!parseExpr(Op[i])) {
5178       return false;
5179     }
5180     if (Op[i] < MinVal || Op[i] > MaxVal) {
5181       Error(ExprLoc, ErrMsg);
5182       return false;
5183     }
5184   }
5185 
5186   return true;
5187 }
5188 
5189 bool
5190 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5191   using namespace llvm::AMDGPU::Swizzle;
5192 
5193   int64_t Lane[LANE_NUM];
5194   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5195                            "expected a 2-bit lane id")) {
5196     Imm = QUAD_PERM_ENC;
5197     for (unsigned I = 0; I < LANE_NUM; ++I) {
5198       Imm |= Lane[I] << (LANE_SHIFT * I);
5199     }
5200     return true;
5201   }
5202   return false;
5203 }
5204 
5205 bool
5206 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5207   using namespace llvm::AMDGPU::Swizzle;
5208 
5209   SMLoc S = Parser.getTok().getLoc();
5210   int64_t GroupSize;
5211   int64_t LaneIdx;
5212 
5213   if (!parseSwizzleOperands(1, &GroupSize,
5214                             2, 32,
5215                             "group size must be in the interval [2,32]")) {
5216     return false;
5217   }
5218   if (!isPowerOf2_64(GroupSize)) {
5219     Error(S, "group size must be a power of two");
5220     return false;
5221   }
5222   if (parseSwizzleOperands(1, &LaneIdx,
5223                            0, GroupSize - 1,
5224                            "lane id must be in the interval [0,group size - 1]")) {
5225     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5226     return true;
5227   }
5228   return false;
5229 }
5230 
5231 bool
5232 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
5233   using namespace llvm::AMDGPU::Swizzle;
5234 
5235   SMLoc S = Parser.getTok().getLoc();
5236   int64_t GroupSize;
5237 
5238   if (!parseSwizzleOperands(1, &GroupSize,
5239       2, 32, "group size must be in the interval [2,32]")) {
5240     return false;
5241   }
5242   if (!isPowerOf2_64(GroupSize)) {
5243     Error(S, "group size must be a power of two");
5244     return false;
5245   }
5246 
5247   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
5248   return true;
5249 }
5250 
5251 bool
5252 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5253   using namespace llvm::AMDGPU::Swizzle;
5254 
5255   SMLoc S = Parser.getTok().getLoc();
5256   int64_t GroupSize;
5257 
5258   if (!parseSwizzleOperands(1, &GroupSize,
5259       1, 16, "group size must be in the interval [1,16]")) {
5260     return false;
5261   }
5262   if (!isPowerOf2_64(GroupSize)) {
5263     Error(S, "group size must be a power of two");
5264     return false;
5265   }
5266 
5267   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5268   return true;
5269 }
5270 
5271 bool
5272 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5273   using namespace llvm::AMDGPU::Swizzle;
5274 
5275   if (!skipToken(AsmToken::Comma, "expected a comma")) {
5276     return false;
5277   }
5278 
5279   StringRef Ctl;
5280   SMLoc StrLoc = Parser.getTok().getLoc();
5281   if (!parseString(Ctl)) {
5282     return false;
5283   }
5284   if (Ctl.size() != BITMASK_WIDTH) {
5285     Error(StrLoc, "expected a 5-character mask");
5286     return false;
5287   }
5288 
5289   unsigned AndMask = 0;
5290   unsigned OrMask = 0;
5291   unsigned XorMask = 0;
5292 
5293   for (size_t i = 0; i < Ctl.size(); ++i) {
5294     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5295     switch(Ctl[i]) {
5296     default:
5297       Error(StrLoc, "invalid mask");
5298       return false;
5299     case '0':
5300       break;
5301     case '1':
5302       OrMask |= Mask;
5303       break;
5304     case 'p':
5305       AndMask |= Mask;
5306       break;
5307     case 'i':
5308       AndMask |= Mask;
5309       XorMask |= Mask;
5310       break;
5311     }
5312   }
5313 
5314   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
5315   return true;
5316 }
5317 
5318 bool
5319 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5320 
5321   SMLoc OffsetLoc = Parser.getTok().getLoc();
5322 
5323   if (!parseExpr(Imm)) {
5324     return false;
5325   }
5326   if (!isUInt<16>(Imm)) {
5327     Error(OffsetLoc, "expected a 16-bit offset");
5328     return false;
5329   }
5330   return true;
5331 }
5332 
5333 bool
5334 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
5335   using namespace llvm::AMDGPU::Swizzle;
5336 
5337   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
5338 
5339     SMLoc ModeLoc = Parser.getTok().getLoc();
5340     bool Ok = false;
5341 
5342     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
5343       Ok = parseSwizzleQuadPerm(Imm);
5344     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
5345       Ok = parseSwizzleBitmaskPerm(Imm);
5346     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
5347       Ok = parseSwizzleBroadcast(Imm);
5348     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
5349       Ok = parseSwizzleSwap(Imm);
5350     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
5351       Ok = parseSwizzleReverse(Imm);
5352     } else {
5353       Error(ModeLoc, "expected a swizzle mode");
5354     }
5355 
5356     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
5357   }
5358 
5359   return false;
5360 }
5361 
5362 OperandMatchResultTy
5363 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
5364   SMLoc S = Parser.getTok().getLoc();
5365   int64_t Imm = 0;
5366 
5367   if (trySkipId("offset")) {
5368 
5369     bool Ok = false;
5370     if (skipToken(AsmToken::Colon, "expected a colon")) {
5371       if (trySkipId("swizzle")) {
5372         Ok = parseSwizzleMacro(Imm);
5373       } else {
5374         Ok = parseSwizzleOffset(Imm);
5375       }
5376     }
5377 
5378     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
5379 
5380     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
5381   } else {
5382     // Swizzle "offset" operand is optional.
5383     // If it is omitted, try parsing other optional operands.
5384     return parseOptionalOpr(Operands);
5385   }
5386 }
5387 
5388 bool
5389 AMDGPUOperand::isSwizzle() const {
5390   return isImmTy(ImmTySwizzle);
5391 }
5392 
5393 //===----------------------------------------------------------------------===//
5394 // VGPR Index Mode
5395 //===----------------------------------------------------------------------===//
5396 
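// Parse the body of a gpr_idx(...) operand: either an immediately closing
// parenthesis (index mode off) or a comma-separated list of index mode names,
// each accepted at most once. The result is a bitmask of the selected modes.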
5397 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
5398 
5399   using namespace llvm::AMDGPU::VGPRIndexMode;
5400 
5401   if (trySkipToken(AsmToken::RParen)) {
5402     return OFF;
5403   }
5404 
5405   int64_t Imm = 0;
5406 
5407   while (true) {
5408     unsigned Mode = 0;
5409     SMLoc S = Parser.getTok().getLoc();
5410 
5411     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
5412       if (trySkipId(IdSymbolic[ModeId])) {
5413         Mode = 1 << ModeId;
5414         break;
5415       }
5416     }
5417 
5418     if (Mode == 0) {
5419       Error(S, (Imm == 0)?
5420                "expected a VGPR index mode or a closing parenthesis" :
5421                "expected a VGPR index mode");
5422       break;
5423     }
5424 
5425     if (Imm & Mode) {
5426       Error(S, "duplicate VGPR index mode");
5427       break;
5428     }
5429     Imm |= Mode;
5430 
5431     if (trySkipToken(AsmToken::RParen))
5432       break;
5433     if (!skipToken(AsmToken::Comma,
5434                    "expected a comma or a closing parenthesis"))
5435       break;
5436   }
5437 
5438   return Imm;
5439 }
5440 
5441 OperandMatchResultTy
5442 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
5443 
5444   int64_t Imm = 0;
5445   SMLoc S = Parser.getTok().getLoc();
5446 
5447   if (getLexer().getKind() == AsmToken::Identifier &&
5448       Parser.getTok().getString() == "gpr_idx" &&
5449       getLexer().peekTok().is(AsmToken::LParen)) {
5450 
5451     Parser.Lex();
5452     Parser.Lex();
5453 
5454     // If parsing failed, trigger an error but do not return an error code
5455     // to avoid excessive error messages.
5456     Imm = parseGPRIdxMacro();
5457 
5458   } else {
5459     if (getParser().parseAbsoluteExpression(Imm))
5460       return MatchOperand_NoMatch;
5461     if (Imm < 0 || !isUInt<4>(Imm)) {
5462       Error(S, "invalid immediate: only 4-bit values are legal");
5463     }
5464   }
5465 
5466   Operands.push_back(
5467       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
5468   return MatchOperand_Success;
5469 }
5470 
5471 bool AMDGPUOperand::isGPRIdxMode() const {
5472   return isImmTy(ImmTyGprIdxMode);
5473 }
5474 
5475 //===----------------------------------------------------------------------===//
5476 // sopp branch targets
5477 //===----------------------------------------------------------------------===//
5478 
5479 OperandMatchResultTy
5480 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
5481   SMLoc S = Parser.getTok().getLoc();
5482 
5483   switch (getLexer().getKind()) {
5484     default: return MatchOperand_ParseFail;
5485     case AsmToken::Integer: {
5486       int64_t Imm;
5487       if (getParser().parseAbsoluteExpression(Imm))
5488         return MatchOperand_ParseFail;
5489       Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
5490       return MatchOperand_Success;
5491     }
5492 
5493     case AsmToken::Identifier:
5494       Operands.push_back(AMDGPUOperand::CreateExpr(this,
5495           MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
5496                                   Parser.getTok().getString()), getContext()), S));
5497       Parser.Lex();
5498       return MatchOperand_Success;
5499   }
5500 }
5501 
5502 //===----------------------------------------------------------------------===//
5503 // Boolean holding registers
5504 //===----------------------------------------------------------------------===//
5505 
5506 OperandMatchResultTy
5507 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
5508   return parseReg(Operands);
5509 }
5510 
5511 //===----------------------------------------------------------------------===//
5512 // mubuf
5513 //===----------------------------------------------------------------------===//
5514 
5515 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
5516   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
5517 }
5518 
5519 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
5520   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
5521 }
5522 
5523 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
5524   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
5525 }
5526 
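// Convert parsed MUBUF operands into an MCInst. For atomic opcodes with a
// return value, a tied source operand is inserted for the destination; in
// addition, opcodes mistakenly matched as the 'lds' variant are corrected
// when no 'lds' modifier was actually present (see the workaround below).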
5527 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
5528                                const OperandVector &Operands,
5529                                bool IsAtomic,
5530                                bool IsAtomicReturn,
5531                                bool IsLds) {
5532   bool IsLdsOpcode = IsLds;
5533   bool HasLdsModifier = false;
5534   OptionalImmIndexMap OptionalIdx;
5535   assert(IsAtomicReturn ? IsAtomic : true);
5536   unsigned FirstOperandIdx = 1;
5537 
5538   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
5539     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5540 
5541     // Add the register arguments
5542     if (Op.isReg()) {
5543       Op.addRegOperands(Inst, 1);
5544       // Insert a tied src for atomic return dst.
5545       // This cannot be postponed as subsequent calls to
5546       // addImmOperands rely on correct number of MC operands.
5547       if (IsAtomicReturn && i == FirstOperandIdx)
5548         Op.addRegOperands(Inst, 1);
5549       continue;
5550     }
5551 
5552     // Handle the case where soffset is an immediate
5553     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5554       Op.addImmOperands(Inst, 1);
5555       continue;
5556     }
5557 
5558     HasLdsModifier |= Op.isLDS();
5559 
5560     // Handle tokens like 'offen' which are sometimes hard-coded into the
5561     // asm string.  There are no MCInst operands for these.
5562     if (Op.isToken()) {
5563       continue;
5564     }
5565     assert(Op.isImm());
5566 
5567     // Handle optional arguments
5568     OptionalIdx[Op.getImmTy()] = i;
5569   }
5570 
5571   // This is a workaround for an llvm quirk which may result in an
5572   // incorrect instruction selection. Lds and non-lds versions of
5573   // MUBUF instructions are identical except that lds versions
5574   // have a mandatory 'lds' modifier. However, this modifier follows
5575   // optional modifiers, and the llvm asm matcher regards this 'lds'
5576   // modifier as an optional one. As a result, an lds version
5577   // of the opcode may be selected even if it has no 'lds' modifier.
5578   if (IsLdsOpcode && !HasLdsModifier) {
5579     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
5580     if (NoLdsOpcode != -1) { // Got lds version - correct it.
5581       Inst.setOpcode(NoLdsOpcode);
5582       IsLdsOpcode = false;
5583     }
5584   }
5585 
5586   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
5587   if (!IsAtomic) { // glc is hard-coded.
5588     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5589   }
5590   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5591 
5592   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
5593     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5594   }
5595 
5596   if (isGFX10())
5597     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5598 }
5599 
5600 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
5601   OptionalImmIndexMap OptionalIdx;
5602 
5603   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5604     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5605 
5606     // Add the register arguments
5607     if (Op.isReg()) {
5608       Op.addRegOperands(Inst, 1);
5609       continue;
5610     }
5611 
5612     // Handle the case where soffset is an immediate
5613     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5614       Op.addImmOperands(Inst, 1);
5615       continue;
5616     }
5617 
5618     // Handle tokens like 'offen' which are sometimes hard-coded into the
5619     // asm string.  There are no MCInst operands for these.
5620     if (Op.isToken()) {
5621       continue;
5622     }
5623     assert(Op.isImm());
5624 
5625     // Handle optional arguments
5626     OptionalIdx[Op.getImmTy()] = i;
5627   }
5628 
5629   addOptionalImmOperand(Inst, Operands, OptionalIdx,
5630                         AMDGPUOperand::ImmTyOffset);
5631   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
5632   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5633   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5634   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5635 
5636   if (isGFX10())
5637     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5638 }
5639 
5640 //===----------------------------------------------------------------------===//
5641 // mimg
5642 //===----------------------------------------------------------------------===//
5643 
5644 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
5645                               bool IsAtomic) {
5646   unsigned I = 1;
5647   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5648   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5649     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5650   }
5651 
5652   if (IsAtomic) {
5653     // Add src, same as dst
5654     assert(Desc.getNumDefs() == 1);
5655     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
5656   }
5657 
5658   OptionalImmIndexMap OptionalIdx;
5659 
5660   for (unsigned E = Operands.size(); I != E; ++I) {
5661     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5662 
5663     // Add the register arguments
5664     if (Op.isReg()) {
5665       Op.addRegOperands(Inst, 1);
5666     } else if (Op.isImmModifier()) {
5667       OptionalIdx[Op.getImmTy()] = I;
5668     } else if (!Op.isToken()) {
5669       llvm_unreachable("unexpected operand type");
5670     }
5671   }
5672 
5673   bool IsGFX10 = isGFX10();
5674 
5675   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
5676   if (IsGFX10)
5677     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
5678   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
5679   if (IsGFX10)
5680     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5681   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5682   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5683   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
5684   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5685   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
5686   if (!IsGFX10)
5687     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
5688   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
5689 }
5690 
5691 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
5692   cvtMIMG(Inst, Operands, true);
5693 }
5694 
5695 //===----------------------------------------------------------------------===//
5696 // smrd
5697 //===----------------------------------------------------------------------===//
5698 
5699 bool AMDGPUOperand::isSMRDOffset8() const {
5700   return isImm() && isUInt<8>(getImm());
5701 }
5702 
5703 bool AMDGPUOperand::isSMRDOffset20() const {
5704   return isImm() && isUInt<20>(getImm());
5705 }
5706 
5707 bool AMDGPUOperand::isSMRDLiteralOffset() const {
5708   // 32-bit literals are only supported on CI, and we only want to use them
5709   // when the offset does not fit in 8 bits.
5710   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
5711 }
5712 
5713 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
5714   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5715 }
5716 
5717 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
5718   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5719 }
5720 
5721 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
5722   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5723 }
5724 
5725 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
5726   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5727 }
5728 
5729 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
5730   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5731 }
5732 
5733 //===----------------------------------------------------------------------===//
5734 // vop3
5735 //===----------------------------------------------------------------------===//
5736 
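// Convert the omod modifier from its assembly form to its encoded value:
// "mul:2" and "mul:4" encode as 1 and 2 respectively, "div:2" encodes as 3,
// and an encoding of 0 means omod is not applied.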
5737 static bool ConvertOmodMul(int64_t &Mul) {
5738   if (Mul != 1 && Mul != 2 && Mul != 4)
5739     return false;
5740 
5741   Mul >>= 1;
5742   return true;
5743 }
5744 
5745 static bool ConvertOmodDiv(int64_t &Div) {
5746   if (Div == 1) {
5747     Div = 0;
5748     return true;
5749   }
5750 
5751   if (Div == 2) {
5752     Div = 3;
5753     return true;
5754   }
5755 
5756   return false;
5757 }
5758 
5759 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
5760   if (BoundCtrl == 0) {
5761     BoundCtrl = 1;
5762     return true;
5763   }
5764 
5765   if (BoundCtrl == -1) {
5766     BoundCtrl = 0;
5767     return true;
5768   }
5769 
5770   return false;
5771 }
5772 
5773 // Note: the order in this table matches the order of operands in AsmString.
5774 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
5775   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
5776   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
5777   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
5778   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
5779   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
5780   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
5781   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
5782   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
5783   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
5784   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
5785   {"format",  AMDGPUOperand::ImmTyFORMAT, false, nullptr},
5786   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
5787   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
5788   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
5789   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
5790   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
5791   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
5792   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
5793   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
5794   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
5795   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
5796   {"a16",     AMDGPUOperand::ImmTyR128A16,  true, nullptr},
5797   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
5798   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
5799   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
5800   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
5801   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
5802   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
5803   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
5804   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
5805   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
5806   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
5807   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
5808   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
5809   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
5810   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
5811   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
5812   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
5813   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
5814   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
5815 };
5816 
5817 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
5818   unsigned size = Operands.size();
5819   assert(size > 0);
5820 
5821   OperandMatchResultTy res = parseOptionalOpr(Operands);
5822 
5823   // This is a hack to enable hardcoded mandatory operands which follow
5824   // optional operands.
5825   //
5826   // Current design assumes that all operands after the first optional operand
5827   // are also optional. However, some instructions violate this rule
5828   // (see e.g. flat/global atomics, which have hardcoded 'glc' operands).
5829   //
5830   // To alleviate this problem, we have to (implicitly) parse extra operands
5831   // to make sure the autogenerated parser of custom operands never hits the
5832   // hardcoded mandatory operands.
5833 
5834   if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
5835 
5836     // We have parsed the first optional operand.
5837     // Parse as many operands as necessary to skip all mandatory operands.
5838 
5839     for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
5840       if (res != MatchOperand_Success ||
5841           getLexer().is(AsmToken::EndOfStatement)) break;
5842       if (getLexer().is(AsmToken::Comma)) Parser.Lex();
5843       res = parseOptionalOpr(Operands);
5844     }
5845   }
5846 
5847   return res;
5848 }
5849 
5850 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
5851   OperandMatchResultTy res;
5852   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
5853     // try to parse any optional operand here
5854     if (Op.IsBit) {
5855       res = parseNamedBit(Op.Name, Operands, Op.Type);
5856     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
5857       res = parseOModOperand(Operands);
5858     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
5859                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
5860                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
5861       res = parseSDWASel(Operands, Op.Name, Op.Type);
5862     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
5863       res = parseSDWADstUnused(Operands);
5864     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
5865                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
5866                Op.Type == AMDGPUOperand::ImmTyNegLo ||
5867                Op.Type == AMDGPUOperand::ImmTyNegHi) {
5868       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
5869                                         Op.ConvertResult);
5870     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
5871       res = parseDim(Operands);
5872     } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
5873       res = parseDfmtNfmt(Operands);
5874     } else {
5875       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
5876     }
5877     if (res != MatchOperand_NoMatch) {
5878       return res;
5879     }
5880   }
5881   return MatchOperand_NoMatch;
5882 }
5883 
5884 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
5885   StringRef Name = Parser.getTok().getString();
5886   if (Name == "mul") {
5887     return parseIntWithPrefix("mul", Operands,
5888                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
5889   }
5890 
5891   if (Name == "div") {
5892     return parseIntWithPrefix("div", Operands,
5893                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
5894   }
5895 
5896   return MatchOperand_NoMatch;
5897 }
5898 
5899 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
5900   cvtVOP3P(Inst, Operands);
5901 
5902   int Opc = Inst.getOpcode();
5903 
5904   int SrcNum;
5905   const int Ops[] = { AMDGPU::OpName::src0,
5906                       AMDGPU::OpName::src1,
5907                       AMDGPU::OpName::src2 };
5908   for (SrcNum = 0;
5909        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
5910        ++SrcNum);
5911   assert(SrcNum > 0);
5912 
5913   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
5914   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
5915 
5916   if ((OpSel & (1 << SrcNum)) != 0) {
5917     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
5918     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
5919     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
5920   }
5921 }
5922 
5923 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
5924       // 1. This operand accepts input modifiers
5925   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
5926       // 2. This is not the last operand
5927       && Desc.NumOperands > (OpNum + 1)
5928       // 3. The next operand is a register class
5929       && Desc.OpInfo[OpNum + 1].RegClass != -1
5930       // 4. The next register is not tied to any other operand
5931       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
5932 }
5933 
5934 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
5935 {
5936   OptionalImmIndexMap OptionalIdx;
5937   unsigned Opc = Inst.getOpcode();
5938 
5939   unsigned I = 1;
5940   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5941   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5942     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5943   }
5944 
5945   for (unsigned E = Operands.size(); I != E; ++I) {
5946     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5947     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5948       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
5949     } else if (Op.isInterpSlot() ||
5950                Op.isInterpAttr() ||
5951                Op.isAttrChan()) {
5952       Inst.addOperand(MCOperand::createImm(Op.getImm()));
5953     } else if (Op.isImmModifier()) {
5954       OptionalIdx[Op.getImmTy()] = I;
5955     } else {
5956       llvm_unreachable("unhandled operand type");
5957     }
5958   }
5959 
5960   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
5961     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
5962   }
5963 
5964   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
5965     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
5966   }
5967 
5968   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
5969     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
5970   }
5971 }
5972 
5973 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
5974                               OptionalImmIndexMap &OptionalIdx) {
5975   unsigned Opc = Inst.getOpcode();
5976 
5977   unsigned I = 1;
5978   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5979   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5980     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5981   }
5982 
5983   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
5984     // This instruction has src modifiers
5985     for (unsigned E = Operands.size(); I != E; ++I) {
5986       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5987       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5988         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
5989       } else if (Op.isImmModifier()) {
5990         OptionalIdx[Op.getImmTy()] = I;
5991       } else if (Op.isRegOrImm()) {
5992         Op.addRegOrImmOperands(Inst, 1);
5993       } else {
5994         llvm_unreachable("unhandled operand type");
5995       }
5996     }
5997   } else {
5998     // No src modifiers
5999     for (unsigned E = Operands.size(); I != E; ++I) {
6000       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6001       if (Op.isMod()) {
6002         OptionalIdx[Op.getImmTy()] = I;
6003       } else {
6004         Op.addRegOrImmOperands(Inst, 1);
6005       }
6006     }
6007   }
6008 
6009   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6010     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6011   }
6012 
6013   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6014     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6015   }
6016 
6017   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
6018   // these opcodes have a src2 register operand that is tied to the dst operand.
6019   // We don't allow modifiers for this operand in the assembler, so
6020   // src2_modifiers should be 0.
6021   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
6022       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
6023       Opc == AMDGPU::V_MAC_F32_e64_vi ||
6024       Opc == AMDGPU::V_MAC_F16_e64_vi ||
6025       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
6026       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
6027       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
6028     auto it = Inst.begin();
6029     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
6030     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
6031     ++it;
6032     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6033   }
6034 }
6035 
6036 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
6037   OptionalImmIndexMap OptionalIdx;
6038   cvtVOP3(Inst, Operands, OptionalIdx);
6039 }
6040 
6041 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
6042                                const OperandVector &Operands) {
6043   OptionalImmIndexMap OptIdx;
6044   const int Opc = Inst.getOpcode();
6045   const MCInstrDesc &Desc = MII.get(Opc);
6046 
6047   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
6048 
6049   cvtVOP3(Inst, Operands, OptIdx);
6050 
6051   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
6052     assert(!IsPacked);
6053     Inst.addOperand(Inst.getOperand(0));
6054   }
6055 
6056   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
6057   // instruction, and then figure out where to actually put the modifiers
6058 
6059   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
6060 
6061   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
6062   if (OpSelHiIdx != -1) {
6063     int DefaultVal = IsPacked ? -1 : 0;
6064     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
6065                           DefaultVal);
6066   }
6067 
6068   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
6069   if (NegLoIdx != -1) {
6070     assert(IsPacked);
6071     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
6072     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
6073   }
6074 
6075   const int Ops[] = { AMDGPU::OpName::src0,
6076                       AMDGPU::OpName::src1,
6077                       AMDGPU::OpName::src2 };
6078   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
6079                          AMDGPU::OpName::src1_modifiers,
6080                          AMDGPU::OpName::src2_modifiers };
6081 
6082   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6083 
6084   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6085   unsigned OpSelHi = 0;
6086   unsigned NegLo = 0;
6087   unsigned NegHi = 0;
6088 
6089   if (OpSelHiIdx != -1) {
6090     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
6091   }
6092 
6093   if (NegLoIdx != -1) {
6094     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
6095     NegLo = Inst.getOperand(NegLoIdx).getImm();
6096     NegHi = Inst.getOperand(NegHiIdx).getImm();
6097   }
6098 
6099   for (int J = 0; J < 3; ++J) {
6100     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
6101     if (OpIdx == -1)
6102       break;
6103 
6104     uint32_t ModVal = 0;
6105 
6106     if ((OpSel & (1 << J)) != 0)
6107       ModVal |= SISrcMods::OP_SEL_0;
6108 
6109     if ((OpSelHi & (1 << J)) != 0)
6110       ModVal |= SISrcMods::OP_SEL_1;
6111 
6112     if ((NegLo & (1 << J)) != 0)
6113       ModVal |= SISrcMods::NEG;
6114 
6115     if ((NegHi & (1 << J)) != 0)
6116       ModVal |= SISrcMods::NEG_HI;
6117 
6118     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
6119 
6120     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
6121   }
6122 }
6123 
6124 //===----------------------------------------------------------------------===//
6125 // dpp
6126 //===----------------------------------------------------------------------===//
6127 
6128 bool AMDGPUOperand::isDPP8() const {
6129   return isImmTy(ImmTyDPP8);
6130 }
6131 
6132 bool AMDGPUOperand::isDPPCtrl() const {
6133   using namespace AMDGPU::DPP;
6134 
6135   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
6136   if (result) {
6137     int64_t Imm = getImm();
6138     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
6139            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
6140            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
6141            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
6142            (Imm == DppCtrl::WAVE_SHL1) ||
6143            (Imm == DppCtrl::WAVE_ROL1) ||
6144            (Imm == DppCtrl::WAVE_SHR1) ||
6145            (Imm == DppCtrl::WAVE_ROR1) ||
6146            (Imm == DppCtrl::ROW_MIRROR) ||
6147            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
6148            (Imm == DppCtrl::BCAST15) ||
6149            (Imm == DppCtrl::BCAST31) ||
6150            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
6151            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
6152   }
6153   return false;
6154 }
6155 
6156 bool AMDGPUOperand::isS16Imm() const {
6157   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
6158 }
6159 
6160 bool AMDGPUOperand::isU16Imm() const {
6161   return isImm() && isUInt<16>(getImm());
6162 }
6163 
6164 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
6165   if (!isGFX10())
6166     return MatchOperand_NoMatch;
6167 
6168   SMLoc S = Parser.getTok().getLoc();
6169 
6170   if (getLexer().isNot(AsmToken::Identifier))
6171     return MatchOperand_NoMatch;
6172   if (getLexer().getTok().getString() != "dim")
6173     return MatchOperand_NoMatch;
6174 
6175   Parser.Lex();
6176   if (getLexer().isNot(AsmToken::Colon))
6177     return MatchOperand_ParseFail;
6178 
6179   Parser.Lex();
6180 
6181   // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
6182   // integer.
6183   std::string Token;
6184   if (getLexer().is(AsmToken::Integer)) {
6185     SMLoc Loc = getLexer().getTok().getEndLoc();
6186     Token = getLexer().getTok().getString();
6187     Parser.Lex();
6188     if (getLexer().getTok().getLoc() != Loc)
6189       return MatchOperand_ParseFail;
6190   }
6191   if (getLexer().isNot(AsmToken::Identifier))
6192     return MatchOperand_ParseFail;
6193   Token += getLexer().getTok().getString();
6194 
6195   StringRef DimId = Token;
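  // Accept both the full "SQ_RSRC_IMG_*" names and the bare dim suffix.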
6196   if (DimId.startswith("SQ_RSRC_IMG_"))
6197     DimId = DimId.substr(12);
6198 
6199   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
6200   if (!DimInfo)
6201     return MatchOperand_ParseFail;
6202 
6203   Parser.Lex();
6204 
6205   Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
6206                                               AMDGPUOperand::ImmTyDim));
6207   return MatchOperand_Success;
6208 }
6209 
6210 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
6211   SMLoc S = Parser.getTok().getLoc();
6212   StringRef Prefix;
6213 
6214   if (getLexer().getKind() == AsmToken::Identifier) {
6215     Prefix = Parser.getTok().getString();
6216   } else {
6217     return MatchOperand_NoMatch;
6218   }
6219 
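  // Anything other than a "dpp8" prefix may still be a classic dpp control
  // operand, so let parseDPPCtrl have a look at it.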
6220   if (Prefix != "dpp8")
6221     return parseDPPCtrl(Operands);
6222   if (!isGFX10())
6223     return MatchOperand_NoMatch;
6224 
6225   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
6226 
6227   int64_t Sels[8];
6228 
6229   Parser.Lex();
6230   if (getLexer().isNot(AsmToken::Colon))
6231     return MatchOperand_ParseFail;
6232 
6233   Parser.Lex();
6234   if (getLexer().isNot(AsmToken::LBrac))
6235     return MatchOperand_ParseFail;
6236 
6237   Parser.Lex();
6238   if (getParser().parseAbsoluteExpression(Sels[0]))
6239     return MatchOperand_ParseFail;
6240   if (0 > Sels[0] || 7 < Sels[0])
6241     return MatchOperand_ParseFail;
6242 
6243   for (size_t i = 1; i < 8; ++i) {
6244     if (getLexer().isNot(AsmToken::Comma))
6245       return MatchOperand_ParseFail;
6246 
6247     Parser.Lex();
6248     if (getParser().parseAbsoluteExpression(Sels[i]))
6249       return MatchOperand_ParseFail;
6250     if (0 > Sels[i] || 7 < Sels[i])
6251       return MatchOperand_ParseFail;
6252   }
6253 
6254   if (getLexer().isNot(AsmToken::RBrac))
6255     return MatchOperand_ParseFail;
6256   Parser.Lex();
6257 
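  // Pack the eight 3-bit lane selects into a single immediate; select i
  // occupies bits [3*i+2 : 3*i].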
6258   unsigned DPP8 = 0;
6259   for (size_t i = 0; i < 8; ++i)
6260     DPP8 |= (Sels[i] << (i * 3));
6261 
6262   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
6263   return MatchOperand_Success;
6264 }
6265 
6266 OperandMatchResultTy
6267 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
6268   using namespace AMDGPU::DPP;
6269 
6270   SMLoc S = Parser.getTok().getLoc();
6271   StringRef Prefix;
6272   int64_t Int;
6273 
6274   if (getLexer().getKind() == AsmToken::Identifier) {
6275     Prefix = Parser.getTok().getString();
6276   } else {
6277     return MatchOperand_NoMatch;
6278   }
6279 
6280   if (Prefix == "row_mirror") {
6281     Int = DppCtrl::ROW_MIRROR;
6282     Parser.Lex();
6283   } else if (Prefix == "row_half_mirror") {
6284     Int = DppCtrl::ROW_HALF_MIRROR;
6285     Parser.Lex();
6286   } else {
6287     // Check the prefix so parseDPPCtrl does not eat tokens it cannot handle
6288     if (Prefix != "quad_perm"
6289         && Prefix != "row_shl"
6290         && Prefix != "row_shr"
6291         && Prefix != "row_ror"
6292         && Prefix != "wave_shl"
6293         && Prefix != "wave_rol"
6294         && Prefix != "wave_shr"
6295         && Prefix != "wave_ror"
6296         && Prefix != "row_bcast"
6297         && Prefix != "row_share"
6298         && Prefix != "row_xmask") {
6299       return MatchOperand_NoMatch;
6300     }
6301 
6302     if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
6303       return MatchOperand_NoMatch;
6304 
6305     if (!isVI() && !isGFX9() &&
6306         (Prefix == "wave_shl" || Prefix == "wave_shr" ||
6307          Prefix == "wave_rol" || Prefix == "wave_ror" ||
6308          Prefix == "row_bcast"))
6309       return MatchOperand_NoMatch;
6310 
6311     Parser.Lex();
6312     if (getLexer().isNot(AsmToken::Colon))
6313       return MatchOperand_ParseFail;
6314 
6315     if (Prefix == "quad_perm") {
6316       // quad_perm:[%d,%d,%d,%d]
6317       Parser.Lex();
6318       if (getLexer().isNot(AsmToken::LBrac))
6319         return MatchOperand_ParseFail;
6320       Parser.Lex();
6321 
6322       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <= 3))
6323         return MatchOperand_ParseFail;
6324 
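      // The first select was parsed into bits [1:0]; each of the remaining
      // three selects is packed two bits higher.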
6325       for (int i = 0; i < 3; ++i) {
6326         if (getLexer().isNot(AsmToken::Comma))
6327           return MatchOperand_ParseFail;
6328         Parser.Lex();
6329 
6330         int64_t Temp;
6331         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <= 3))
6332           return MatchOperand_ParseFail;
6333         const int shift = i*2 + 2;
6334         Int += (Temp << shift);
6335       }
6336 
6337       if (getLexer().isNot(AsmToken::RBrac))
6338         return MatchOperand_ParseFail;
6339       Parser.Lex();
6340     } else {
6341       // sel:%d
6342       Parser.Lex();
6343       if (getParser().parseAbsoluteExpression(Int))
6344         return MatchOperand_ParseFail;
6345 
6346       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
6347         Int |= DppCtrl::ROW_SHL0;
6348       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
6349         Int |= DppCtrl::ROW_SHR0;
6350       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
6351         Int |= DppCtrl::ROW_ROR0;
6352       } else if (Prefix == "wave_shl" && 1 == Int) {
6353         Int = DppCtrl::WAVE_SHL1;
6354       } else if (Prefix == "wave_rol" && 1 == Int) {
6355         Int = DppCtrl::WAVE_ROL1;
6356       } else if (Prefix == "wave_shr" && 1 == Int) {
6357         Int = DppCtrl::WAVE_SHR1;
6358       } else if (Prefix == "wave_ror" && 1 == Int) {
6359         Int = DppCtrl::WAVE_ROR1;
6360       } else if (Prefix == "row_bcast") {
6361         if (Int == 15) {
6362           Int = DppCtrl::BCAST15;
6363         } else if (Int == 31) {
6364           Int = DppCtrl::BCAST31;
6365         } else {
6366           return MatchOperand_ParseFail;
6367         }
6368       } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
6369         Int |= DppCtrl::ROW_SHARE_FIRST;
6370       } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
6371         Int |= DppCtrl::ROW_XMASK_FIRST;
6372       } else {
6373         return MatchOperand_ParseFail;
6374       }
6375     }
6376   }
6377 
6378   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
6379   return MatchOperand_Success;
6380 }
6381 
6382 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
6383   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
6384 }
6385 
6386 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
6387   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
6388 }
6389 
6390 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
6391   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
6392 }
6393 
6394 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
6395   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
6396 }
6397 
6398 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
6399   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
6400 }
6401 
6402 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
6403   OptionalImmIndexMap OptionalIdx;
6404 
6405   unsigned I = 1;
6406   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
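  // Add the destination register operands (one per instruction def);
  // Operands[0] holds the mnemonic and is skipped.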
6407   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6408     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6409   }
6410 
6411   int Fi = 0;
6412   for (unsigned E = Operands.size(); I != E; ++I) {
6413     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
6414                                             MCOI::TIED_TO);
6415     if (TiedTo != -1) {
6416       assert((unsigned)TiedTo < Inst.getNumOperands());
6417       // Handle the tied 'old' or src2 operand for MAC instructions
6418       Inst.addOperand(Inst.getOperand(TiedTo));
6419     }
6420     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6421     // Add the register arguments
6422     if (Op.isReg() && validateVccOperand(Op.getReg())) {
6423       // VOP2b (v_add_u32, v_sub_u32 ...) dpp uses the "vcc" token.
6424       // Skip it.
6425       continue;
6426     }
6427 
6428     if (IsDPP8) {
6429       if (Op.isDPP8()) {
6430         Op.addImmOperands(Inst, 1);
6431       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6432         Op.addRegWithFPInputModsOperands(Inst, 2);
6433       } else if (Op.isFI()) {
6434         Fi = Op.getImm();
6435       } else if (Op.isReg()) {
6436         Op.addRegOperands(Inst, 1);
6437       } else {
6438         llvm_unreachable("Invalid operand type");
6439       }
6440     } else {
6441       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6442         Op.addRegWithFPInputModsOperands(Inst, 2);
6443       } else if (Op.isDPPCtrl()) {
6444         Op.addImmOperands(Inst, 1);
6445       } else if (Op.isImm()) {
6446         // Handle optional arguments
6447         OptionalIdx[Op.getImmTy()] = I;
6448       } else {
6449         llvm_unreachable("Invalid operand type");
6450       }
6451     }
6452   }
6453 
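  // dpp8 only needs the FI bit appended; classic dpp takes default values
  // for any row_mask/bank_mask/bound_ctrl/fi operands that were not written.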
6454   if (IsDPP8) {
6455     using namespace llvm::AMDGPU::DPP;
6456     Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
6457   } else {
6458     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
6459     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
6460     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
6461     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
6462       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
6463     }
6464   }
6465 }
6466 
6467 //===----------------------------------------------------------------------===//
6468 // sdwa
6469 //===----------------------------------------------------------------------===//
6470 
6471 OperandMatchResultTy
6472 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
6473                               AMDGPUOperand::ImmTy Type) {
6474   using namespace llvm::AMDGPU::SDWA;
6475 
6476   SMLoc S = Parser.getTok().getLoc();
6477   StringRef Value;
6478   OperandMatchResultTy res;
6479 
6480   res = parseStringWithPrefix(Prefix, Value);
6481   if (res != MatchOperand_Success) {
6482     return res;
6483   }
6484 
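  // Map the select name to its SdwaSel encoding; an unrecognized name is a
  // parse failure.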
6485   int64_t Int;
6486   Int = StringSwitch<int64_t>(Value)
6487         .Case("BYTE_0", SdwaSel::BYTE_0)
6488         .Case("BYTE_1", SdwaSel::BYTE_1)
6489         .Case("BYTE_2", SdwaSel::BYTE_2)
6490         .Case("BYTE_3", SdwaSel::BYTE_3)
6491         .Case("WORD_0", SdwaSel::WORD_0)
6492         .Case("WORD_1", SdwaSel::WORD_1)
6493         .Case("DWORD", SdwaSel::DWORD)
6494         .Default(0xffffffff);
6495   Parser.Lex(); // eat last token
6496 
6497   if (Int == 0xffffffff) {
6498     return MatchOperand_ParseFail;
6499   }
6500 
6501   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
6502   return MatchOperand_Success;
6503 }
6504 
6505 OperandMatchResultTy
6506 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
6507   using namespace llvm::AMDGPU::SDWA;
6508 
6509   SMLoc S = Parser.getTok().getLoc();
6510   StringRef Value;
6511   OperandMatchResultTy res;
6512 
6513   res = parseStringWithPrefix("dst_unused", Value);
6514   if (res != MatchOperand_Success) {
6515     return res;
6516   }
6517 
6518   int64_t Int;
6519   Int = StringSwitch<int64_t>(Value)
6520         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
6521         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
6522         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
6523         .Default(0xffffffff);
6524   Parser.Lex(); // eat last token
6525 
6526   if (Int == 0xffffffff) {
6527     return MatchOperand_ParseFail;
6528   }
6529 
6530   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
6531   return MatchOperand_Success;
6532 }
6533 
6534 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
6535   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
6536 }
6537 
6538 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
6539   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
6540 }
6541 
6542 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
6543   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
6544 }
6545 
6546 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
6547   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
6548 }
6549 
6550 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
6551                               uint64_t BasicInstType, bool skipVcc) {
6552   using namespace llvm::AMDGPU::SDWA;
6553 
6554   OptionalImmIndexMap OptionalIdx;
6555   bool skippedVcc = false;
6556 
6557   unsigned I = 1;
6558   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6559   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6560     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6561   }
6562 
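  // Convert the remaining parsed operands; when skipVcc is set, the textual
  // "vcc" operand of VOP2b/VOPC sdwa forms is dropped rather than encoded.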
6563   for (unsigned E = Operands.size(); I != E; ++I) {
6564     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6565     if (skipVcc && !skippedVcc && Op.isReg() &&
6566         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
6567       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa uses the "vcc" token as dst.
6568       // Skip it if it is the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
6569       // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
6570       // Skip VCC only if we didn't skip it on the previous iteration.
6571       if (BasicInstType == SIInstrFlags::VOP2 &&
6572           (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
6573         skippedVcc = true;
6574         continue;
6575       } else if (BasicInstType == SIInstrFlags::VOPC &&
6576                  Inst.getNumOperands() == 0) {
6577         skippedVcc = true;
6578         continue;
6579       }
6580     }
6581     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6582       Op.addRegOrImmWithInputModsOperands(Inst, 2);
6583     } else if (Op.isImm()) {
6584       // Handle optional arguments
6585       OptionalIdx[Op.getImmTy()] = I;
6586     } else {
6587       llvm_unreachable("Invalid operand type");
6588     }
6589     skippedVcc = false;
6590   }
6591 
6592   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
6593       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
6594       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
6595     // V_NOP_sdwa_{vi,gfx9,gfx10} have no optional sdwa arguments
6596     switch (BasicInstType) {
6597     case SIInstrFlags::VOP1:
6598       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6599       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6600         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6601       }
6602       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6603       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6604       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6605       break;
6606 
6607     case SIInstrFlags::VOP2:
6608       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6609       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6610         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6611       }
6612       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6613       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6614       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6615       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6616       break;
6617 
6618     case SIInstrFlags::VOPC:
6619       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
6620         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6621       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6622       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6623       break;
6624 
6625     default:
6626       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
6627     }
6628   }
6629 
6630   // Special case for v_mac_{f16, f32}:
6631   // they have a src2 register operand that is tied to the dst operand
6632   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
6633       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
6634     auto it = Inst.begin();
6635     std::advance(
6636       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
6637     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6638   }
6639 }
6640 
6641 /// Force static initialization.
6642 extern "C" void LLVMInitializeAMDGPUAsmParser() {
6643   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
6644   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
6645 }
6646 
6647 #define GET_REGISTER_MATCHER
6648 #define GET_MATCHER_IMPLEMENTATION
6649 #define GET_MNEMONIC_SPELL_CHECKER
6650 #include "AMDGPUGenAsmMatcher.inc"
6651 
6652 // This function must be defined after the auto-generated include so that the
6653 // MatchClassKind enum is available.
6654 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
6655                                                      unsigned Kind) {
6656   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
6657   // But MatchInstructionImpl() expects to see a token and fails to validate the
6658   // operand. This method checks if we are given an immediate operand but are
6659   // expected to match the corresponding token.
6660   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
6661   switch (Kind) {
6662   case MCK_addr64:
6663     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
6664   case MCK_gds:
6665     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
6666   case MCK_lds:
6667     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
6668   case MCK_glc:
6669     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
6670   case MCK_idxen:
6671     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
6672   case MCK_offen:
6673     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
6674   case MCK_SSrcB32:
6675     // When operands have expression values, they will return true for isToken,
6676     // because it is not possible to distinguish between a token and an
6677     // expression at parse time. MatchInstructionImpl() will always try to
6678     // match an operand as a token when isToken returns true; if the name of
6679     // the expression is not a valid token, the match will fail, so we need
6680     // to handle it here.
6681     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
6682   case MCK_SSrcF32:
6683     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
6684   case MCK_SoppBrTarget:
6685     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
6686   case MCK_VReg32OrOff:
6687     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
6688   case MCK_InterpSlot:
6689     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
6690   case MCK_Attr:
6691     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
6692   case MCK_AttrChan:
6693     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
6694   default:
6695     return Match_InvalidOperand;
6696   }
6697 }
6698 
6699 //===----------------------------------------------------------------------===//
6700 // endpgm
6701 //===----------------------------------------------------------------------===//
6702 
6703 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
6704   SMLoc S = Parser.getTok().getLoc();
6705   int64_t Imm = 0;
6706 
6707   if (!parseExpr(Imm)) {
6708     // The operand is optional; if not present, default to 0.
6709     Imm = 0;
6710   }
6711 
6712   if (!isUInt<16>(Imm)) {
6713     Error(S, "expected a 16-bit value");
6714     return MatchOperand_ParseFail;
6715   }
6716 
6717   Operands.push_back(
6718       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
6719   return MatchOperand_Success;
6720 }
6721 
6722 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
6723