1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "Utils/AMDGPUAsmUtils.h"
16 #include "Utils/AMDGPUBaseInfo.h"
17 #include "Utils/AMDKernelCodeTUtils.h"
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SmallBitVector.h"
23 #include "llvm/ADT/SmallString.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/ADT/StringSwitch.h"
26 #include "llvm/ADT/Twine.h"
27 #include "llvm/BinaryFormat/ELF.h"
28 #include "llvm/MC/MCAsmInfo.h"
29 #include "llvm/MC/MCContext.h"
30 #include "llvm/MC/MCExpr.h"
31 #include "llvm/MC/MCInst.h"
32 #include "llvm/MC/MCInstrDesc.h"
33 #include "llvm/MC/MCInstrInfo.h"
34 #include "llvm/MC/MCParser/MCAsmLexer.h"
35 #include "llvm/MC/MCParser/MCAsmParser.h"
36 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
37 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
38 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
39 #include "llvm/MC/MCRegisterInfo.h"
40 #include "llvm/MC/MCStreamer.h"
41 #include "llvm/MC/MCSubtargetInfo.h"
42 #include "llvm/MC/MCSymbol.h"
43 #include "llvm/Support/AMDGPUMetadata.h"
44 #include "llvm/Support/AMDHSAKernelDescriptor.h"
45 #include "llvm/Support/Casting.h"
46 #include "llvm/Support/Compiler.h"
47 #include "llvm/Support/ErrorHandling.h"
48 #include "llvm/Support/MachineValueType.h"
49 #include "llvm/Support/MathExtras.h"
50 #include "llvm/Support/SMLoc.h"
51 #include "llvm/Support/TargetParser.h"
52 #include "llvm/Support/TargetRegistry.h"
53 #include "llvm/Support/raw_ostream.h"
54 #include <algorithm>
55 #include <cassert>
56 #include <cstdint>
57 #include <cstring>
58 #include <iterator>
59 #include <map>
60 #include <memory>
61 #include <string>
62 
63 using namespace llvm;
64 using namespace llvm::AMDGPU;
65 using namespace llvm::amdhsa;
66 
67 namespace {
68 
69 class AMDGPUAsmParser;
70 
71 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };
72 
73 //===----------------------------------------------------------------------===//
74 // Operand
75 //===----------------------------------------------------------------------===//
76 
77 class AMDGPUOperand : public MCParsedAsmOperand {
78   enum KindTy {
79     Token,
80     Immediate,
81     Register,
82     Expression
83   } Kind;
84 
85   SMLoc StartLoc, EndLoc;
86   const AMDGPUAsmParser *AsmParser;
87 
88 public:
89   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
90     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
91 
92   using Ptr = std::unique_ptr<AMDGPUOperand>;
93 
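  // Source operand modifiers as written in the assembly source, e.g. the
  // SP3-style |v0| (abs), -v0 (neg) and sext(v0). They are folded into the
  // corresponding *_modifiers operand using SISrcMods bits.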
94   struct Modifiers {
95     bool Abs = false;
96     bool Neg = false;
97     bool Sext = false;
98 
99     bool hasFPModifiers() const { return Abs || Neg; }
100     bool hasIntModifiers() const { return Sext; }
101     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
102 
103     int64_t getFPModifiersOperand() const {
104       int64_t Operand = 0;
105       Operand |= Abs ? SISrcMods::ABS : 0u;
106       Operand |= Neg ? SISrcMods::NEG : 0u;
107       return Operand;
108     }
109 
110     int64_t getIntModifiersOperand() const {
111       int64_t Operand = 0;
112       Operand |= Sext ? SISrcMods::SEXT : 0u;
113       return Operand;
114     }
115 
116     int64_t getModifiersOperand() const {
117       assert(!(hasFPModifiers() && hasIntModifiers())
118            && "fp and int modifiers should not be used simultaneously");
119       if (hasFPModifiers()) {
120         return getFPModifiersOperand();
121       } else if (hasIntModifiers()) {
122         return getIntModifiersOperand();
123       } else {
124         return 0;
125       }
126     }
127 
128     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
129   };
130 
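  // Kinds of immediate operands. Most of these identify optional named
  // modifiers in the assembly syntax (e.g. offset:..., glc, dmask:...) so
  // they can be parsed, matched and defaulted independently of their values.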
131   enum ImmTy {
132     ImmTyNone,
133     ImmTyGDS,
134     ImmTyLDS,
135     ImmTyOffen,
136     ImmTyIdxen,
137     ImmTyAddr64,
138     ImmTyOffset,
139     ImmTyInstOffset,
140     ImmTyOffset0,
141     ImmTyOffset1,
142     ImmTyDLC,
143     ImmTyGLC,
144     ImmTySLC,
145     ImmTyTFE,
146     ImmTyD16,
147     ImmTyClampSI,
148     ImmTyOModSI,
149     ImmTyDppCtrl,
150     ImmTyDppRowMask,
151     ImmTyDppBankMask,
152     ImmTyDppBoundCtrl,
153     ImmTySdwaDstSel,
154     ImmTySdwaSrc0Sel,
155     ImmTySdwaSrc1Sel,
156     ImmTySdwaDstUnused,
157     ImmTyDMask,
158     ImmTyDim,
159     ImmTyUNorm,
160     ImmTyDA,
161     ImmTyR128A16,
162     ImmTyLWE,
163     ImmTyExpTgt,
164     ImmTyExpCompr,
165     ImmTyExpVM,
166     ImmTyFORMAT,
167     ImmTyHwreg,
168     ImmTyOff,
169     ImmTySendMsg,
170     ImmTyInterpSlot,
171     ImmTyInterpAttr,
172     ImmTyAttrChan,
173     ImmTyOpSel,
174     ImmTyOpSelHi,
175     ImmTyNegLo,
176     ImmTyNegHi,
177     ImmTySwizzle,
178     ImmTyGprIdxMode,
179     ImmTyEndpgm,
180     ImmTyHigh
181   };
182 
183 private:
184   struct TokOp {
185     const char *Data;
186     unsigned Length;
187   };
188 
189   struct ImmOp {
190     int64_t Val;
191     ImmTy Type;
192     bool IsFPImm;
193     Modifiers Mods;
194   };
195 
196   struct RegOp {
197     unsigned RegNo;
198     Modifiers Mods;
199   };
200 
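  // Operand payload; the active union member is selected by Kind.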
201   union {
202     TokOp Tok;
203     ImmOp Imm;
204     RegOp Reg;
205     const MCExpr *Expr;
206   };
207 
208 public:
209   bool isToken() const override {
210     if (Kind == Token)
211       return true;
212 
213     if (Kind != Expression || !Expr)
214       return false;
215 
    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
220     return isa<MCSymbolRefExpr>(Expr);
221   }
222 
223   bool isImm() const override {
224     return Kind == Immediate;
225   }
226 
227   bool isInlinableImm(MVT type) const;
228   bool isLiteralImm(MVT type) const;
229 
230   bool isRegKind() const {
231     return Kind == Register;
232   }
233 
234   bool isReg() const override {
235     return isRegKind() && !hasModifiers();
236   }
237 
238   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
239     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
240   }
241 
242   bool isRegOrImmWithInt16InputMods() const {
243     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
244   }
245 
246   bool isRegOrImmWithInt32InputMods() const {
247     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
248   }
249 
250   bool isRegOrImmWithInt64InputMods() const {
251     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
252   }
253 
254   bool isRegOrImmWithFP16InputMods() const {
255     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
256   }
257 
258   bool isRegOrImmWithFP32InputMods() const {
259     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
260   }
261 
262   bool isRegOrImmWithFP64InputMods() const {
263     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
264   }
265 
266   bool isVReg() const {
267     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
268            isRegClass(AMDGPU::VReg_64RegClassID) ||
269            isRegClass(AMDGPU::VReg_96RegClassID) ||
270            isRegClass(AMDGPU::VReg_128RegClassID) ||
271            isRegClass(AMDGPU::VReg_256RegClassID) ||
272            isRegClass(AMDGPU::VReg_512RegClassID);
273   }
274 
275   bool isVReg32() const {
276     return isRegClass(AMDGPU::VGPR_32RegClassID);
277   }
278 
279   bool isVReg32OrOff() const {
280     return isOff() || isVReg32();
281   }
282 
283   bool isSDWAOperand(MVT type) const;
284   bool isSDWAFP16Operand() const;
285   bool isSDWAFP32Operand() const;
286   bool isSDWAInt16Operand() const;
287   bool isSDWAInt32Operand() const;
288 
289   bool isImmTy(ImmTy ImmT) const {
290     return isImm() && Imm.Type == ImmT;
291   }
292 
293   bool isImmModifier() const {
294     return isImm() && Imm.Type != ImmTyNone;
295   }
296 
297   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
298   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
299   bool isDMask() const { return isImmTy(ImmTyDMask); }
300   bool isDim() const { return isImmTy(ImmTyDim); }
301   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
302   bool isDA() const { return isImmTy(ImmTyDA); }
303   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
304   bool isLWE() const { return isImmTy(ImmTyLWE); }
305   bool isOff() const { return isImmTy(ImmTyOff); }
306   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
307   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
308   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
309   bool isOffen() const { return isImmTy(ImmTyOffen); }
310   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
311   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
312   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
313   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
314   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
315 
316   bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
317   bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
318   bool isGDS() const { return isImmTy(ImmTyGDS); }
319   bool isLDS() const { return isImmTy(ImmTyLDS); }
320   bool isDLC() const { return isImmTy(ImmTyDLC); }
321   bool isGLC() const { return isImmTy(ImmTyGLC); }
322   bool isSLC() const { return isImmTy(ImmTySLC); }
323   bool isTFE() const { return isImmTy(ImmTyTFE); }
324   bool isD16() const { return isImmTy(ImmTyD16); }
325   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
326   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
327   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
328   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
329   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
330   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
331   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
332   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
333   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
334   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
335   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
336   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
337   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
338   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
339   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
340   bool isHigh() const { return isImmTy(ImmTyHigh); }
341 
342   bool isMod() const {
343     return isClampSI() || isOModSI();
344   }
345 
346   bool isRegOrImm() const {
347     return isReg() || isImm();
348   }
349 
350   bool isRegClass(unsigned RCID) const;
351 
352   bool isInlineValue() const;
353 
354   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
355     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
356   }
357 
358   bool isSCSrcB16() const {
359     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
360   }
361 
362   bool isSCSrcV2B16() const {
363     return isSCSrcB16();
364   }
365 
366   bool isSCSrcB32() const {
367     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
368   }
369 
370   bool isSCSrcB64() const {
371     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
372   }
373 
374   bool isSCSrcF16() const {
375     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
376   }
377 
378   bool isSCSrcV2F16() const {
379     return isSCSrcF16();
380   }
381 
382   bool isSCSrcF32() const {
383     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
384   }
385 
386   bool isSCSrcF64() const {
387     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
388   }
389 
390   bool isSSrcB32() const {
391     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
392   }
393 
394   bool isSSrcB16() const {
395     return isSCSrcB16() || isLiteralImm(MVT::i16);
396   }
397 
398   bool isSSrcV2B16() const {
399     llvm_unreachable("cannot happen");
400     return isSSrcB16();
401   }
402 
403   bool isSSrcB64() const {
404     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
405     // See isVSrc64().
406     return isSCSrcB64() || isLiteralImm(MVT::i64);
407   }
408 
409   bool isSSrcF32() const {
410     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
411   }
412 
413   bool isSSrcF64() const {
414     return isSCSrcB64() || isLiteralImm(MVT::f64);
415   }
416 
417   bool isSSrcF16() const {
418     return isSCSrcB16() || isLiteralImm(MVT::f16);
419   }
420 
421   bool isSSrcV2F16() const {
422     llvm_unreachable("cannot happen");
423     return isSSrcF16();
424   }
425 
426   bool isSSrcOrLdsB32() const {
427     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
428            isLiteralImm(MVT::i32) || isExpr();
429   }
430 
431   bool isVCSrcB32() const {
432     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
433   }
434 
435   bool isVCSrcB64() const {
436     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
437   }
438 
439   bool isVCSrcB16() const {
440     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
441   }
442 
443   bool isVCSrcV2B16() const {
444     return isVCSrcB16();
445   }
446 
447   bool isVCSrcF32() const {
448     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
449   }
450 
451   bool isVCSrcF64() const {
452     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
453   }
454 
455   bool isVCSrcF16() const {
456     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
457   }
458 
459   bool isVCSrcV2F16() const {
460     return isVCSrcF16();
461   }
462 
463   bool isVSrcB32() const {
464     return isVCSrcF32() || isLiteralImm(MVT::i32);
465   }
466 
467   bool isVSrcB64() const {
468     return isVCSrcF64() || isLiteralImm(MVT::i64);
469   }
470 
471   bool isVSrcB16() const {
472     return isVCSrcF16() || isLiteralImm(MVT::i16);
473   }
474 
475   bool isVSrcV2B16() const {
476     return isVSrcB16() || isLiteralImm(MVT::v2i16);
477   }
478 
479   bool isVSrcF32() const {
480     return isVCSrcF32() || isLiteralImm(MVT::f32);
481   }
482 
483   bool isVSrcF64() const {
484     return isVCSrcF64() || isLiteralImm(MVT::f64);
485   }
486 
487   bool isVSrcF16() const {
488     return isVCSrcF16() || isLiteralImm(MVT::f16);
489   }
490 
491   bool isVSrcV2F16() const {
492     return isVSrcF16() || isLiteralImm(MVT::v2f16);
493   }
494 
495   bool isKImmFP32() const {
496     return isLiteralImm(MVT::f32);
497   }
498 
499   bool isKImmFP16() const {
500     return isLiteralImm(MVT::f16);
501   }
502 
503   bool isMem() const override {
504     return false;
505   }
506 
507   bool isExpr() const {
508     return Kind == Expression;
509   }
510 
511   bool isSoppBrTarget() const {
512     return isExpr() || isImm();
513   }
514 
515   bool isSWaitCnt() const;
516   bool isHwreg() const;
517   bool isSendMsg() const;
518   bool isSwizzle() const;
519   bool isSMRDOffset8() const;
520   bool isSMRDOffset20() const;
521   bool isSMRDLiteralOffset() const;
522   bool isDPPCtrl() const;
523   bool isGPRIdxMode() const;
524   bool isS16Imm() const;
525   bool isU16Imm() const;
526   bool isEndpgm() const;
527 
528   StringRef getExpressionAsToken() const {
529     assert(isExpr());
530     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
531     return S->getSymbol().getName();
532   }
533 
534   StringRef getToken() const {
535     assert(isToken());
536 
537     if (Kind == Expression)
538       return getExpressionAsToken();
539 
540     return StringRef(Tok.Data, Tok.Length);
541   }
542 
543   int64_t getImm() const {
544     assert(isImm());
545     return Imm.Val;
546   }
547 
548   ImmTy getImmTy() const {
549     assert(isImm());
550     return Imm.Type;
551   }
552 
553   unsigned getReg() const override {
554     assert(isRegKind());
555     return Reg.RegNo;
556   }
557 
558   SMLoc getStartLoc() const override {
559     return StartLoc;
560   }
561 
562   SMLoc getEndLoc() const override {
563     return EndLoc;
564   }
565 
566   SMRange getLocRange() const {
567     return SMRange(StartLoc, EndLoc);
568   }
569 
570   Modifiers getModifiers() const {
571     assert(isRegKind() || isImmTy(ImmTyNone));
572     return isRegKind() ? Reg.Mods : Imm.Mods;
573   }
574 
575   void setModifiers(Modifiers Mods) {
576     assert(isRegKind() || isImmTy(ImmTyNone));
577     if (isRegKind())
578       Reg.Mods = Mods;
579     else
580       Imm.Mods = Mods;
581   }
582 
583   bool hasModifiers() const {
584     return getModifiers().hasModifiers();
585   }
586 
587   bool hasFPModifiers() const {
588     return getModifiers().hasFPModifiers();
589   }
590 
591   bool hasIntModifiers() const {
592     return getModifiers().hasIntModifiers();
593   }
594 
595   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
596 
597   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
598 
599   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
600 
601   template <unsigned Bitwidth>
602   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
603 
604   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
605     addKImmFPOperands<16>(Inst, N);
606   }
607 
608   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
609     addKImmFPOperands<32>(Inst, N);
610   }
611 
612   void addRegOperands(MCInst &Inst, unsigned N) const;
613 
614   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
615     if (isRegKind())
616       addRegOperands(Inst, N);
617     else if (isExpr())
618       Inst.addOperand(MCOperand::createExpr(Expr));
619     else
620       addImmOperands(Inst, N);
621   }
622 
623   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
624     Modifiers Mods = getModifiers();
625     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
626     if (isRegKind()) {
627       addRegOperands(Inst, N);
628     } else {
629       addImmOperands(Inst, N, false);
630     }
631   }
632 
633   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
634     assert(!hasIntModifiers());
635     addRegOrImmWithInputModsOperands(Inst, N);
636   }
637 
638   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
639     assert(!hasFPModifiers());
640     addRegOrImmWithInputModsOperands(Inst, N);
641   }
642 
643   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
644     Modifiers Mods = getModifiers();
645     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
646     assert(isRegKind());
647     addRegOperands(Inst, N);
648   }
649 
650   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
651     assert(!hasIntModifiers());
652     addRegWithInputModsOperands(Inst, N);
653   }
654 
655   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
656     assert(!hasFPModifiers());
657     addRegWithInputModsOperands(Inst, N);
658   }
659 
660   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
661     if (isImm())
662       addImmOperands(Inst, N);
663     else {
664       assert(isExpr());
665       Inst.addOperand(MCOperand::createExpr(Expr));
666     }
667   }
668 
669   static void printImmTy(raw_ostream& OS, ImmTy Type) {
670     switch (Type) {
671     case ImmTyNone: OS << "None"; break;
672     case ImmTyGDS: OS << "GDS"; break;
673     case ImmTyLDS: OS << "LDS"; break;
674     case ImmTyOffen: OS << "Offen"; break;
675     case ImmTyIdxen: OS << "Idxen"; break;
676     case ImmTyAddr64: OS << "Addr64"; break;
677     case ImmTyOffset: OS << "Offset"; break;
678     case ImmTyInstOffset: OS << "InstOffset"; break;
679     case ImmTyOffset0: OS << "Offset0"; break;
680     case ImmTyOffset1: OS << "Offset1"; break;
681     case ImmTyDLC: OS << "DLC"; break;
682     case ImmTyGLC: OS << "GLC"; break;
683     case ImmTySLC: OS << "SLC"; break;
684     case ImmTyTFE: OS << "TFE"; break;
685     case ImmTyD16: OS << "D16"; break;
686     case ImmTyFORMAT: OS << "FORMAT"; break;
687     case ImmTyClampSI: OS << "ClampSI"; break;
688     case ImmTyOModSI: OS << "OModSI"; break;
689     case ImmTyDppCtrl: OS << "DppCtrl"; break;
690     case ImmTyDppRowMask: OS << "DppRowMask"; break;
691     case ImmTyDppBankMask: OS << "DppBankMask"; break;
692     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
693     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
694     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
695     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
696     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
697     case ImmTyDMask: OS << "DMask"; break;
698     case ImmTyDim: OS << "Dim"; break;
699     case ImmTyUNorm: OS << "UNorm"; break;
700     case ImmTyDA: OS << "DA"; break;
701     case ImmTyR128A16: OS << "R128A16"; break;
702     case ImmTyLWE: OS << "LWE"; break;
703     case ImmTyOff: OS << "Off"; break;
704     case ImmTyExpTgt: OS << "ExpTgt"; break;
705     case ImmTyExpCompr: OS << "ExpCompr"; break;
706     case ImmTyExpVM: OS << "ExpVM"; break;
707     case ImmTyHwreg: OS << "Hwreg"; break;
708     case ImmTySendMsg: OS << "SendMsg"; break;
709     case ImmTyInterpSlot: OS << "InterpSlot"; break;
710     case ImmTyInterpAttr: OS << "InterpAttr"; break;
711     case ImmTyAttrChan: OS << "AttrChan"; break;
712     case ImmTyOpSel: OS << "OpSel"; break;
713     case ImmTyOpSelHi: OS << "OpSelHi"; break;
714     case ImmTyNegLo: OS << "NegLo"; break;
715     case ImmTyNegHi: OS << "NegHi"; break;
716     case ImmTySwizzle: OS << "Swizzle"; break;
717     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
718     case ImmTyHigh: OS << "High"; break;
719     case ImmTyEndpgm:
720       OS << "Endpgm";
721       break;
722     }
723   }
724 
725   void print(raw_ostream &OS) const override {
726     switch (Kind) {
727     case Register:
728       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
729       break;
730     case Immediate:
731       OS << '<' << getImm();
732       if (getImmTy() != ImmTyNone) {
733         OS << " type: "; printImmTy(OS, getImmTy());
734       }
735       OS << " mods: " << Imm.Mods << '>';
736       break;
737     case Token:
738       OS << '\'' << getToken() << '\'';
739       break;
740     case Expression:
741       OS << "<expr " << *Expr << '>';
742       break;
743     }
744   }
745 
746   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
747                                       int64_t Val, SMLoc Loc,
748                                       ImmTy Type = ImmTyNone,
749                                       bool IsFPImm = false) {
750     auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
751     Op->Imm.Val = Val;
752     Op->Imm.IsFPImm = IsFPImm;
753     Op->Imm.Type = Type;
754     Op->Imm.Mods = Modifiers();
755     Op->StartLoc = Loc;
756     Op->EndLoc = Loc;
757     return Op;
758   }
759 
760   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
761                                         StringRef Str, SMLoc Loc,
762                                         bool HasExplicitEncodingSize = true) {
763     auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
764     Res->Tok.Data = Str.data();
765     Res->Tok.Length = Str.size();
766     Res->StartLoc = Loc;
767     Res->EndLoc = Loc;
768     return Res;
769   }
770 
771   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
772                                       unsigned RegNo, SMLoc S,
773                                       SMLoc E) {
774     auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
775     Op->Reg.RegNo = RegNo;
776     Op->Reg.Mods = Modifiers();
777     Op->StartLoc = S;
778     Op->EndLoc = E;
779     return Op;
780   }
781 
782   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
783                                        const class MCExpr *Expr, SMLoc S) {
784     auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
785     Op->Expr = Expr;
786     Op->StartLoc = S;
787     Op->EndLoc = S;
788     return Op;
789   }
790 };
791 
792 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
793   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
794   return OS;
795 }
796 
797 //===----------------------------------------------------------------------===//
798 // AsmParser
799 //===----------------------------------------------------------------------===//
800 
// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the
// next .amdgpu_hsa_kernel directive or at EOF.
804 class KernelScopeInfo {
805   int SgprIndexUnusedMin = -1;
806   int VgprIndexUnusedMin = -1;
807   MCContext *Ctx = nullptr;
808 
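  // Record that SGPRs up to index \p i are in use and publish the running
  // count via the .kernel.sgpr_count symbol. usesVgprAt is the VGPR analog.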
809   void usesSgprAt(int i) {
810     if (i >= SgprIndexUnusedMin) {
811       SgprIndexUnusedMin = ++i;
812       if (Ctx) {
813         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
814         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
815       }
816     }
817   }
818 
819   void usesVgprAt(int i) {
820     if (i >= VgprIndexUnusedMin) {
821       VgprIndexUnusedMin = ++i;
822       if (Ctx) {
823         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
824         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
825       }
826     }
827   }
828 
829 public:
830   KernelScopeInfo() = default;
831 
832   void initialize(MCContext &Context) {
833     Ctx = &Context;
834     usesSgprAt(SgprIndexUnusedMin = -1);
835     usesVgprAt(VgprIndexUnusedMin = -1);
836   }
837 
838   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
839     switch (RegKind) {
840       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
841       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
842       default: break;
843     }
844   }
845 };
846 
847 class AMDGPUAsmParser : public MCTargetAsmParser {
848   MCAsmParser &Parser;
849 
850   // Number of extra operands parsed after the first optional operand.
851   // This may be necessary to skip hardcoded mandatory operands.
852   static const unsigned MAX_OPR_LOOKAHEAD = 8;
853 
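  // Encoding variant forced by an explicit mnemonic suffix (_e32, _e64,
  // _sdwa or _dpp), if any. See parseMnemonicSuffix.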
854   unsigned ForcedEncodingSize = 0;
855   bool ForcedDPP = false;
856   bool ForcedSDWA = false;
857   KernelScopeInfo KernelScope;
858 
859   /// @name Auto-generated Match Functions
860   /// {
861 
862 #define GET_ASSEMBLER_HEADER
863 #include "AMDGPUGenAsmMatcher.inc"
864 
865   /// }
866 
867 private:
868   bool ParseAsAbsoluteExpression(uint32_t &Ret);
869   bool OutOfRangeError(SMRange Range);
870   /// Calculate VGPR/SGPR blocks required for given target, reserved
871   /// registers, and user-specified NextFreeXGPR values.
872   ///
873   /// \param Features [in] Target features, used for bug corrections.
874   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
875   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
876   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
877   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
878   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
879   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
880   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
881   /// \param VGPRBlocks [out] Result VGPR block count.
882   /// \param SGPRBlocks [out] Result SGPR block count.
883   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
884                           bool FlatScrUsed, bool XNACKUsed,
885                           unsigned NextFreeVGPR, SMRange VGPRRange,
886                           unsigned NextFreeSGPR, SMRange SGPRRange,
887                           unsigned &VGPRBlocks, unsigned &SGPRBlocks);
888   bool ParseDirectiveAMDGCNTarget();
889   bool ParseDirectiveAMDHSAKernel();
890   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
891   bool ParseDirectiveHSACodeObjectVersion();
892   bool ParseDirectiveHSACodeObjectISA();
893   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
894   bool ParseDirectiveAMDKernelCodeT();
895   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
896   bool ParseDirectiveAMDGPUHsaKernel();
897 
898   bool ParseDirectiveISAVersion();
899   bool ParseDirectiveHSAMetadata();
900   bool ParseDirectivePALMetadataBegin();
901   bool ParseDirectivePALMetadata();
902 
903   /// Common code to parse out a block of text (typically YAML) between start and
904   /// end directives.
905   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
906                            const char *AssemblerDirectiveEnd,
907                            std::string &CollectString);
908 
909   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
910                              RegisterKind RegKind, unsigned Reg1,
911                              unsigned RegNum);
912   bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
913                            unsigned& RegNum, unsigned& RegWidth,
914                            unsigned *DwordRegIndex);
915   bool isRegister();
916   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
917   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
918   void initializeGprCountSymbol(RegisterKind RegKind);
919   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
920                              unsigned RegWidth);
921   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
922                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
923   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
924                  bool IsGdsHardcoded);
925 
926 public:
927   enum AMDGPUMatchResultTy {
928     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
929   };
930   enum OperandMode {
931     OperandMode_Default,
932     OperandMode_NSA,
933   };
934 
935   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
936 
937   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
938                const MCInstrInfo &MII,
939                const MCTargetOptions &Options)
940       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
941     MCAsmParserExtension::Initialize(Parser);
942 
943     if (getFeatureBits().none()) {
944       // Set default features.
945       copySTI().ToggleFeature("southern-islands");
946     }
947 
948     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
949 
950     {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
955       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
956       MCContext &Ctx = getContext();
957       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
958         MCSymbol *Sym =
959             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
960         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
961         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
962         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
963         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
964         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
965       } else {
966         MCSymbol *Sym =
967             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
968         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
969         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
970         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
971         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
972         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
973       }
974       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
975         initializeGprCountSymbol(IS_VGPR);
976         initializeGprCountSymbol(IS_SGPR);
977       } else
978         KernelScope.initialize(getContext());
979     }
980   }
981 
982   bool hasXNACK() const {
983     return AMDGPU::hasXNACK(getSTI());
984   }
985 
986   bool hasMIMG_R128() const {
987     return AMDGPU::hasMIMG_R128(getSTI());
988   }
989 
990   bool hasPackedD16() const {
991     return AMDGPU::hasPackedD16(getSTI());
992   }
993 
994   bool isSI() const {
995     return AMDGPU::isSI(getSTI());
996   }
997 
998   bool isCI() const {
999     return AMDGPU::isCI(getSTI());
1000   }
1001 
1002   bool isVI() const {
1003     return AMDGPU::isVI(getSTI());
1004   }
1005 
1006   bool isGFX9() const {
1007     return AMDGPU::isGFX9(getSTI());
1008   }
1009 
1010   bool isGFX10() const {
1011     return AMDGPU::isGFX10(getSTI());
1012   }
1013 
1014   bool hasInv2PiInlineImm() const {
1015     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1016   }
1017 
1018   bool hasFlatOffsets() const {
1019     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1020   }
1021 
1022   bool hasSGPR102_SGPR103() const {
1023     return !isVI() && !isGFX9();
1024   }
1025 
1026   bool hasSGPR104_SGPR105() const {
1027     return isGFX10();
1028   }
1029 
1030   bool hasIntClamp() const {
1031     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1032   }
1033 
1034   AMDGPUTargetStreamer &getTargetStreamer() {
1035     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1036     return static_cast<AMDGPUTargetStreamer &>(TS);
1037   }
1038 
1039   const MCRegisterInfo *getMRI() const {
1040     // We need this const_cast because for some reason getContext() is not const
1041     // in MCAsmParser.
1042     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1043   }
1044 
1045   const MCInstrInfo *getMII() const {
1046     return &MII;
1047   }
1048 
1049   const FeatureBitset &getFeatureBits() const {
1050     return getSTI().getFeatureBits();
1051   }
1052 
1053   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1054   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1055   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1056 
1057   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1058   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1059   bool isForcedDPP() const { return ForcedDPP; }
1060   bool isForcedSDWA() const { return ForcedSDWA; }
1061   ArrayRef<unsigned> getMatchedVariants() const;
1062 
1063   std::unique_ptr<AMDGPUOperand> parseRegister();
1064   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1065   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1066   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1067                                       unsigned Kind) override;
1068   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1069                                OperandVector &Operands, MCStreamer &Out,
1070                                uint64_t &ErrorInfo,
1071                                bool MatchingInlineAsm) override;
1072   bool ParseDirective(AsmToken DirectiveID) override;
1073   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1074                                     OperandMode Mode = OperandMode_Default);
1075   StringRef parseMnemonicSuffix(StringRef Name);
1076   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1077                         SMLoc NameLoc, OperandVector &Operands) override;
1078   //bool ProcessInstruction(MCInst &Inst);
1079 
1080   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1081 
1082   OperandMatchResultTy
1083   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1084                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1085                      bool (*ConvertResult)(int64_t &) = nullptr);
1086 
1087   OperandMatchResultTy parseOperandArrayWithPrefix(
1088     const char *Prefix,
1089     OperandVector &Operands,
1090     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1091     bool (*ConvertResult)(int64_t&) = nullptr);
1092 
1093   OperandMatchResultTy
1094   parseNamedBit(const char *Name, OperandVector &Operands,
1095                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1096   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1097                                              StringRef &Value);
1098 
1099   bool parseAbsoluteExpr(int64_t &Val, bool HasSP3AbsModifier = false);
1100   bool parseSP3NegModifier();
1101   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1102   OperandMatchResultTy parseReg(OperandVector &Operands);
1103   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1104   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1105   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1106   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1107   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1108   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1109   OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1110 
1111   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1112   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1113   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1114   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1115 
1116   bool parseCnt(int64_t &IntVal);
1117   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1118   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1119 
1120 private:
1121   struct OperandInfoTy {
1122     int64_t Id;
1123     bool IsSymbolic = false;
1124 
1125     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1126   };
1127 
1128   bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
1129   bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1130 
1131   void errorExpTgt();
1132   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1133 
1134   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
1135   bool validateSOPLiteral(const MCInst &Inst) const;
1136   bool validateConstantBusLimitations(const MCInst &Inst);
1137   bool validateEarlyClobberLimitations(const MCInst &Inst);
1138   bool validateIntClampSupported(const MCInst &Inst);
1139   bool validateMIMGAtomicDMask(const MCInst &Inst);
1140   bool validateMIMGGatherDMask(const MCInst &Inst);
1141   bool validateMIMGDataSize(const MCInst &Inst);
1142   bool validateMIMGAddrSize(const MCInst &Inst);
1143   bool validateMIMGD16(const MCInst &Inst);
1144   bool validateMIMGDim(const MCInst &Inst);
1145   bool validateLdsDirect(const MCInst &Inst);
1146   bool validateVOP3Literal(const MCInst &Inst) const;
1147   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1148   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1149   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1150 
1151   bool isId(const StringRef Id) const;
1152   bool isId(const AsmToken &Token, const StringRef Id) const;
1153   bool isToken(const AsmToken::TokenKind Kind) const;
1154   bool trySkipId(const StringRef Id);
1155   bool trySkipToken(const AsmToken::TokenKind Kind);
1156   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1157   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1158   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1159   AsmToken::TokenKind getTokenKind() const;
1160   bool parseExpr(int64_t &Imm);
1161   StringRef getTokenStr() const;
1162   AsmToken peekToken();
1163   AsmToken getToken() const;
1164   SMLoc getLoc() const;
1165   void lex();
1166 
1167 public:
1168   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1169   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1170 
1171   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1172   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1173   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1174   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1175   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1176 
1177   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1178                             const unsigned MinVal,
1179                             const unsigned MaxVal,
1180                             const StringRef ErrMsg);
1181   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1182   bool parseSwizzleOffset(int64_t &Imm);
1183   bool parseSwizzleMacro(int64_t &Imm);
1184   bool parseSwizzleQuadPerm(int64_t &Imm);
1185   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1186   bool parseSwizzleBroadcast(int64_t &Imm);
1187   bool parseSwizzleSwap(int64_t &Imm);
1188   bool parseSwizzleReverse(int64_t &Imm);
1189 
1190   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1191   int64_t parseGPRIdxMacro();
1192 
1193   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1194   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1195   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1196   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1197   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1198 
1199   AMDGPUOperand::Ptr defaultDLC() const;
1200   AMDGPUOperand::Ptr defaultGLC() const;
1201   AMDGPUOperand::Ptr defaultSLC() const;
1202 
1203   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1204   AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1205   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1206   AMDGPUOperand::Ptr defaultOffsetU12() const;
1207   AMDGPUOperand::Ptr defaultOffsetS13() const;
1208 
1209   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1210 
1211   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1212                OptionalImmIndexMap &OptionalIdx);
1213   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1214   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1215   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1216 
1217   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1218 
1219   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1220                bool IsAtomic = false);
1221   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1222 
1223   OperandMatchResultTy parseDim(OperandVector &Operands);
1224   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1225   AMDGPUOperand::Ptr defaultRowMask() const;
1226   AMDGPUOperand::Ptr defaultBankMask() const;
1227   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1228   void cvtDPP(MCInst &Inst, const OperandVector &Operands);
1229 
1230   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1231                                     AMDGPUOperand::ImmTy Type);
1232   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1233   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1234   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1235   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1236   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1237   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1238                 uint64_t BasicInstType, bool skipVcc = false);
1239 
1240   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1241   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1242 };
1243 
1244 struct OptionalOperand {
1245   const char *Name;
1246   AMDGPUOperand::ImmTy Type;
1247   bool IsBit;
1248   bool (*ConvertResult)(int64_t&);
1249 };
1250 
1251 } // end anonymous namespace
1252 
// May be called with an integer type of equivalent bit width.
1254 static const fltSemantics *getFltSemantics(unsigned Size) {
1255   switch (Size) {
1256   case 4:
1257     return &APFloat::IEEEsingle();
1258   case 8:
1259     return &APFloat::IEEEdouble();
1260   case 2:
1261     return &APFloat::IEEEhalf();
1262   default:
1263     llvm_unreachable("unsupported fp type");
1264   }
1265 }
1266 
1267 static const fltSemantics *getFltSemantics(MVT VT) {
1268   return getFltSemantics(VT.getSizeInBits() / 8);
1269 }
1270 
1271 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1272   switch (OperandType) {
1273   case AMDGPU::OPERAND_REG_IMM_INT32:
1274   case AMDGPU::OPERAND_REG_IMM_FP32:
1275   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1276   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1277     return &APFloat::IEEEsingle();
1278   case AMDGPU::OPERAND_REG_IMM_INT64:
1279   case AMDGPU::OPERAND_REG_IMM_FP64:
1280   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1281   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1282     return &APFloat::IEEEdouble();
1283   case AMDGPU::OPERAND_REG_IMM_INT16:
1284   case AMDGPU::OPERAND_REG_IMM_FP16:
1285   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1286   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1287   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1288   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1289   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1290   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1291     return &APFloat::IEEEhalf();
1292   default:
1293     llvm_unreachable("unsupported fp type");
1294   }
1295 }
1296 
1297 //===----------------------------------------------------------------------===//
1298 // Operand
1299 //===----------------------------------------------------------------------===//
1300 
1301 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1302   bool Lost;
1303 
  // Convert the literal to the operand's floating-point semantics.
1305   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1306                                                APFloat::rmNearestTiesToEven,
1307                                                &Lost);
  // We allow precision loss but not overflow or underflow.
1309   if (Status != APFloat::opOK &&
1310       Lost &&
1311       ((Status & APFloat::opOverflow)  != 0 ||
1312        (Status & APFloat::opUnderflow) != 0)) {
1313     return false;
1314   }
1315 
1316   return true;
1317 }
1318 
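// Returns true if \p Val can be truncated to \p Size bits without losing
// information, i.e. it fits as either a signed or an unsigned value.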
1319 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1320   return isUIntN(Size, Val) || isIntN(Size, Val);
1321 }
1322 
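// Check whether this operand can be encoded as a hardware inline constant
// of the given type: small integers in [-16, 64] or one of the special FP
// values (0.0, +-0.5, +-1.0, +-2.0, +-4.0 and, when supported, 1/(2*pi)).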
1323 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1324 
  // This is a hack to enable named inline values like shared_base with both
  // 32-bit and 64-bit operands. Note that these values are defined as 32-bit
  // operands only.
1329   if (isInlineValue()) {
1330     return true;
1331   }
1332 
1333   if (!isImmTy(ImmTyNone)) {
1334     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1335     return false;
1336   }
  // TODO: We should avoid using host floats here. It would be better to check
  // the float bit values, which is what a few other places do. We've had bot
  // failures before due to weird NaN support on MIPS hosts.
1340 
1341   APInt Literal(64, Imm.Val);
1342 
1343   if (Imm.IsFPImm) { // We got fp literal token
1344     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1345       return AMDGPU::isInlinableLiteral64(Imm.Val,
1346                                           AsmParser->hasInv2PiInlineImm());
1347     }
1348 
1349     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1350     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1351       return false;
1352 
1353     if (type.getScalarSizeInBits() == 16) {
1354       return AMDGPU::isInlinableLiteral16(
1355         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1356         AsmParser->hasInv2PiInlineImm());
1357     }
1358 
1359     // Check if single precision literal is inlinable
1360     return AMDGPU::isInlinableLiteral32(
1361       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1362       AsmParser->hasInv2PiInlineImm());
1363   }
1364 
1365   // We got int literal token.
1366   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1367     return AMDGPU::isInlinableLiteral64(Imm.Val,
1368                                         AsmParser->hasInv2PiInlineImm());
1369   }
1370 
1371   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1372     return false;
1373   }
1374 
1375   if (type.getScalarSizeInBits() == 16) {
1376     return AMDGPU::isInlinableLiteral16(
1377       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1378       AsmParser->hasInv2PiInlineImm());
1379   }
1380 
1381   return AMDGPU::isInlinableLiteral32(
1382     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1383     AsmParser->hasInv2PiInlineImm());
1384 }
1385 
1386 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1387   // Check that this immediate can be added as literal
1388   if (!isImmTy(ImmTyNone)) {
1389     return false;
1390   }
1391 
1392   if (!Imm.IsFPImm) {
1393     // We got int literal token.
1394 
1395     if (type == MVT::f64 && hasFPModifiers()) {
      // FP modifiers cannot be applied to int literals while preserving the
      // same semantics for VOP1/2/C and VOP3 because of integer truncation.
      // To avoid ambiguity, disable these cases.
1399       return false;
1400     }
1401 
1402     unsigned Size = type.getSizeInBits();
1403     if (Size == 64)
1404       Size = 32;
1405 
1406     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1407     // types.
1408     return isSafeTruncation(Imm.Val, Size);
1409   }
1410 
1411   // We got fp literal token
1412   if (type == MVT::f64) { // Expected 64-bit fp operand
    // The literal's low 32 bits will be zeroed out, but we accept such
    // literals here.
1414     return true;
1415   }
1416 
1417   if (type == MVT::i64) { // Expected 64-bit int operand
1418     // We don't allow fp literals in 64-bit integer instructions. It is
1419     // unclear how we should encode them.
1420     return false;
1421   }
1422 
1423   // We allow fp literals with f16x2 operands assuming that the specified
1424   // literal goes into the lower half and the upper half is zero. We also
  // require that the literal can be losslessly converted to f16.
1426   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1427                      (type == MVT::v2i16)? MVT::i16 : type;
1428 
1429   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1430   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1431 }
1432 
1433 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1434   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1435 }
1436 
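// SDWA sources: VI only allows VGPRs, while GFX9 and GFX10 also allow SGPRs
// and inline constants.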
1437 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1438   if (AsmParser->isVI())
1439     return isVReg32();
1440   else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1441     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1442   else
1443     return false;
1444 }
1445 
1446 bool AMDGPUOperand::isSDWAFP16Operand() const {
1447   return isSDWAOperand(MVT::f16);
1448 }
1449 
1450 bool AMDGPUOperand::isSDWAFP32Operand() const {
1451   return isSDWAOperand(MVT::f32);
1452 }
1453 
1454 bool AMDGPUOperand::isSDWAInt16Operand() const {
1455   return isSDWAOperand(MVT::i16);
1456 }
1457 
1458 bool AMDGPUOperand::isSDWAInt32Operand() const {
1459   return isSDWAOperand(MVT::i32);
1460 }
1461 
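// Fold the abs/neg input modifiers into a raw FP literal by clearing or
// flipping its sign bit.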
1462 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1463 {
1464   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1465   assert(Size == 2 || Size == 4 || Size == 8);
1466 
1467   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1468 
1469   if (Imm.Mods.Abs) {
1470     Val &= ~FpSignMask;
1471   }
1472   if (Imm.Mods.Neg) {
1473     Val ^= FpSignMask;
1474   }
1475 
1476   return Val;
1477 }
1478 
1479 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1480   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1481                              Inst.getNumOperands())) {
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &&
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1485   } else {
1486     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1487     Inst.addOperand(MCOperand::createImm(Imm.Val));
1488   }
1489 }
1490 
1491 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1492   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1493   auto OpNum = Inst.getNumOperands();
1494   // Check that this operand accepts literals
1495   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1496 
1497   if (ApplyModifiers) {
1498     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1499     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1500     Val = applyInputFPModifiers(Val, Size);
1501   }
1502 
1503   APInt Literal(64, Val);
1504   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1505 
1506   if (Imm.IsFPImm) { // We got fp literal token
1507     switch (OpTy) {
1508     case AMDGPU::OPERAND_REG_IMM_INT64:
1509     case AMDGPU::OPERAND_REG_IMM_FP64:
1510     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1511     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1512       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1513                                        AsmParser->hasInv2PiInlineImm())) {
1514         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1515         return;
1516       }
1517 
1518       // Non-inlineable
1519       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1520         // For fp operands we check if low 32 bits are zeros
1521         if (Literal.getLoBits(32) != 0) {
1522           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1523           "Can't encode literal as exact 64-bit floating-point operand. "
1524           "Low 32-bits will be set to zero");
1525         }
1526 
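        // Only the high 32 bits of a 64-bit FP literal are encoded;
        // the hardware fills the low 32 bits with zeroes.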
1527         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1528         return;
1529       }
1530 
1531       // We don't allow fp literals in 64-bit integer instructions. It is
1532       // unclear how we should encode them. This case should be checked earlier
1533       // in predicate methods (isLiteralImm())
1534       llvm_unreachable("fp literal in 64-bit integer instruction.");
1535 
1536     case AMDGPU::OPERAND_REG_IMM_INT32:
1537     case AMDGPU::OPERAND_REG_IMM_FP32:
1538     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1539     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1540     case AMDGPU::OPERAND_REG_IMM_INT16:
1541     case AMDGPU::OPERAND_REG_IMM_FP16:
1542     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1543     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1544     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1545     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1546     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1547     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1548       bool lost;
1549       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1550       // Convert literal to the operand's fp format (f32 or f16)
1551       FPLiteral.convert(*getOpFltSemantics(OpTy),
1552                         APFloat::rmNearestTiesToEven, &lost);
1553       // We allow precision loss but not overflow or underflow. This should be
1554       // checked earlier in isLiteralImm()
1555 
1556       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1557       Inst.addOperand(MCOperand::createImm(ImmVal));
1558       return;
1559     }
1560     default:
1561       llvm_unreachable("invalid operand size");
1562     }
1563 
1564     return;
1565   }
1566 
1567   // We got an int literal token.
1568   // Only sign extend inline immediates.
1569   switch (OpTy) {
1570   case AMDGPU::OPERAND_REG_IMM_INT32:
1571   case AMDGPU::OPERAND_REG_IMM_FP32:
1572   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1573   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1574   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1575   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1576     if (isSafeTruncation(Val, 32) &&
1577         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1578                                      AsmParser->hasInv2PiInlineImm())) {
1579       Inst.addOperand(MCOperand::createImm(Val));
1580       return;
1581     }
1582 
1583     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1584     return;
1585 
1586   case AMDGPU::OPERAND_REG_IMM_INT64:
1587   case AMDGPU::OPERAND_REG_IMM_FP64:
1588   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1589   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1590     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1591       Inst.addOperand(MCOperand::createImm(Val));
1592       return;
1593     }
1594 
1595     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1596     return;
1597 
1598   case AMDGPU::OPERAND_REG_IMM_INT16:
1599   case AMDGPU::OPERAND_REG_IMM_FP16:
1600   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1601   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1602     if (isSafeTruncation(Val, 16) &&
1603         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1604                                      AsmParser->hasInv2PiInlineImm())) {
1605       Inst.addOperand(MCOperand::createImm(Val));
1606       return;
1607     }
1608 
1609     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1610     return;
1611 
1612   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1613   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1614     assert(isSafeTruncation(Val, 16));
1615     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1616                                         AsmParser->hasInv2PiInlineImm()));
1617 
1618     Inst.addOperand(MCOperand::createImm(Val));
1619     return;
1620   }
1621   default:
1622     llvm_unreachable("invalid operand size");
1623   }
1624 }
1625 
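// Add a KImm operand of the given bit width: integer literals are truncated
// to Bitwidth bits, fp literals are rounded to the corresponding fp format.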
1626 template <unsigned Bitwidth>
1627 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1628   APInt Literal(64, Imm.Val);
1629 
1630   if (!Imm.IsFPImm) {
1631     // We got int literal token.
1632     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1633     return;
1634   }
1635 
1636   bool Lost;
1637   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1638   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1639                     APFloat::rmNearestTiesToEven, &Lost);
1640   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1641 }
1642 
1643 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1644   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1645 }
1646 
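// Special registers (aperture bases/limits, pops_exiting_wave_id) that are
// read as inline values rather than ordinary register operands.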
1647 static bool isInlineValue(unsigned Reg) {
1648   switch (Reg) {
1649   case AMDGPU::SRC_SHARED_BASE:
1650   case AMDGPU::SRC_SHARED_LIMIT:
1651   case AMDGPU::SRC_PRIVATE_BASE:
1652   case AMDGPU::SRC_PRIVATE_LIMIT:
1653   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1654     return true;
1655   default:
1656     return false;
1657   }
1658 }
1659 
1660 bool AMDGPUOperand::isInlineValue() const {
1661   return isRegKind() && ::isInlineValue(getReg());
1662 }
1663 
1664 //===----------------------------------------------------------------------===//
1665 // AsmParser
1666 //===----------------------------------------------------------------------===//
1667 
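// Map a register kind and width (in 32-bit dwords) to the corresponding
// register class ID, or return -1 if no such class exists.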
1668 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1669   if (Is == IS_VGPR) {
1670     switch (RegWidth) {
1671       default: return -1;
1672       case 1: return AMDGPU::VGPR_32RegClassID;
1673       case 2: return AMDGPU::VReg_64RegClassID;
1674       case 3: return AMDGPU::VReg_96RegClassID;
1675       case 4: return AMDGPU::VReg_128RegClassID;
1676       case 8: return AMDGPU::VReg_256RegClassID;
1677       case 16: return AMDGPU::VReg_512RegClassID;
1678     }
1679   } else if (Is == IS_TTMP) {
1680     switch (RegWidth) {
1681       default: return -1;
1682       case 1: return AMDGPU::TTMP_32RegClassID;
1683       case 2: return AMDGPU::TTMP_64RegClassID;
1684       case 4: return AMDGPU::TTMP_128RegClassID;
1685       case 8: return AMDGPU::TTMP_256RegClassID;
1686       case 16: return AMDGPU::TTMP_512RegClassID;
1687     }
1688   } else if (Is == IS_SGPR) {
1689     switch (RegWidth) {
1690       default: return -1;
1691       case 1: return AMDGPU::SGPR_32RegClassID;
1692       case 2: return AMDGPU::SGPR_64RegClassID;
1693       case 4: return AMDGPU::SGPR_128RegClassID;
1694       case 8: return AMDGPU::SGPR_256RegClassID;
1695       case 16: return AMDGPU::SGPR_512RegClassID;
1696     }
1697   }
1698   return -1;
1699 }
1700 
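// Map a special register name (including its SP3 aliases) to the MC register
// number; returns 0 if the name is not a special register.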
1701 static unsigned getSpecialRegForName(StringRef RegName) {
1702   return StringSwitch<unsigned>(RegName)
1703     .Case("exec", AMDGPU::EXEC)
1704     .Case("vcc", AMDGPU::VCC)
1705     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1706     .Case("xnack_mask", AMDGPU::XNACK_MASK)
1707     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
1708     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
1709     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1710     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1711     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
1712     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
1713     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1714     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1715     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1716     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1717     .Case("lds_direct", AMDGPU::LDS_DIRECT)
1718     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1719     .Case("m0", AMDGPU::M0)
1720     .Case("scc", AMDGPU::SCC)
1721     .Case("tba", AMDGPU::TBA)
1722     .Case("tma", AMDGPU::TMA)
1723     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1724     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1725     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1726     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1727     .Case("vcc_lo", AMDGPU::VCC_LO)
1728     .Case("vcc_hi", AMDGPU::VCC_HI)
1729     .Case("exec_lo", AMDGPU::EXEC_LO)
1730     .Case("exec_hi", AMDGPU::EXEC_HI)
1731     .Case("tma_lo", AMDGPU::TMA_LO)
1732     .Case("tma_hi", AMDGPU::TMA_HI)
1733     .Case("tba_lo", AMDGPU::TBA_LO)
1734     .Case("tba_hi", AMDGPU::TBA_HI)
1735     .Case("null", AMDGPU::SGPR_NULL)
1736     .Default(0);
1737 }
1738 
1739 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1740                                     SMLoc &EndLoc) {
1741   auto R = parseRegister();
1742   if (!R) return true;
1743   assert(R->isReg());
1744   RegNo = R->getReg();
1745   StartLoc = R->getStartLoc();
1746   EndLoc = R->getEndLoc();
1747   return false;
1748 }
1749 
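// Append the next parsed register to the register list being built.
// For special registers this merges matching lo/hi halves (e.g. vcc_lo and
// vcc_hi into vcc); VGPR/SGPR/TTMP registers must be consecutive.
// Returns false if the register cannot be appended.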
1750 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1751                                             RegisterKind RegKind, unsigned Reg1,
1752                                             unsigned RegNum) {
1753   switch (RegKind) {
1754   case IS_SPECIAL:
1755     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1756       Reg = AMDGPU::EXEC;
1757       RegWidth = 2;
1758       return true;
1759     }
1760     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1761       Reg = AMDGPU::FLAT_SCR;
1762       RegWidth = 2;
1763       return true;
1764     }
1765     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
1766       Reg = AMDGPU::XNACK_MASK;
1767       RegWidth = 2;
1768       return true;
1769     }
1770     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1771       Reg = AMDGPU::VCC;
1772       RegWidth = 2;
1773       return true;
1774     }
1775     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1776       Reg = AMDGPU::TBA;
1777       RegWidth = 2;
1778       return true;
1779     }
1780     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
1781       Reg = AMDGPU::TMA;
1782       RegWidth = 2;
1783       return true;
1784     }
1785     return false;
1786   case IS_VGPR:
1787   case IS_SGPR:
1788   case IS_TTMP:
1789     if (Reg1 != Reg + RegWidth) {
1790       return false;
1791     }
1792     RegWidth++;
1793     return true;
1794   default:
1795     llvm_unreachable("unexpected register kind");
1796   }
1797 }
1798 
1799 static const StringRef Registers[] = {
1800   { "v" },
1801   { "s" },
1802   { "ttmp" },
1803 };
1804 
1805 bool
1806 AMDGPUAsmParser::isRegister(const AsmToken &Token,
1807                             const AsmToken &NextToken) const {
1808 
1809   // A list of consecutive registers: [s0,s1,s2,s3]
1810   if (Token.is(AsmToken::LBrac))
1811     return true;
1812 
1813   if (!Token.is(AsmToken::Identifier))
1814     return false;
1815 
1816   // A single register like s0 or a range of registers like s[0:1]
1817 
1818   StringRef RegName = Token.getString();
1819 
1820   for (StringRef Reg : Registers) {
1821     if (RegName.startswith(Reg)) {
1822       if (Reg.size() < RegName.size()) {
1823         unsigned RegNum;
1824         // A single register with an index: rXX
1825         if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum))
1826           return true;
1827       } else {
1828         // A range of registers: r[XX:YY].
1829         if (NextToken.is(AsmToken::LBrac))
1830           return true;
1831       }
1832     }
1833   }
1834 
1835   return getSpecialRegForName(RegName);
1836 }
1837 
1838 bool
1839 AMDGPUAsmParser::isRegister()
1840 {
1841   return isRegister(getToken(), peekToken());
1842 }
1843 
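// Parse a register reference: a special register name, a single register
// (e.g. v0), a register range (e.g. s[0:3]) or a list of consecutive
// registers (e.g. [s0,s1,s2,s3]). On success fills in the register kind,
// MC register, starting index and width in dwords.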
1844 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1845                                           unsigned &RegNum, unsigned &RegWidth,
1846                                           unsigned *DwordRegIndex) {
1847   if (DwordRegIndex) { *DwordRegIndex = 0; }
1848   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
1849   if (getLexer().is(AsmToken::Identifier)) {
1850     StringRef RegName = Parser.getTok().getString();
1851     if ((Reg = getSpecialRegForName(RegName))) {
1852       Parser.Lex();
1853       RegKind = IS_SPECIAL;
1854     } else {
1855       unsigned RegNumIndex = 0;
1856       if (RegName[0] == 'v') {
1857         RegNumIndex = 1;
1858         RegKind = IS_VGPR;
1859       } else if (RegName[0] == 's') {
1860         RegNumIndex = 1;
1861         RegKind = IS_SGPR;
1862       } else if (RegName.startswith("ttmp")) {
1863         RegNumIndex = strlen("ttmp");
1864         RegKind = IS_TTMP;
1865       } else {
1866         return false;
1867       }
1868       if (RegName.size() > RegNumIndex) {
1869         // Single 32-bit register: vXX.
1870         if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
1871           return false;
1872         Parser.Lex();
1873         RegWidth = 1;
1874       } else {
1875         // Range of registers: v[XX:YY]. ":YY" is optional.
1876         Parser.Lex();
1877         int64_t RegLo, RegHi;
1878         if (getLexer().isNot(AsmToken::LBrac))
1879           return false;
1880         Parser.Lex();
1881 
1882         if (getParser().parseAbsoluteExpression(RegLo))
1883           return false;
1884 
1885         const bool isRBrace = getLexer().is(AsmToken::RBrac);
1886         if (!isRBrace && getLexer().isNot(AsmToken::Colon))
1887           return false;
1888         Parser.Lex();
1889 
1890         if (isRBrace) {
1891           RegHi = RegLo;
1892         } else {
1893           if (getParser().parseAbsoluteExpression(RegHi))
1894             return false;
1895 
1896           if (getLexer().isNot(AsmToken::RBrac))
1897             return false;
1898           Parser.Lex();
1899         }
1900         RegNum = (unsigned) RegLo;
1901         RegWidth = (RegHi - RegLo) + 1;
1902       }
1903     }
1904   } else if (getLexer().is(AsmToken::LBrac)) {
1905     // List of consecutive registers: [s0,s1,s2,s3]
1906     Parser.Lex();
1907     if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
1908       return false;
1909     if (RegWidth != 1)
1910       return false;
1911     RegisterKind RegKind1;
1912     unsigned Reg1, RegNum1, RegWidth1;
1913     do {
1914       if (getLexer().is(AsmToken::Comma)) {
1915         Parser.Lex();
1916       } else if (getLexer().is(AsmToken::RBrac)) {
1917         Parser.Lex();
1918         break;
1919       } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
1920         if (RegWidth1 != 1) {
1921           return false;
1922         }
1923         if (RegKind1 != RegKind) {
1924           return false;
1925         }
1926         if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
1927           return false;
1928         }
1929       } else {
1930         return false;
1931       }
1932     } while (true);
1933   } else {
1934     return false;
1935   }
1936   switch (RegKind) {
1937   case IS_SPECIAL:
1938     RegNum = 0;
1939     RegWidth = 1;
1940     break;
1941   case IS_VGPR:
1942   case IS_SGPR:
1943   case IS_TTMP:
1944   {
1945     unsigned Size = 1;
1946     if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
1947       // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
1948       Size = std::min(RegWidth, 4u);
1949     }
1950     if (RegNum % Size != 0)
1951       return false;
1952     if (DwordRegIndex) { *DwordRegIndex = RegNum; }
1953     RegNum = RegNum / Size;
1954     int RCID = getRegClass(RegKind, RegWidth);
1955     if (RCID == -1)
1956       return false;
1957     const MCRegisterClass RC = TRI->getRegClass(RCID);
1958     if (RegNum >= RC.getNumRegs())
1959       return false;
1960     Reg = RC.getRegister(RegNum);
1961     break;
1962   }
1963 
1964   default:
1965     llvm_unreachable("unexpected register kind");
1966   }
1967 
1968   if (!subtargetHasRegister(*TRI, Reg))
1969     return false;
1970   return true;
1971 }
1972 
1973 Optional<StringRef>
1974 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
1975   switch (RegKind) {
1976   case IS_VGPR:
1977     return StringRef(".amdgcn.next_free_vgpr");
1978   case IS_SGPR:
1979     return StringRef(".amdgcn.next_free_sgpr");
1980   default:
1981     return None;
1982   }
1983 }
1984 
1985 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
1986   auto SymbolName = getGprCountSymbolName(RegKind);
1987   assert(SymbolName && "initializing invalid register kind");
1988   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
1989   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
1990 }
1991 
1992 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
1993                                             unsigned DwordRegIndex,
1994                                             unsigned RegWidth) {
1995   // Symbols are only defined for GCN targets
1996   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
1997     return true;
1998 
1999   auto SymbolName = getGprCountSymbolName(RegKind);
2000   if (!SymbolName)
2001     return true;
2002   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2003 
2004   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2005   int64_t OldCount;
2006 
2007   if (!Sym->isVariable())
2008     return !Error(getParser().getTok().getLoc(),
2009                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2010   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2011     return !Error(
2012         getParser().getTok().getLoc(),
2013         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2014 
2015   if (OldCount <= NewMax)
2016     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2017 
2018   return true;
2019 }
2020 
2021 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
2022   const auto &Tok = Parser.getTok();
2023   SMLoc StartLoc = Tok.getLoc();
2024   SMLoc EndLoc = Tok.getEndLoc();
2025   RegisterKind RegKind;
2026   unsigned Reg, RegNum, RegWidth, DwordRegIndex;
2027 
2028   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
2029     // FIXME: improve error messages (bug 41303).
2030     Error(StartLoc, "not a valid operand.");
2031     return nullptr;
2032   }
2033   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2034     if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
2035       return nullptr;
2036   } else
2037     KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
2038   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2039 }
2040 
2041 bool
2042 AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool HasSP3AbsModifier) {
2043   if (HasSP3AbsModifier) {
2044     // This is a workaround for handling expressions
2045     // as arguments of SP3 'abs' modifier, for example:
2046     //     |1.0|
2047     //     |-1|
2048     //     |1+x|
2049     // This syntax is not compatible with syntax of standard
2050     // MC expressions (due to the trailing '|').
2051 
2052     SMLoc EndLoc;
2053     const MCExpr *Expr;
2054     SMLoc StartLoc = getLoc();
2055 
2056     if (getParser().parsePrimaryExpr(Expr, EndLoc)) {
2057       return true;
2058     }
2059 
2060     if (!Expr->evaluateAsAbsolute(Val))
2061       return Error(StartLoc, "expected absolute expression");
2062 
2063     return false;
2064   }
2065 
2066   return getParser().parseAbsoluteExpression(Val);
2067 }
2068 
2069 OperandMatchResultTy
2070 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2071   // TODO: add syntactic sugar for 1/(2*PI)
2072 
2073   const auto& Tok = getToken();
2074   const auto& NextTok = peekToken();
2075   bool IsReal = Tok.is(AsmToken::Real);
2076   SMLoc S = Tok.getLoc();
2077   bool Negate = false;
2078 
2079   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2080     lex();
2081     IsReal = true;
2082     Negate = true;
2083   }
2084 
2085   if (IsReal) {
2086     // Floating-point expressions are not supported.
2087     // Can only allow floating-point literals with an
2088     // optional sign.
2089 
2090     StringRef Num = getTokenStr();
2091     lex();
2092 
2093     APFloat RealVal(APFloat::IEEEdouble());
2094     auto roundMode = APFloat::rmNearestTiesToEven;
2095     if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) {
2096       return MatchOperand_ParseFail;
2097     }
2098     if (Negate)
2099       RealVal.changeSign();
2100 
2101     Operands.push_back(
2102       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2103                                AMDGPUOperand::ImmTyNone, true));
2104 
2105     return MatchOperand_Success;
2106 
2107     // FIXME: Should enable arbitrary expressions here
2108   } else if (Tok.is(AsmToken::Integer) ||
2109              (Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Integer))){
2110 
2111     int64_t IntVal;
2112     if (parseAbsoluteExpr(IntVal, HasSP3AbsModifier))
2113       return MatchOperand_ParseFail;
2114 
2115     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2116     return MatchOperand_Success;
2117   }
2118 
2119   return MatchOperand_NoMatch;
2120 }
2121 
2122 OperandMatchResultTy
2123 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2124   if (!isRegister())
2125     return MatchOperand_NoMatch;
2126 
2127   if (auto R = parseRegister()) {
2128     assert(R->isReg());
2129     Operands.push_back(std::move(R));
2130     return MatchOperand_Success;
2131   }
2132   return MatchOperand_ParseFail;
2133 }
2134 
2135 OperandMatchResultTy
2136 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2137   auto res = parseReg(Operands);
2138   return (res == MatchOperand_NoMatch)?
2139          parseImm(Operands, HasSP3AbsMod) :
2140          res;
2141 }
2142 
2143 // Check if the current token is an SP3 'neg' modifier.
2144 // Currently this modifier is allowed in the following contexts:
2145 //
2146 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2147 // 2. Before an 'abs' modifier: -abs(...)
2148 // 3. Before an SP3 'abs' modifier: -|...|
2149 //
2150 // In all other cases "-" is handled as a part
2151 // of an expression that follows the sign.
2152 //
2153 // Note: When "-" is followed by an integer literal,
2154 // it is interpreted as integer negation rather than
2155 // a floating-point NEG modifier applied to the literal.
2156 // Besides being counter-intuitive, such use of a floating-point
2157 // NEG modifier would have resulted in different meanings
2158 // of integer literals used with VOP1/2/C and VOP3,
2159 // for example:
2160 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2161 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2162 // Negative fp literals with a preceding "-" are
2163 // handled likewise for uniformity.
2164 //
2165 bool
2166 AMDGPUAsmParser::parseSP3NegModifier() {
2167 
2168   AsmToken NextToken[2];
2169   peekTokens(NextToken);
2170 
2171   if (isToken(AsmToken::Minus) &&
2172       (isRegister(NextToken[0], NextToken[1]) ||
2173        NextToken[0].is(AsmToken::Pipe) ||
2174        isId(NextToken[0], "abs"))) {
2175     lex();
2176     return true;
2177   }
2178 
2179   return false;
2180 }
2181 
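// Parse an operand optionally wrapped in fp input modifiers: 'neg(...)',
// 'abs(...)' or the SP3 forms '-' and '|...|'. The parsed modifiers are
// attached to the resulting register or immediate operand.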
2182 OperandMatchResultTy
2183 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2184                                               bool AllowImm) {
2185   bool Neg, SP3Neg;
2186   bool Abs, SP3Abs;
2187   SMLoc Loc;
2188 
2189   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2190   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2191     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2192     return MatchOperand_ParseFail;
2193   }
2194 
2195   SP3Neg = parseSP3NegModifier();
2196 
2197   Loc = getLoc();
2198   Neg = trySkipId("neg");
2199   if (Neg && SP3Neg) {
2200     Error(Loc, "expected register or immediate");
2201     return MatchOperand_ParseFail;
2202   }
2203   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2204     return MatchOperand_ParseFail;
2205 
2206   Abs = trySkipId("abs");
2207   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2208     return MatchOperand_ParseFail;
2209 
2210   Loc = getLoc();
2211   SP3Abs = trySkipToken(AsmToken::Pipe);
2212   if (Abs && SP3Abs) {
2213     Error(Loc, "expected register or immediate");
2214     return MatchOperand_ParseFail;
2215   }
2216 
2217   OperandMatchResultTy Res;
2218   if (AllowImm) {
2219     Res = parseRegOrImm(Operands, SP3Abs);
2220   } else {
2221     Res = parseReg(Operands);
2222   }
2223   if (Res != MatchOperand_Success) {
2224     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2225   }
2226 
2227   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2228     return MatchOperand_ParseFail;
2229   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2230     return MatchOperand_ParseFail;
2231   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2232     return MatchOperand_ParseFail;
2233 
2234   AMDGPUOperand::Modifiers Mods;
2235   Mods.Abs = Abs || SP3Abs;
2236   Mods.Neg = Neg || SP3Neg;
2237 
2238   if (Mods.hasFPModifiers()) {
2239     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2240     Op.setModifiers(Mods);
2241   }
2242   return MatchOperand_Success;
2243 }
2244 
2245 OperandMatchResultTy
2246 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2247                                                bool AllowImm) {
2248   bool Sext = trySkipId("sext");
2249   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2250     return MatchOperand_ParseFail;
2251 
2252   OperandMatchResultTy Res;
2253   if (AllowImm) {
2254     Res = parseRegOrImm(Operands);
2255   } else {
2256     Res = parseReg(Operands);
2257   }
2258   if (Res != MatchOperand_Success) {
2259     return Sext? MatchOperand_ParseFail : Res;
2260   }
2261 
2262   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2263     return MatchOperand_ParseFail;
2264 
2265   AMDGPUOperand::Modifiers Mods;
2266   Mods.Sext = Sext;
2267 
2268   if (Mods.hasIntModifiers()) {
2269     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2270     Op.setModifiers(Mods);
2271   }
2272 
2273   return MatchOperand_Success;
2274 }
2275 
2276 OperandMatchResultTy
2277 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2278   return parseRegOrImmWithFPInputMods(Operands, false);
2279 }
2280 
2281 OperandMatchResultTy
2282 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2283   return parseRegOrImmWithIntInputMods(Operands, false);
2284 }
2285 
2286 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2287   auto Loc = getLoc();
2288   if (trySkipId("off")) {
2289     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2290                                                 AMDGPUOperand::ImmTyOff, false));
2291     return MatchOperand_Success;
2292   }
2293 
2294   if (!isRegister())
2295     return MatchOperand_NoMatch;
2296 
2297   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2298   if (Reg) {
2299     Operands.push_back(std::move(Reg));
2300     return MatchOperand_Success;
2301   }
2302 
2303   return MatchOperand_ParseFail;
2304 
2305 }
2306 
2307 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2308   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2309 
2310   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2311       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2312       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2313       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2314     return Match_InvalidOperand;
2315 
2316   if ((TSFlags & SIInstrFlags::VOP3) &&
2317       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2318       getForcedEncodingSize() != 64)
2319     return Match_PreferE32;
2320 
2321   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2322       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2323     // v_mac_f32/16 allow only dst_sel == DWORD;
2324     auto OpNum =
2325         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2326     const auto &Op = Inst.getOperand(OpNum);
2327     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2328       return Match_InvalidOperand;
2329     }
2330   }
2331 
2332   if (TSFlags & SIInstrFlags::FLAT) {
2333     // FIXME: Produces error without correct column reported.
2334     auto Opcode = Inst.getOpcode();
2335     auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
2336 
2337     const auto &Op = Inst.getOperand(OpNum);
2338     if (!hasFlatOffsets() && Op.getImm() != 0)
2339       return Match_InvalidOperand;
2340 
2341     // GFX10: The address offset is a 12-bit signed byte offset. For the FLAT
2342     // segment it must be non-negative; the MSB is ignored and forced to zero.
2343     if (isGFX10()) {
2344       if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
2345         if (!isInt<12>(Op.getImm()))
2346           return Match_InvalidOperand;
2347       } else {
2348         if (!isUInt<11>(Op.getImm()))
2349           return Match_InvalidOperand;
2350       }
2351     }
2352   }
2353 
2354   return Match_Success;
2355 }
2356 
2357 // What asm variants we should check
2358 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2359   if (getForcedEncodingSize() == 32) {
2360     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2361     return makeArrayRef(Variants);
2362   }
2363 
2364   if (isForcedVOP3()) {
2365     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2366     return makeArrayRef(Variants);
2367   }
2368 
2369   if (isForcedSDWA()) {
2370     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2371                                         AMDGPUAsmVariants::SDWA9};
2372     return makeArrayRef(Variants);
2373   }
2374 
2375   if (isForcedDPP()) {
2376     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2377     return makeArrayRef(Variants);
2378   }
2379 
2380   static const unsigned Variants[] = {
2381     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2382     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2383   };
2384 
2385   return makeArrayRef(Variants);
2386 }
2387 
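// Return the first implicit SGPR or special register (FLAT_SCR, VCC, M0,
// SGPR_NULL) read by this instruction, or NoRegister if there is none.
// Such implicit reads count toward the constant bus limit.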
2388 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2389   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2390   const unsigned Num = Desc.getNumImplicitUses();
2391   for (unsigned i = 0; i < Num; ++i) {
2392     unsigned Reg = Desc.ImplicitUses[i];
2393     switch (Reg) {
2394     case AMDGPU::FLAT_SCR:
2395     case AMDGPU::VCC:
2396     case AMDGPU::VCC_LO:
2397     case AMDGPU::VCC_HI:
2398     case AMDGPU::M0:
2399     case AMDGPU::SGPR_NULL:
2400       return Reg;
2401     default:
2402       break;
2403     }
2404   }
2405   return AMDGPU::NoRegister;
2406 }
2407 
2408 // NB: This code is correct only when used to check constant
2409 // bus limitations because GFX7 supports no f16 inline constants.
2410 // Note that there are no cases in which a GFX7 opcode violates
2411 // constant bus limitations due to the use of an f16 constant.
2412 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2413                                        unsigned OpIdx) const {
2414   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2415 
2416   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2417     return false;
2418   }
2419 
2420   const MCOperand &MO = Inst.getOperand(OpIdx);
2421 
2422   int64_t Val = MO.getImm();
2423   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2424 
2425   switch (OpSize) { // expected operand size
2426   case 8:
2427     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2428   case 4:
2429     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2430   case 2: {
2431     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2432     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2433         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2434         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
2435         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
2436       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2437     } else {
2438       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2439     }
2440   }
2441   default:
2442     llvm_unreachable("invalid operand size");
2443   }
2444 }
2445 
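// An operand uses the constant bus if it is a non-inline immediate, an
// expression, or an SGPR/special register; VGPR operands do not.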
2446 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2447   const MCOperand &MO = Inst.getOperand(OpIdx);
2448   if (MO.isImm()) {
2449     return !isInlineConstant(Inst, OpIdx);
2450   }
2451   return !MO.isReg() ||
2452          isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2453 }
2454 
2455 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2456   const unsigned Opcode = Inst.getOpcode();
2457   const MCInstrDesc &Desc = MII.get(Opcode);
2458   unsigned ConstantBusUseCount = 0;
2459   unsigned NumLiterals = 0;
2460   unsigned LiteralSize;
2461 
2462   if (Desc.TSFlags &
2463       (SIInstrFlags::VOPC |
2464        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2465        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2466        SIInstrFlags::SDWA)) {
2467     // Check special imm operands (used by madmk, etc)
2468     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2469       ++ConstantBusUseCount;
2470     }
2471 
2472     SmallDenseSet<unsigned> SGPRsUsed;
2473     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2474     if (SGPRUsed != AMDGPU::NoRegister) {
2475       SGPRsUsed.insert(SGPRUsed);
2476       ++ConstantBusUseCount;
2477     }
2478 
2479     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2480     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2481     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2482 
2483     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2484 
2485     for (int OpIdx : OpIndices) {
2486       if (OpIdx == -1) break;
2487 
2488       const MCOperand &MO = Inst.getOperand(OpIdx);
2489       if (usesConstantBus(Inst, OpIdx)) {
2490         if (MO.isReg()) {
2491           const unsigned Reg = mc2PseudoReg(MO.getReg());
2492           // Pairs of registers with partial intersections like these:
2493           //   s0, s[0:1]
2494           //   flat_scratch_lo, flat_scratch
2495           //   flat_scratch_lo, flat_scratch_hi
2496           // are theoretically valid but are disabled anyway.
2497           // Note that this code mimics SIInstrInfo::verifyInstruction
2498           if (!SGPRsUsed.count(Reg)) {
2499             SGPRsUsed.insert(Reg);
2500             ++ConstantBusUseCount;
2501           }
2502           SGPRUsed = Reg;
2503         } else { // Expression or a literal
2504 
2505           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
2506             continue; // special operand like VINTERP attr_chan
2507 
2508           // An instruction may use only one literal.
2509           // This has been validated in a previous step.
2510           // See validateVOP3Literal.
2511           // This literal may be used as more than one operand.
2512           // If all these operands are of the same size,
2513           // this literal counts as one scalar value.
2514           // Otherwise it counts as 2 scalar values.
2515           // See "GFX10 Shader Programming", section 3.6.2.3.
2516 
2517           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
2518           if (Size < 4) Size = 4;
2519 
2520           if (NumLiterals == 0) {
2521             NumLiterals = 1;
2522             LiteralSize = Size;
2523           } else if (LiteralSize != Size) {
2524             NumLiterals = 2;
2525           }
2526         }
2527       }
2528     }
2529   }
2530   ConstantBusUseCount += NumLiterals;
2531 
2532   if (isGFX10())
2533     return ConstantBusUseCount <= 2;
2534 
2535   return ConstantBusUseCount <= 1;
2536 }
2537 
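// For instructions whose vdst is marked early-clobber, the destination
// register must not overlap any of the source registers.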
2538 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2539   const unsigned Opcode = Inst.getOpcode();
2540   const MCInstrDesc &Desc = MII.get(Opcode);
2541 
2542   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2543   if (DstIdx == -1 ||
2544       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2545     return true;
2546   }
2547 
2548   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2549 
2550   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2551   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2552   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2553 
2554   assert(DstIdx != -1);
2555   const MCOperand &Dst = Inst.getOperand(DstIdx);
2556   assert(Dst.isReg());
2557   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2558 
2559   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2560 
2561   for (int SrcIdx : SrcIndices) {
2562     if (SrcIdx == -1) break;
2563     const MCOperand &Src = Inst.getOperand(SrcIdx);
2564     if (Src.isReg()) {
2565       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2566       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2567         return false;
2568       }
2569     }
2570   }
2571 
2572   return true;
2573 }
2574 
2575 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2576 
2577   const unsigned Opc = Inst.getOpcode();
2578   const MCInstrDesc &Desc = MII.get(Opc);
2579 
2580   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2581     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2582     assert(ClampIdx != -1);
2583     return Inst.getOperand(ClampIdx).getImm() == 0;
2584   }
2585 
2586   return true;
2587 }
2588 
2589 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2590 
2591   const unsigned Opc = Inst.getOpcode();
2592   const MCInstrDesc &Desc = MII.get(Opc);
2593 
2594   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2595     return true;
2596 
2597   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2598   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2599   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2600 
2601   assert(VDataIdx != -1);
2602   assert(DMaskIdx != -1);
2603   assert(TFEIdx != -1);
2604 
2605   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2606   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2607   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2608   if (DMask == 0)
2609     DMask = 1;
2610 
2611   unsigned DataSize =
2612     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2613   if (hasPackedD16()) {
2614     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2615     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2616       DataSize = (DataSize + 1) / 2;
2617   }
2618 
2619   return (VDataSize / 4) == DataSize + TFESize;
2620 }
2621 
2622 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
2623   const unsigned Opc = Inst.getOpcode();
2624   const MCInstrDesc &Desc = MII.get(Opc);
2625 
2626   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
2627     return true;
2628 
2629   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
2630   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
2631       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
2632   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
2633   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
2634   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2635 
2636   assert(VAddr0Idx != -1);
2637   assert(SrsrcIdx != -1);
2638   assert(DimIdx != -1);
2639   assert(SrsrcIdx > VAddr0Idx);
2640 
2641   unsigned Dim = Inst.getOperand(DimIdx).getImm();
2642   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
2643   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
2644   unsigned VAddrSize =
2645       IsNSA ? SrsrcIdx - VAddr0Idx
2646             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
2647 
2648   unsigned AddrSize = BaseOpcode->NumExtraArgs +
2649                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
2650                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
2651                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
2652   if (!IsNSA) {
2653     if (AddrSize > 8)
2654       AddrSize = 16;
2655     else if (AddrSize > 4)
2656       AddrSize = 8;
2657   }
2658 
2659   return VAddrSize == AddrSize;
2660 }
2661 
2662 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
2663 
2664   const unsigned Opc = Inst.getOpcode();
2665   const MCInstrDesc &Desc = MII.get(Opc);
2666 
2667   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2668     return true;
2669   if (!Desc.mayLoad() || !Desc.mayStore())
2670     return true; // Not atomic
2671 
2672   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2673   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2674 
2675   // This is an incomplete check because image_atomic_cmpswap
2676   // may only use 0x3 and 0xf while other atomic operations
2677   // may use 0x1 and 0x3. However these limitations are
2678   // verified when we check that dmask matches dst size.
2679   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
2680 }
2681 
2682 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
2683 
2684   const unsigned Opc = Inst.getOpcode();
2685   const MCInstrDesc &Desc = MII.get(Opc);
2686 
2687   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
2688     return true;
2689 
2690   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2691   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2692 
2693   // GATHER4 instructions use dmask in a different fashion compared to
2694   // other MIMG instructions. The only useful DMASK values are
2695   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
2696   // (red,red,red,red) etc.) The ISA document doesn't mention
2697   // this.
2698   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
2699 }
2700 
2701 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
2702 
2703   const unsigned Opc = Inst.getOpcode();
2704   const MCInstrDesc &Desc = MII.get(Opc);
2705 
2706   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2707     return true;
2708 
2709   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2710   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
2711     if (isCI() || isSI())
2712       return false;
2713   }
2714 
2715   return true;
2716 }
2717 
2718 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
2719   const unsigned Opc = Inst.getOpcode();
2720   const MCInstrDesc &Desc = MII.get(Opc);
2721 
2722   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2723     return true;
2724 
2725   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2726   if (DimIdx < 0)
2727     return true;
2728 
2729   long Imm = Inst.getOperand(DimIdx).getImm();
2730   if (Imm < 0 || Imm >= 8)
2731     return false;
2732 
2733   return true;
2734 }
2735 
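// Opcodes whose source operands are semantically reversed (e.g. v_subrev
// computes src1 - src0). Used below to reject lds_direct as src0 of these
// instructions.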
2736 static bool IsRevOpcode(const unsigned Opcode)
2737 {
2738   switch (Opcode) {
2739   case AMDGPU::V_SUBREV_F32_e32:
2740   case AMDGPU::V_SUBREV_F32_e64:
2741   case AMDGPU::V_SUBREV_F32_e32_gfx10:
2742   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
2743   case AMDGPU::V_SUBREV_F32_e32_vi:
2744   case AMDGPU::V_SUBREV_F32_e64_gfx10:
2745   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
2746   case AMDGPU::V_SUBREV_F32_e64_vi:
2747 
2748   case AMDGPU::V_SUBREV_I32_e32:
2749   case AMDGPU::V_SUBREV_I32_e64:
2750   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
2751   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
2752 
2753   case AMDGPU::V_SUBBREV_U32_e32:
2754   case AMDGPU::V_SUBBREV_U32_e64:
2755   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
2756   case AMDGPU::V_SUBBREV_U32_e32_vi:
2757   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
2758   case AMDGPU::V_SUBBREV_U32_e64_vi:
2759 
2760   case AMDGPU::V_SUBREV_U32_e32:
2761   case AMDGPU::V_SUBREV_U32_e64:
2762   case AMDGPU::V_SUBREV_U32_e32_gfx9:
2763   case AMDGPU::V_SUBREV_U32_e32_vi:
2764   case AMDGPU::V_SUBREV_U32_e64_gfx9:
2765   case AMDGPU::V_SUBREV_U32_e64_vi:
2766 
2767   case AMDGPU::V_SUBREV_F16_e32:
2768   case AMDGPU::V_SUBREV_F16_e64:
2769   case AMDGPU::V_SUBREV_F16_e32_gfx10:
2770   case AMDGPU::V_SUBREV_F16_e32_vi:
2771   case AMDGPU::V_SUBREV_F16_e64_gfx10:
2772   case AMDGPU::V_SUBREV_F16_e64_vi:
2773 
2774   case AMDGPU::V_SUBREV_U16_e32:
2775   case AMDGPU::V_SUBREV_U16_e64:
2776   case AMDGPU::V_SUBREV_U16_e32_vi:
2777   case AMDGPU::V_SUBREV_U16_e64_vi:
2778 
2779   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
2780   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
2781   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
2782 
2783   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
2784   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
2785 
2786   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
2787   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
2788 
2789   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
2790   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
2791 
2792   case AMDGPU::V_LSHRREV_B32_e32:
2793   case AMDGPU::V_LSHRREV_B32_e64:
2794   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
2795   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
2796   case AMDGPU::V_LSHRREV_B32_e32_vi:
2797   case AMDGPU::V_LSHRREV_B32_e64_vi:
2798   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
2799   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
2800 
2801   case AMDGPU::V_ASHRREV_I32_e32:
2802   case AMDGPU::V_ASHRREV_I32_e64:
2803   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
2804   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
2805   case AMDGPU::V_ASHRREV_I32_e32_vi:
2806   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
2807   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
2808   case AMDGPU::V_ASHRREV_I32_e64_vi:
2809 
2810   case AMDGPU::V_LSHLREV_B32_e32:
2811   case AMDGPU::V_LSHLREV_B32_e64:
2812   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
2813   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
2814   case AMDGPU::V_LSHLREV_B32_e32_vi:
2815   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
2816   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
2817   case AMDGPU::V_LSHLREV_B32_e64_vi:
2818 
2819   case AMDGPU::V_LSHLREV_B16_e32:
2820   case AMDGPU::V_LSHLREV_B16_e64:
2821   case AMDGPU::V_LSHLREV_B16_e32_vi:
2822   case AMDGPU::V_LSHLREV_B16_e64_vi:
2823   case AMDGPU::V_LSHLREV_B16_gfx10:
2824 
2825   case AMDGPU::V_LSHRREV_B16_e32:
2826   case AMDGPU::V_LSHRREV_B16_e64:
2827   case AMDGPU::V_LSHRREV_B16_e32_vi:
2828   case AMDGPU::V_LSHRREV_B16_e64_vi:
2829   case AMDGPU::V_LSHRREV_B16_gfx10:
2830 
2831   case AMDGPU::V_ASHRREV_I16_e32:
2832   case AMDGPU::V_ASHRREV_I16_e64:
2833   case AMDGPU::V_ASHRREV_I16_e32_vi:
2834   case AMDGPU::V_ASHRREV_I16_e64_vi:
2835   case AMDGPU::V_ASHRREV_I16_gfx10:
2836 
2837   case AMDGPU::V_LSHLREV_B64:
2838   case AMDGPU::V_LSHLREV_B64_gfx10:
2839   case AMDGPU::V_LSHLREV_B64_vi:
2840 
2841   case AMDGPU::V_LSHRREV_B64:
2842   case AMDGPU::V_LSHRREV_B64_gfx10:
2843   case AMDGPU::V_LSHRREV_B64_vi:
2844 
2845   case AMDGPU::V_ASHRREV_I64:
2846   case AMDGPU::V_ASHRREV_I64_gfx10:
2847   case AMDGPU::V_ASHRREV_I64_vi:
2848 
2849   case AMDGPU::V_PK_LSHLREV_B16:
2850   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
2851   case AMDGPU::V_PK_LSHLREV_B16_vi:
2852 
2853   case AMDGPU::V_PK_LSHRREV_B16:
2854   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
2855   case AMDGPU::V_PK_LSHRREV_B16_vi:
2856   case AMDGPU::V_PK_ASHRREV_I16:
2857   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
2858   case AMDGPU::V_PK_ASHRREV_I16_vi:
2859     return true;
2860   default:
2861     return false;
2862   }
2863 }
2864 
2865 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
2866 
2867   using namespace SIInstrFlags;
2868   const unsigned Opcode = Inst.getOpcode();
2869   const MCInstrDesc &Desc = MII.get(Opcode);
2870 
2871   // The lds_direct register is defined so that it can be used
2872   // with 9-bit source operands only. Ignore encodings which do not accept these.
2873   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
2874     return true;
2875 
2876   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2877   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2878   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2879 
2880   const int SrcIndices[] = { Src1Idx, Src2Idx };
2881 
2882   // lds_direct cannot be specified as either src1 or src2.
2883   for (int SrcIdx : SrcIndices) {
2884     if (SrcIdx == -1) break;
2885     const MCOperand &Src = Inst.getOperand(SrcIdx);
2886     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
2887       return false;
2888     }
2889   }
2890 
2891   if (Src0Idx == -1)
2892     return true;
2893 
2894   const MCOperand &Src = Inst.getOperand(Src0Idx);
2895   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
2896     return true;
2897 
2898   // lds_direct is specified as src0. Check additional limitations.
2899   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
2900 }
2901 
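// SOP2/SOPC instructions may use at most one unique 32-bit literal across
// their source operands.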
2902 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
2903   unsigned Opcode = Inst.getOpcode();
2904   const MCInstrDesc &Desc = MII.get(Opcode);
2905   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
2906     return true;
2907 
2908   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2909   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2910 
2911   const int OpIndices[] = { Src0Idx, Src1Idx };
2912 
2913   unsigned NumLiterals = 0;
2914   uint32_t LiteralValue;
2915 
2916   for (int OpIdx : OpIndices) {
2917     if (OpIdx == -1) break;
2918 
2919     const MCOperand &MO = Inst.getOperand(OpIdx);
2920     if (MO.isImm() &&
2921         // Exclude special imm operands (like that used by s_set_gpr_idx_on)
2922         AMDGPU::isSISrcOperand(Desc, OpIdx) &&
2923         !isInlineConstant(Inst, OpIdx)) {
2924       uint32_t Value = static_cast<uint32_t>(MO.getImm());
2925       if (NumLiterals == 0 || LiteralValue != Value) {
2926         LiteralValue = Value;
2927         ++NumLiterals;
2928       }
2929     }
2930   }
2931 
2932   return NumLiterals <= 1;
2933 }
2934 
2935 // A VOP3 literal is only allowed on GFX10+, and only one can be used
2936 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
2937   unsigned Opcode = Inst.getOpcode();
2938   const MCInstrDesc &Desc = MII.get(Opcode);
2939   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
2940     return true;
2941 
2942   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2943   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2944   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2945 
2946   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2947 
2948   unsigned NumLiterals = 0;
2949   uint32_t LiteralValue;
2950 
2951   for (int OpIdx : OpIndices) {
2952     if (OpIdx == -1) break;
2953 
2954     const MCOperand &MO = Inst.getOperand(OpIdx);
2955     if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx))
2956       continue;
2957 
2958     if (!isInlineConstant(Inst, OpIdx)) {
2959       uint32_t Value = static_cast<uint32_t>(MO.getImm());
2960       if (NumLiterals == 0 || LiteralValue != Value) {
2961         LiteralValue = Value;
2962         ++NumLiterals;
2963       }
2964     }
2965   }
2966 
2967   return !NumLiterals ||
2968          (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
2969 }
2970 
2971 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
2972                                           const SMLoc &IDLoc) {
2973   if (!validateLdsDirect(Inst)) {
2974     Error(IDLoc,
2975       "invalid use of lds_direct");
2976     return false;
2977   }
2978   if (!validateSOPLiteral(Inst)) {
2979     Error(IDLoc,
2980       "only one literal operand is allowed");
2981     return false;
2982   }
2983   if (!validateVOP3Literal(Inst)) {
2984     Error(IDLoc,
2985       "invalid literal operand");
2986     return false;
2987   }
2988   if (!validateConstantBusLimitations(Inst)) {
2989     Error(IDLoc,
2990       "invalid operand (violates constant bus restrictions)");
2991     return false;
2992   }
2993   if (!validateEarlyClobberLimitations(Inst)) {
2994     Error(IDLoc,
2995       "destination must be different than all sources");
2996     return false;
2997   }
2998   if (!validateIntClampSupported(Inst)) {
2999     Error(IDLoc,
3000       "integer clamping is not supported on this GPU");
3001     return false;
3002   }
3003   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
3004   if (!validateMIMGD16(Inst)) {
3005     Error(IDLoc,
3006       "d16 modifier is not supported on this GPU");
3007     return false;
3008   }
3009   if (!validateMIMGDim(Inst)) {
3010     Error(IDLoc, "dim modifier is required on this GPU");
3011     return false;
3012   }
3013   if (!validateMIMGDataSize(Inst)) {
3014     Error(IDLoc,
3015       "image data size does not match dmask and tfe");
3016     return false;
3017   }
3018   if (!validateMIMGAddrSize(Inst)) {
3019     Error(IDLoc,
3020       "image address size does not match dim and a16");
3021     return false;
3022   }
3023   if (!validateMIMGAtomicDMask(Inst)) {
3024     Error(IDLoc,
3025       "invalid atomic image dmask");
3026     return false;
3027   }
3028   if (!validateMIMGGatherDMask(Inst)) {
3029     Error(IDLoc,
3030       "invalid image_gather dmask: only one bit must be set");
3031     return false;
3032   }
3033 
3034   return true;
3035 }
3036 
3037 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3038                                             const FeatureBitset &FBS,
3039                                             unsigned VariantID = 0);
3040 
3041 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3042                                               OperandVector &Operands,
3043                                               MCStreamer &Out,
3044                                               uint64_t &ErrorInfo,
3045                                               bool MatchingInlineAsm) {
3046   MCInst Inst;
3047   unsigned Result = Match_Success;
3048   for (auto Variant : getMatchedVariants()) {
3049     uint64_t EI;
3050     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3051                                   Variant);
3052     // We order match statuses from least to most specific and keep the most
3053     // specific status as the result:
3054     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3055     if ((R == Match_Success) ||
3056         (R == Match_PreferE32) ||
3057         (R == Match_MissingFeature && Result != Match_PreferE32) ||
3058         (R == Match_InvalidOperand && Result != Match_MissingFeature
3059                                    && Result != Match_PreferE32) ||
3060         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
3061                                    && Result != Match_MissingFeature
3062                                    && Result != Match_PreferE32)) {
3063       Result = R;
3064       ErrorInfo = EI;
3065     }
3066     if (R == Match_Success)
3067       break;
3068   }
3069 
3070   switch (Result) {
3071   default: break;
3072   case Match_Success:
3073     if (!validateInstruction(Inst, IDLoc)) {
3074       return true;
3075     }
3076     Inst.setLoc(IDLoc);
3077     Out.EmitInstruction(Inst, getSTI());
3078     return false;
3079 
3080   case Match_MissingFeature:
3081     return Error(IDLoc, "instruction not supported on this GPU");
3082 
3083   case Match_MnemonicFail: {
3084     FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3085     std::string Suggestion = AMDGPUMnemonicSpellCheck(
3086         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
3087     return Error(IDLoc, "invalid instruction" + Suggestion,
3088                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
3089   }
3090 
3091   case Match_InvalidOperand: {
3092     SMLoc ErrorLoc = IDLoc;
3093     if (ErrorInfo != ~0ULL) {
3094       if (ErrorInfo >= Operands.size()) {
3095         return Error(IDLoc, "too few operands for instruction");
3096       }
3097       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3098       if (ErrorLoc == SMLoc())
3099         ErrorLoc = IDLoc;
3100     }
3101     return Error(ErrorLoc, "invalid operand for instruction");
3102   }
3103 
3104   case Match_PreferE32:
3105     return Error(IDLoc, "internal error: instruction without _e64 suffix "
3106                         "should be encoded as e32");
3107   }
3108   llvm_unreachable("Implement any new match types added!");
3109 }
3110 
3111 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3112   int64_t Tmp = -1;
3113   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3114     return true;
3115   }
3116   if (getParser().parseAbsoluteExpression(Tmp)) {
3117     return true;
3118   }
3119   Ret = static_cast<uint32_t>(Tmp);
3120   return false;
3121 }
3122 
3123 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3124                                                uint32_t &Minor) {
3125   if (ParseAsAbsoluteExpression(Major))
3126     return TokError("invalid major version");
3127 
3128   if (getLexer().isNot(AsmToken::Comma))
3129     return TokError("minor version number required, comma expected");
3130   Lex();
3131 
3132   if (ParseAsAbsoluteExpression(Minor))
3133     return TokError("invalid minor version");
3134 
3135   return false;
3136 }
3137 
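// Parse the ".amdgcn_target" directive: a single quoted target string that
// must match the target the assembler was configured with.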
3138 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3139   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3140     return TokError("directive only supported for amdgcn architecture");
3141 
3142   std::string Target;
3143 
3144   SMLoc TargetStart = getTok().getLoc();
3145   if (getParser().parseEscapedString(Target))
3146     return true;
3147   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3148 
3149   std::string ExpectedTarget;
3150   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3151   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3152 
3153   if (Target != ExpectedTargetOS.str())
3154     return getParser().Error(TargetRange.Start, "target must match options",
3155                              TargetRange);
3156 
3157   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3158   return false;
3159 }
3160 
3161 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3162   return getParser().Error(Range.Start, "value out of range", Range);
3163 }
3164 
3165 bool AMDGPUAsmParser::calculateGPRBlocks(
3166     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3167     bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange,
3168     unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks,
3169     unsigned &SGPRBlocks) {
3170   // TODO(scott.linder): These calculations are duplicated from
3171   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3172   IsaVersion Version = getIsaVersion(getSTI().getCPU());
3173 
3174   unsigned NumVGPRs = NextFreeVGPR;
3175   unsigned NumSGPRs = NextFreeSGPR;
3176 
3177   if (Version.Major >= 10)
3178     NumSGPRs = 0;
3179   else {
3180     unsigned MaxAddressableNumSGPRs =
3181         IsaInfo::getAddressableNumSGPRs(&getSTI());
3182 
3183     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3184         NumSGPRs > MaxAddressableNumSGPRs)
3185       return OutOfRangeError(SGPRRange);
3186 
3187     NumSGPRs +=
3188         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3189 
3190     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3191         NumSGPRs > MaxAddressableNumSGPRs)
3192       return OutOfRangeError(SGPRRange);
3193 
3194     if (Features.test(FeatureSGPRInitBug))
3195       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3196   }
3197 
3198   VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs);
3199   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3200 
3201   return false;
3202 }
3203 
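// Parse a ".amdhsa_kernel <name> ... .end_amdhsa_kernel" block. A minimal
// sketch of the accepted form (only the two required directives shown; the
// kernel name is illustrative):
//
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel
//
// Every .amdhsa_* directive takes an absolute expression as its value.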
3204 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3205   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3206     return TokError("directive only supported for amdgcn architecture");
3207 
3208   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3209     return TokError("directive only supported for amdhsa OS");
3210 
3211   StringRef KernelName;
3212   if (getParser().parseIdentifier(KernelName))
3213     return true;
3214 
3215   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3216 
3217   StringSet<> Seen;
3218 
3219   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3220 
3221   SMRange VGPRRange;
3222   uint64_t NextFreeVGPR = 0;
3223   SMRange SGPRRange;
3224   uint64_t NextFreeSGPR = 0;
3225   unsigned UserSGPRCount = 0;
3226   bool ReserveVCC = true;
3227   bool ReserveFlatScr = true;
3228   bool ReserveXNACK = hasXNACK();
3229 
3230   while (true) {
3231     while (getLexer().is(AsmToken::EndOfStatement))
3232       Lex();
3233 
3234     if (getLexer().isNot(AsmToken::Identifier))
3235       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3236 
3237     StringRef ID = getTok().getIdentifier();
3238     SMRange IDRange = getTok().getLocRange();
3239     Lex();
3240 
3241     if (ID == ".end_amdhsa_kernel")
3242       break;
3243 
3244     if (Seen.find(ID) != Seen.end())
3245       return TokError(".amdhsa_ directives cannot be repeated");
3246     Seen.insert(ID);
3247 
3248     SMLoc ValStart = getTok().getLoc();
3249     int64_t IVal;
3250     if (getParser().parseAbsoluteExpression(IVal))
3251       return true;
3252     SMLoc ValEnd = getTok().getLoc();
3253     SMRange ValRange = SMRange(ValStart, ValEnd);
3254 
3255     if (IVal < 0)
3256       return OutOfRangeError(ValRange);
3257 
3258     uint64_t Val = IVal;
3259 
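// Check that VALUE fits in the ENTRY##_WIDTH-bit field and pack it into
// FIELD; otherwise report RANGE as out of range.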
3260 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
3261   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
3262     return OutOfRangeError(RANGE);                                             \
3263   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3264 
3265     if (ID == ".amdhsa_group_segment_fixed_size") {
3266       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3267         return OutOfRangeError(ValRange);
3268       KD.group_segment_fixed_size = Val;
3269     } else if (ID == ".amdhsa_private_segment_fixed_size") {
3270       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3271         return OutOfRangeError(ValRange);
3272       KD.private_segment_fixed_size = Val;
3273     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3274       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3275                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3276                        Val, ValRange);
3277       UserSGPRCount += 4;
3278     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3279       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3280                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3281                        ValRange);
3282       UserSGPRCount += 2;
3283     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3284       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3285                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3286                        ValRange);
3287       UserSGPRCount += 2;
3288     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3289       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3290                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3291                        Val, ValRange);
3292       UserSGPRCount += 2;
3293     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3294       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3295                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3296                        ValRange);
3297       UserSGPRCount += 2;
3298     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3299       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3300                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3301                        ValRange);
3302       UserSGPRCount += 2;
3303     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3304       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3305                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3306                        Val, ValRange);
3307       UserSGPRCount += 1;
3308     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3309       PARSE_BITS_ENTRY(
3310           KD.compute_pgm_rsrc2,
3311           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3312           ValRange);
3313     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3314       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3315                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3316                        ValRange);
3317     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3318       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3319                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3320                        ValRange);
3321     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3322       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3323                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3324                        ValRange);
3325     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3326       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3327                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3328                        ValRange);
3329     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3330       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3331                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3332                        ValRange);
3333     } else if (ID == ".amdhsa_next_free_vgpr") {
3334       VGPRRange = ValRange;
3335       NextFreeVGPR = Val;
3336     } else if (ID == ".amdhsa_next_free_sgpr") {
3337       SGPRRange = ValRange;
3338       NextFreeSGPR = Val;
3339     } else if (ID == ".amdhsa_reserve_vcc") {
3340       if (!isUInt<1>(Val))
3341         return OutOfRangeError(ValRange);
3342       ReserveVCC = Val;
3343     } else if (ID == ".amdhsa_reserve_flat_scratch") {
3344       if (IVersion.Major < 7)
3345         return getParser().Error(IDRange.Start, "directive requires gfx7+",
3346                                  IDRange);
3347       if (!isUInt<1>(Val))
3348         return OutOfRangeError(ValRange);
3349       ReserveFlatScr = Val;
3350     } else if (ID == ".amdhsa_reserve_xnack_mask") {
3351       if (IVersion.Major < 8)
3352         return getParser().Error(IDRange.Start, "directive requires gfx8+",
3353                                  IDRange);
3354       if (!isUInt<1>(Val))
3355         return OutOfRangeError(ValRange);
3356       ReserveXNACK = Val;
3357     } else if (ID == ".amdhsa_float_round_mode_32") {
3358       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3359                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
3360     } else if (ID == ".amdhsa_float_round_mode_16_64") {
3361       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3362                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
3363     } else if (ID == ".amdhsa_float_denorm_mode_32") {
3364       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3365                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
3366     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
3367       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3368                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
3369                        ValRange);
3370     } else if (ID == ".amdhsa_dx10_clamp") {
3371       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3372                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
3373     } else if (ID == ".amdhsa_ieee_mode") {
3374       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
3375                        Val, ValRange);
3376     } else if (ID == ".amdhsa_fp16_overflow") {
3377       if (IVersion.Major < 9)
3378         return getParser().Error(IDRange.Start, "directive requires gfx9+",
3379                                  IDRange);
3380       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
3381                        ValRange);
3382     } else if (ID == ".amdhsa_workgroup_processor_mode") {
3383       if (IVersion.Major < 10)
3384         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3385                                  IDRange);
3386       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
3387                        ValRange);
3388     } else if (ID == ".amdhsa_memory_ordered") {
3389       if (IVersion.Major < 10)
3390         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3391                                  IDRange);
3392       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
3393                        ValRange);
3394     } else if (ID == ".amdhsa_forward_progress") {
3395       if (IVersion.Major < 10)
3396         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3397                                  IDRange);
3398       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
3399                        ValRange);
3400     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
3401       PARSE_BITS_ENTRY(
3402           KD.compute_pgm_rsrc2,
3403           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
3404           ValRange);
3405     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
3406       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3407                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
3408                        Val, ValRange);
3409     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
3410       PARSE_BITS_ENTRY(
3411           KD.compute_pgm_rsrc2,
3412           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
3413           ValRange);
3414     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
3415       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3416                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
3417                        Val, ValRange);
3418     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
3419       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3420                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
3421                        Val, ValRange);
3422     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
3423       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3424                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
3425                        Val, ValRange);
3426     } else if (ID == ".amdhsa_exception_int_div_zero") {
3427       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3428                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
3429                        Val, ValRange);
3430     } else {
3431       return getParser().Error(IDRange.Start,
3432                                "unknown .amdhsa_kernel directive", IDRange);
3433     }
3434 
3435 #undef PARSE_BITS_ENTRY
3436   }
3437 
3438   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
3439     return TokError(".amdhsa_next_free_vgpr directive is required");
3440 
3441   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
3442     return TokError(".amdhsa_next_free_sgpr directive is required");
3443 
3444   unsigned VGPRBlocks;
3445   unsigned SGPRBlocks;
3446   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
3447                          ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR,
3448                          SGPRRange, VGPRBlocks, SGPRBlocks))
3449     return true;
3450 
3451   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
3452           VGPRBlocks))
3453     return OutOfRangeError(VGPRRange);
3454   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3455                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
3456 
3457   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
3458           SGPRBlocks))
3459     return OutOfRangeError(SGPRRange);
3460   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3461                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
3462                   SGPRBlocks);
3463 
3464   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
3465     return TokError("too many user SGPRs enabled");
3466   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
3467                   UserSGPRCount);
3468 
3469   getTargetStreamer().EmitAmdhsaKernelDescriptor(
3470       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
3471       ReserveFlatScr, ReserveXNACK);
3472   return false;
3473 }
3474 
3475 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
3476   uint32_t Major;
3477   uint32_t Minor;
3478 
3479   if (ParseDirectiveMajorMinor(Major, Minor))
3480     return true;
3481 
3482   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
3483   return false;
3484 }
3485 
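// Parse ".hsa_code_object_isa", either with no arguments or in the form
// <major>,<minor>,<stepping>,"<vendor>","<arch>", e.g.
// .hsa_code_object_isa 7,0,0,"AMD","AMDGPU".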
3486 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
3487   uint32_t Major;
3488   uint32_t Minor;
3489   uint32_t Stepping;
3490   StringRef VendorName;
3491   StringRef ArchName;
3492 
3493   // If this directive has no arguments, then use the ISA version for the
3494   // targeted GPU.
3495   if (getLexer().is(AsmToken::EndOfStatement)) {
3496     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3497     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
3498                                                       ISA.Stepping,
3499                                                       "AMD", "AMDGPU");
3500     return false;
3501   }
3502 
3503   if (ParseDirectiveMajorMinor(Major, Minor))
3504     return true;
3505 
3506   if (getLexer().isNot(AsmToken::Comma))
3507     return TokError("stepping version number required, comma expected");
3508   Lex();
3509 
3510   if (ParseAsAbsoluteExpression(Stepping))
3511     return TokError("invalid stepping version");
3512 
3513   if (getLexer().isNot(AsmToken::Comma))
3514     return TokError("vendor name required, comma expected");
3515   Lex();
3516 
3517   if (getLexer().isNot(AsmToken::String))
3518     return TokError("invalid vendor name");
3519 
3520   VendorName = getLexer().getTok().getStringContents();
3521   Lex();
3522 
3523   if (getLexer().isNot(AsmToken::Comma))
3524     return TokError("arch name required, comma expected");
3525   Lex();
3526 
3527   if (getLexer().isNot(AsmToken::String))
3528     return TokError("invalid arch name");
3529 
3530   ArchName = getLexer().getTok().getStringContents();
3531   Lex();
3532 
3533   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
3534                                                     VendorName, ArchName);
3535   return false;
3536 }
3537 
3538 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
3539                                                amd_kernel_code_t &Header) {
3540   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
3541   // assembly for backwards compatibility.
3542   if (ID == "max_scratch_backing_memory_byte_size") {
3543     Parser.eatToEndOfStatement();
3544     return false;
3545   }
3546 
3547   SmallString<40> ErrStr;
3548   raw_svector_ostream Err(ErrStr);
3549   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
3550     return TokError(Err.str());
3551   }
3552   Lex();
3553 
3554   if (ID == "enable_wgp_mode") {
3555     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
3556       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
3557   }
3558 
3559   if (ID == "enable_mem_ordered") {
3560     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
3561       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
3562   }
3563 
3564   if (ID == "enable_fwd_progress") {
3565     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
3566       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
3567   }
3568 
3569   return false;
3570 }
3571 
3572 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
3573   amd_kernel_code_t Header;
3574   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
3575 
3576   while (true) {
3577     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
3578     // will set the current token to EndOfStatement.
3579     while(getLexer().is(AsmToken::EndOfStatement))
3580       Lex();
3581 
3582     if (getLexer().isNot(AsmToken::Identifier))
3583       return TokError("expected value identifier or .end_amd_kernel_code_t");
3584 
3585     StringRef ID = getLexer().getTok().getIdentifier();
3586     Lex();
3587 
3588     if (ID == ".end_amd_kernel_code_t")
3589       break;
3590 
3591     if (ParseAMDKernelCodeTValue(ID, Header))
3592       return true;
3593   }
3594 
3595   getTargetStreamer().EmitAMDKernelCodeT(Header);
3596 
3597   return false;
3598 }
3599 
3600 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
3601   if (getLexer().isNot(AsmToken::Identifier))
3602     return TokError("expected symbol name");
3603 
3604   StringRef KernelName = Parser.getTok().getString();
3605 
3606   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
3607                                            ELF::STT_AMDGPU_HSA_KERNEL);
3608   Lex();
3609   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
3610     KernelScope.initialize(getContext());
3611   return false;
3612 }
3613 
3614 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
3615   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
3616     return Error(getParser().getTok().getLoc(),
3617                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
3618                  "architectures");
3619   }
3620 
3621   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
3622 
3623   std::string ISAVersionStringFromSTI;
3624   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
3625   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
3626 
3627   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
3628     return Error(getParser().getTok().getLoc(),
3629                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
3630                  "arguments specified through the command line");
3631   }
3632 
3633   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
3634   Lex();
3635 
3636   return false;
3637 }
3638 
3639 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
3640   const char *AssemblerDirectiveBegin;
3641   const char *AssemblerDirectiveEnd;
3642   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
3643       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
3644           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
3645                             HSAMD::V3::AssemblerDirectiveEnd)
3646           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
3647                             HSAMD::AssemblerDirectiveEnd);
3648 
3649   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
3650     return Error(getParser().getTok().getLoc(),
3651                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
3652                  "not available on non-amdhsa OSes")).str());
3653   }
3654 
3655   std::string HSAMetadataString;
3656   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
3657                           HSAMetadataString))
3658     return true;
3659 
3660   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
3661     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
3662       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3663   } else {
3664     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
3665       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3666   }
3667 
3668   return false;
3669 }
3670 
3671 /// Common code to parse out a block of text (typically YAML) between start and
3672 /// end directives.
3673 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
3674                                           const char *AssemblerDirectiveEnd,
3675                                           std::string &CollectString) {
3676 
3677   raw_string_ostream CollectStream(CollectString);
3678 
3679   getLexer().setSkipSpace(false);
3680 
3681   bool FoundEnd = false;
3682   while (!getLexer().is(AsmToken::Eof)) {
3683     while (getLexer().is(AsmToken::Space)) {
3684       CollectStream << getLexer().getTok().getString();
3685       Lex();
3686     }
3687 
3688     if (getLexer().is(AsmToken::Identifier)) {
3689       StringRef ID = getLexer().getTok().getIdentifier();
3690       if (ID == AssemblerDirectiveEnd) {
3691         Lex();
3692         FoundEnd = true;
3693         break;
3694       }
3695     }
3696 
3697     CollectStream << Parser.parseStringToEndOfStatement()
3698                   << getContext().getAsmInfo()->getSeparatorString();
3699 
3700     Parser.eatToEndOfStatement();
3701   }
3702 
3703   getLexer().setSkipSpace(true);
3704 
3705   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
3706     return TokError(Twine("expected directive ") +
3707                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
3708   }
3709 
3710   CollectStream.flush();
3711   return false;
3712 }
3713 
3714 /// Parse the assembler directive for new MsgPack-format PAL metadata.
3715 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
3716   std::string String;
3717   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
3718                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
3719     return true;
3720 
3721   auto PALMetadata = getTargetStreamer().getPALMetadata();
3722   if (!PALMetadata->setFromString(String))
3723     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
3724   return false;
3725 }
3726 
3727 /// Parse the assembler directive for old linear-format PAL metadata.
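/// The directive body is an even-length, comma-separated list of register,
/// value pairs, each given as an absolute expression.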
3728 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
3729   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
3730     return Error(getParser().getTok().getLoc(),
3731                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
3732                  "not available on non-amdpal OSes")).str());
3733   }
3734 
3735   auto PALMetadata = getTargetStreamer().getPALMetadata();
3736   PALMetadata->setLegacy();
3737   for (;;) {
3738     uint32_t Key, Value;
3739     if (ParseAsAbsoluteExpression(Key)) {
3740       return TokError(Twine("invalid value in ") +
3741                       Twine(PALMD::AssemblerDirective));
3742     }
3743     if (getLexer().isNot(AsmToken::Comma)) {
3744       return TokError(Twine("expected an even number of values in ") +
3745                       Twine(PALMD::AssemblerDirective));
3746     }
3747     Lex();
3748     if (ParseAsAbsoluteExpression(Value)) {
3749       return TokError(Twine("invalid value in ") +
3750                       Twine(PALMD::AssemblerDirective));
3751     }
3752     PALMetadata->setRegister(Key, Value);
3753     if (getLexer().isNot(AsmToken::Comma))
3754       break;
3755     Lex();
3756   }
3757   return false;
3758 }
3759 
3760 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
3761   StringRef IDVal = DirectiveID.getString();
3762 
3763   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
3764     if (IDVal == ".amdgcn_target")
3765       return ParseDirectiveAMDGCNTarget();
3766 
3767     if (IDVal == ".amdhsa_kernel")
3768       return ParseDirectiveAMDHSAKernel();
3769 
3770     // TODO: Restructure/combine with PAL metadata directive.
3771     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
3772       return ParseDirectiveHSAMetadata();
3773   } else {
3774     if (IDVal == ".hsa_code_object_version")
3775       return ParseDirectiveHSACodeObjectVersion();
3776 
3777     if (IDVal == ".hsa_code_object_isa")
3778       return ParseDirectiveHSACodeObjectISA();
3779 
3780     if (IDVal == ".amd_kernel_code_t")
3781       return ParseDirectiveAMDKernelCodeT();
3782 
3783     if (IDVal == ".amdgpu_hsa_kernel")
3784       return ParseDirectiveAMDGPUHsaKernel();
3785 
3786     if (IDVal == ".amd_amdgpu_isa")
3787       return ParseDirectiveISAVersion();
3788 
3789     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
3790       return ParseDirectiveHSAMetadata();
3791   }
3792 
3793   if (IDVal == PALMD::AssemblerDirectiveBegin)
3794     return ParseDirectivePALMetadataBegin();
3795 
3796   if (IDVal == PALMD::AssemblerDirective)
3797     return ParseDirectivePALMetadata();
3798 
3799   return true;
3800 }
3801 
3802 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
3803                                            unsigned RegNo) const {
3804 
3805   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
3806        R.isValid(); ++R) {
3807     if (*R == RegNo)
3808       return isGFX9() || isGFX10();
3809   }
3810 
3811   // GFX10 has 2 more SGPRs: 104 and 105.
3812   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
3813        R.isValid(); ++R) {
3814     if (*R == RegNo)
3815       return hasSGPR104_SGPR105();
3816   }
3817 
3818   switch (RegNo) {
3819   case AMDGPU::TBA:
3820   case AMDGPU::TBA_LO:
3821   case AMDGPU::TBA_HI:
3822   case AMDGPU::TMA:
3823   case AMDGPU::TMA_LO:
3824   case AMDGPU::TMA_HI:
3825     return !isGFX9() && !isGFX10();
3826   case AMDGPU::XNACK_MASK:
3827   case AMDGPU::XNACK_MASK_LO:
3828   case AMDGPU::XNACK_MASK_HI:
3829     return !isCI() && !isSI() && !isGFX10() && hasXNACK();
3830   case AMDGPU::SGPR_NULL:
3831     return isGFX10();
3832   default:
3833     break;
3834   }
3835 
3836   if (isInlineValue(RegNo))
3837     return !isCI() && !isSI() && !isVI();
3838 
3839   if (isCI())
3840     return true;
3841 
3842   if (isSI() || isGFX10()) {
3843     // No flat_scr on SI.
3844     // On GFX10 flat scratch is not a valid register operand and can only be
3845     // accessed with s_setreg/s_getreg.
3846     switch (RegNo) {
3847     case AMDGPU::FLAT_SCR:
3848     case AMDGPU::FLAT_SCR_LO:
3849     case AMDGPU::FLAT_SCR_HI:
3850       return false;
3851     default:
3852       return true;
3853     }
3854   }
3855 
3856   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
3857   // SI/CI have.
3858   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
3859        R.isValid(); ++R) {
3860     if (*R == RegNo)
3861       return hasSGPR102_SGPR103();
3862   }
3863 
3864   return true;
3865 }
3866 
3867 OperandMatchResultTy
3868 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
3869                               OperandMode Mode) {
3870   // Try to parse with a custom parser
3871   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
3872 
3873   // If we successfully parsed the operand or if there was an error parsing,
3874   // we are done.
3875   //
3876   // If we are parsing after we reach EndOfStatement then this means we
3877   // are appending default values to the Operands list.  This is only done
3878   // by custom parsers, so we shouldn't continue on to the generic parsing.
3879   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
3880       getLexer().is(AsmToken::EndOfStatement))
3881     return ResTy;
3882 
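  // GFX10 MIMG instructions can use an NSA (non-sequential address) form in
  // which the address registers are written as a bracketed list, e.g.
  // "[v0, v2, v5]". The bracket tokens are only kept when more than one
  // register was parsed.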
3883   if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
3884     unsigned Prefix = Operands.size();
3885     SMLoc LBraceLoc = getTok().getLoc();
3886     Parser.Lex(); // eat the '['
3887 
3888     for (;;) {
3889       ResTy = parseReg(Operands);
3890       if (ResTy != MatchOperand_Success)
3891         return ResTy;
3892 
3893       if (getLexer().is(AsmToken::RBrac))
3894         break;
3895 
3896       if (getLexer().isNot(AsmToken::Comma))
3897         return MatchOperand_ParseFail;
3898       Parser.Lex();
3899     }
3900 
3901     if (Operands.size() - Prefix > 1) {
3902       Operands.insert(Operands.begin() + Prefix,
3903                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
3904       Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
3905                                                     getTok().getLoc()));
3906     }
3907 
3908     Parser.Lex(); // eat the ']'
3909     return MatchOperand_Success;
3910   }
3911 
3912   ResTy = parseRegOrImm(Operands);
3913 
3914   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail)
3915     return ResTy;
3916 
3917   const auto &Tok = Parser.getTok();
3918   SMLoc S = Tok.getLoc();
3919 
3920   const MCExpr *Expr = nullptr;
3921   if (!Parser.parseExpression(Expr)) {
3922     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3923     return MatchOperand_Success;
3924   }
3925 
3926   // Possibly this is an instruction flag like 'gds'.
3927   if (Tok.getKind() == AsmToken::Identifier) {
3928     Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S));
3929     Parser.Lex();
3930     return MatchOperand_Success;
3931   }
3932 
3933   return MatchOperand_NoMatch;
3934 }
3935 
3936 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
3937   // Clear any forced encodings from the previous instruction.
3938   setForcedEncodingSize(0);
3939   setForcedDPP(false);
3940   setForcedSDWA(false);
3941 
3942   if (Name.endswith("_e64")) {
3943     setForcedEncodingSize(64);
3944     return Name.substr(0, Name.size() - 4);
3945   } else if (Name.endswith("_e32")) {
3946     setForcedEncodingSize(32);
3947     return Name.substr(0, Name.size() - 4);
3948   } else if (Name.endswith("_dpp")) {
3949     setForcedDPP(true);
3950     return Name.substr(0, Name.size() - 4);
3951   } else if (Name.endswith("_sdwa")) {
3952     setForcedSDWA(true);
3953     return Name.substr(0, Name.size() - 5);
3954   }
3955   return Name;
3956 }
3957 
3958 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
3959                                        StringRef Name,
3960                                        SMLoc NameLoc, OperandVector &Operands) {
3961   // Add the instruction mnemonic
3962   Name = parseMnemonicSuffix(Name);
3963   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
3964 
3965   bool IsMIMG = Name.startswith("image_");
3966 
3967   while (!getLexer().is(AsmToken::EndOfStatement)) {
3968     OperandMode Mode = OperandMode_Default;
3969     if (IsMIMG && isGFX10() && Operands.size() == 2)
3970       Mode = OperandMode_NSA;
3971     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
3972 
3973     // Eat the comma or space if there is one.
3974     if (getLexer().is(AsmToken::Comma))
3975       Parser.Lex();
3976 
3977     switch (Res) {
3978       case MatchOperand_Success: break;
3979       case MatchOperand_ParseFail:
3980         Error(getLexer().getLoc(), "failed parsing operand.");
3981         while (!getLexer().is(AsmToken::EndOfStatement)) {
3982           Parser.Lex();
3983         }
3984         return true;
3985       case MatchOperand_NoMatch:
3986         Error(getLexer().getLoc(), "not a valid operand.");
3987         while (!getLexer().is(AsmToken::EndOfStatement)) {
3988           Parser.Lex();
3989         }
3990         return true;
3991     }
3992   }
3993 
3994   return false;
3995 }
3996 
3997 //===----------------------------------------------------------------------===//
3998 // Utility functions
3999 //===----------------------------------------------------------------------===//
4000 
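// Parse an integer operand written as "<prefix>:<value>", e.g. "offset:16".
// A '-' between the colon and the value negates the result.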
4001 OperandMatchResultTy
4002 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) {
4003   switch(getLexer().getKind()) {
4004     default: return MatchOperand_NoMatch;
4005     case AsmToken::Identifier: {
4006       StringRef Name = Parser.getTok().getString();
4007       if (!Name.equals(Prefix)) {
4008         return MatchOperand_NoMatch;
4009       }
4010 
4011       Parser.Lex();
4012       if (getLexer().isNot(AsmToken::Colon))
4013         return MatchOperand_ParseFail;
4014 
4015       Parser.Lex();
4016 
4017       bool IsMinus = false;
4018       if (getLexer().getKind() == AsmToken::Minus) {
4019         Parser.Lex();
4020         IsMinus = true;
4021       }
4022 
4023       if (getLexer().isNot(AsmToken::Integer))
4024         return MatchOperand_ParseFail;
4025 
4026       if (getParser().parseAbsoluteExpression(Int))
4027         return MatchOperand_ParseFail;
4028 
4029       if (IsMinus)
4030         Int = -Int;
4031       break;
4032     }
4033   }
4034   return MatchOperand_Success;
4035 }
4036 
4037 OperandMatchResultTy
4038 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4039                                     AMDGPUOperand::ImmTy ImmTy,
4040                                     bool (*ConvertResult)(int64_t&)) {
4041   SMLoc S = Parser.getTok().getLoc();
4042   int64_t Value = 0;
4043 
4044   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4045   if (Res != MatchOperand_Success)
4046     return Res;
4047 
4048   if (ConvertResult && !ConvertResult(Value)) {
4049     return MatchOperand_ParseFail;
4050   }
4051 
4052   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4053   return MatchOperand_Success;
4054 }
4055 
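// Parse a bit-array operand written as "<prefix>:[b0,b1,...]" with up to four
// 0/1 elements, e.g. "op_sel:[0,1]"; element I is packed into bit I of the
// resulting immediate.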
4056 OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix(
4057   const char *Prefix,
4058   OperandVector &Operands,
4059   AMDGPUOperand::ImmTy ImmTy,
4060   bool (*ConvertResult)(int64_t&)) {
4061   StringRef Name = Parser.getTok().getString();
4062   if (!Name.equals(Prefix))
4063     return MatchOperand_NoMatch;
4064 
4065   Parser.Lex();
4066   if (getLexer().isNot(AsmToken::Colon))
4067     return MatchOperand_ParseFail;
4068 
4069   Parser.Lex();
4070   if (getLexer().isNot(AsmToken::LBrac))
4071     return MatchOperand_ParseFail;
4072   Parser.Lex();
4073 
4074   unsigned Val = 0;
4075   SMLoc S = Parser.getTok().getLoc();
4076 
4077   // FIXME: How to verify the number of elements matches the number of src
4078   // operands?
4079   for (int I = 0; I < 4; ++I) {
4080     if (I != 0) {
4081       if (getLexer().is(AsmToken::RBrac))
4082         break;
4083 
4084       if (getLexer().isNot(AsmToken::Comma))
4085         return MatchOperand_ParseFail;
4086       Parser.Lex();
4087     }
4088 
4089     if (getLexer().isNot(AsmToken::Integer))
4090       return MatchOperand_ParseFail;
4091 
4092     int64_t Op;
4093     if (getParser().parseAbsoluteExpression(Op))
4094       return MatchOperand_ParseFail;
4095 
4096     if (Op != 0 && Op != 1)
4097       return MatchOperand_ParseFail;
4098     Val |= (Op << I);
4099   }
4100 
4101   Parser.Lex();
4102   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4103   return MatchOperand_Success;
4104 }
4105 
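// Parse a named single-bit modifier such as "glc": the bare name sets the
// bit, a "no"-prefixed spelling (e.g. "noglc") clears it, and an absent token
// keeps the default of 0.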
4106 OperandMatchResultTy
4107 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4108                                AMDGPUOperand::ImmTy ImmTy) {
4109   int64_t Bit = 0;
4110   SMLoc S = Parser.getTok().getLoc();
4111 
4112   // If we are already at the end of the statement, this is a defaulted
4113   // argument, so keep the default value of 0.
4114   if (getLexer().isNot(AsmToken::EndOfStatement)) {
4115     switch(getLexer().getKind()) {
4116       case AsmToken::Identifier: {
4117         StringRef Tok = Parser.getTok().getString();
4118         if (Tok == Name) {
4119           if (Tok == "r128" && isGFX9())
4120             Error(S, "r128 modifier is not supported on this GPU");
4121           if (Tok == "a16" && !isGFX9())
4122             Error(S, "a16 modifier is not supported on this GPU");
4123           Bit = 1;
4124           Parser.Lex();
4125         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4126           Bit = 0;
4127           Parser.Lex();
4128         } else {
4129           return MatchOperand_NoMatch;
4130         }
4131         break;
4132       }
4133       default:
4134         return MatchOperand_NoMatch;
4135     }
4136   }
4137 
4138   if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4139     return MatchOperand_ParseFail;
4140 
4141   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4142   return MatchOperand_Success;
4143 }
4144 
4145 static void addOptionalImmOperand(
4146   MCInst& Inst, const OperandVector& Operands,
4147   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4148   AMDGPUOperand::ImmTy ImmT,
4149   int64_t Default = 0) {
4150   auto i = OptionalIdx.find(ImmT);
4151   if (i != OptionalIdx.end()) {
4152     unsigned Idx = i->second;
4153     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4154   } else {
4155     Inst.addOperand(MCOperand::createImm(Default));
4156   }
4157 }
4158 
4159 OperandMatchResultTy
4160 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4161   if (getLexer().isNot(AsmToken::Identifier)) {
4162     return MatchOperand_NoMatch;
4163   }
4164   StringRef Tok = Parser.getTok().getString();
4165   if (Tok != Prefix) {
4166     return MatchOperand_NoMatch;
4167   }
4168 
4169   Parser.Lex();
4170   if (getLexer().isNot(AsmToken::Colon)) {
4171     return MatchOperand_ParseFail;
4172   }
4173 
4174   Parser.Lex();
4175   if (getLexer().isNot(AsmToken::Identifier)) {
4176     return MatchOperand_ParseFail;
4177   }
4178 
4179   Value = Parser.getTok().getString();
4180   return MatchOperand_Success;
4181 }
4182 
4183 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
4184 // values to live in a joint format operand in the MCInst encoding.
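// For example "dfmt:15, nfmt:2"; the two values are packed as dfmt | (nfmt << 4).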
4185 OperandMatchResultTy
4186 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
4187   SMLoc S = Parser.getTok().getLoc();
4188   int64_t Dfmt = 0, Nfmt = 0;
4189   // dfmt and nfmt can appear in either order, and each is optional.
4190   bool GotDfmt = false, GotNfmt = false;
4191   while (!GotDfmt || !GotNfmt) {
4192     if (!GotDfmt) {
4193       auto Res = parseIntWithPrefix("dfmt", Dfmt);
4194       if (Res != MatchOperand_NoMatch) {
4195         if (Res != MatchOperand_Success)
4196           return Res;
4197         if (Dfmt >= 16) {
4198           Error(Parser.getTok().getLoc(), "out of range dfmt");
4199           return MatchOperand_ParseFail;
4200         }
4201         GotDfmt = true;
4202         Parser.Lex();
4203         continue;
4204       }
4205     }
4206     if (!GotNfmt) {
4207       auto Res = parseIntWithPrefix("nfmt", Nfmt);
4208       if (Res != MatchOperand_NoMatch) {
4209         if (Res != MatchOperand_Success)
4210           return Res;
4211         if (Nfmt >= 8) {
4212           Error(Parser.getTok().getLoc(), "out of range nfmt");
4213           return MatchOperand_ParseFail;
4214         }
4215         GotNfmt = true;
4216         Parser.Lex();
4217         continue;
4218       }
4219     }
4220     break;
4221   }
4222   if (!GotDfmt && !GotNfmt)
4223     return MatchOperand_NoMatch;
4224   auto Format = Dfmt | Nfmt << 4;
4225   Operands.push_back(
4226       AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
4227   return MatchOperand_Success;
4228 }
4229 
4230 //===----------------------------------------------------------------------===//
4231 // ds
4232 //===----------------------------------------------------------------------===//
4233 
4234 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
4235                                     const OperandVector &Operands) {
4236   OptionalImmIndexMap OptionalIdx;
4237 
4238   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4239     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4240 
4241     // Add the register arguments
4242     if (Op.isReg()) {
4243       Op.addRegOperands(Inst, 1);
4244       continue;
4245     }
4246 
4247     // Handle optional arguments
4248     OptionalIdx[Op.getImmTy()] = i;
4249   }
4250 
4251   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
4252   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
4253   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4254 
4255   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4256 }
4257 
4258 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
4259                                 bool IsGdsHardcoded) {
4260   OptionalImmIndexMap OptionalIdx;
4261 
4262   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4263     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4264 
4265     // Add the register arguments
4266     if (Op.isReg()) {
4267       Op.addRegOperands(Inst, 1);
4268       continue;
4269     }
4270 
4271     if (Op.isToken() && Op.getToken() == "gds") {
4272       IsGdsHardcoded = true;
4273       continue;
4274     }
4275 
4276     // Handle optional arguments
4277     OptionalIdx[Op.getImmTy()] = i;
4278   }
4279 
4280   AMDGPUOperand::ImmTy OffsetType =
4281     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
4282      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
4283      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
4284                                                       AMDGPUOperand::ImmTyOffset;
4285 
4286   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
4287 
4288   if (!IsGdsHardcoded) {
4289     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4290   }
4291   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4292 }
4293 
4294 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
4295   OptionalImmIndexMap OptionalIdx;
4296 
4297   unsigned OperandIdx[4];
4298   unsigned EnMask = 0;
4299   int SrcIdx = 0;
4300 
4301   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4302     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4303 
4304     // Add the register arguments
4305     if (Op.isReg()) {
4306       assert(SrcIdx < 4);
4307       OperandIdx[SrcIdx] = Inst.size();
4308       Op.addRegOperands(Inst, 1);
4309       ++SrcIdx;
4310       continue;
4311     }
4312 
4313     if (Op.isOff()) {
4314       assert(SrcIdx < 4);
4315       OperandIdx[SrcIdx] = Inst.size();
4316       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
4317       ++SrcIdx;
4318       continue;
4319     }
4320 
4321     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
4322       Op.addImmOperands(Inst, 1);
4323       continue;
4324     }
4325 
4326     if (Op.isToken() && Op.getToken() == "done")
4327       continue;
4328 
4329     // Handle optional arguments
4330     OptionalIdx[Op.getImmTy()] = i;
4331   }
4332 
4333   assert(SrcIdx == 4);
4334 
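  // For a compressed export the packed sources occupy src0/src1: copy the
  // third parsed source into slot 1, clear slots 2 and 3, and use two enable
  // bits per remaining source when forming the enable mask below.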
4335   bool Compr = false;
4336   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
4337     Compr = true;
4338     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
4339     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
4340     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
4341   }
4342 
4343   for (auto i = 0; i < SrcIdx; ++i) {
4344     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
4345       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
4346     }
4347   }
4348 
4349   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
4350   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
4351 
4352   Inst.addOperand(MCOperand::createImm(EnMask));
4353 }
4354 
4355 //===----------------------------------------------------------------------===//
4356 // s_waitcnt
4357 //===----------------------------------------------------------------------===//
4358 
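// An s_waitcnt operand is either a raw immediate or a list of named counters,
// e.g. "vmcnt(0) expcnt(0) lgkmcnt(0)"; each counter is packed into the
// combined wait-count bitmask with the encode*/decode* helpers below.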
4359 static bool
4360 encodeCnt(
4361   const AMDGPU::IsaVersion ISA,
4362   int64_t &IntVal,
4363   int64_t CntVal,
4364   bool Saturate,
4365   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
4366   unsigned (*decode)(const IsaVersion &Version, unsigned))
4367 {
4368   bool Failed = false;
4369 
4370   IntVal = encode(ISA, IntVal, CntVal);
4371   if (CntVal != decode(ISA, IntVal)) {
4372     if (Saturate) {
4373       IntVal = encode(ISA, IntVal, -1);
4374     } else {
4375       Failed = true;
4376     }
4377   }
4378   return Failed;
4379 }
4380 
4381 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
4382   StringRef CntName = Parser.getTok().getString();
4383   int64_t CntVal;
4384 
4385   Parser.Lex();
4386   if (getLexer().isNot(AsmToken::LParen))
4387     return true;
4388 
4389   Parser.Lex();
4390   if (getLexer().isNot(AsmToken::Integer))
4391     return true;
4392 
4393   SMLoc ValLoc = Parser.getTok().getLoc();
4394   if (getParser().parseAbsoluteExpression(CntVal))
4395     return true;
4396 
4397   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4398 
4399   bool Failed = true;
4400   bool Sat = CntName.endswith("_sat");
4401 
4402   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
4403     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
4404   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
4405     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
4406   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
4407     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
4408   }
4409 
4410   if (Failed) {
4411     Error(ValLoc, "too large value for " + CntName);
4412     return true;
4413   }
4414 
4415   if (getLexer().isNot(AsmToken::RParen)) {
4416     return true;
4417   }
4418 
4419   Parser.Lex();
4420   if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
4421     const AsmToken NextToken = getLexer().peekTok();
4422     if (NextToken.is(AsmToken::Identifier)) {
4423       Parser.Lex();
4424     }
4425   }
4426 
4427   return false;
4428 }
4429 
4430 OperandMatchResultTy
4431 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
4432   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4433   int64_t Waitcnt = getWaitcntBitMask(ISA);
4434   SMLoc S = Parser.getTok().getLoc();
4435 
4436   switch(getLexer().getKind()) {
4437     default: return MatchOperand_ParseFail;
4438     case AsmToken::Integer:
4439       // The operand can be an integer value.
4440       if (getParser().parseAbsoluteExpression(Waitcnt))
4441         return MatchOperand_ParseFail;
4442       break;
4443 
4444     case AsmToken::Identifier:
4445       do {
4446         if (parseCnt(Waitcnt))
4447           return MatchOperand_ParseFail;
4448       } while(getLexer().isNot(AsmToken::EndOfStatement));
4449       break;
4450   }
4451   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
4452   return MatchOperand_Success;
4453 }
4454 
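// Parse a "hwreg(<name or id>[, <bit offset>, <bit width>])" construct, e.g.
// "hwreg(HW_REG_MODE, 0, 32)". Offset and width default to the whole register
// when omitted.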
4455 bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
4456                                           int64_t &Width) {
4457   using namespace llvm::AMDGPU::Hwreg;
4458 
4459   if (Parser.getTok().getString() != "hwreg")
4460     return true;
4461   Parser.Lex();
4462 
4463   if (getLexer().isNot(AsmToken::LParen))
4464     return true;
4465   Parser.Lex();
4466 
4467   if (getLexer().is(AsmToken::Identifier)) {
4468     HwReg.IsSymbolic = true;
4469     HwReg.Id = ID_UNKNOWN_;
4470     const StringRef tok = Parser.getTok().getString();
4471     int Last = ID_SYMBOLIC_LAST_;
4472     if (isSI() || isCI() || isVI())
4473       Last = ID_SYMBOLIC_FIRST_GFX9_;
4474     else if (isGFX9())
4475       Last = ID_SYMBOLIC_FIRST_GFX10_;
4476     for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) {
4477       if (tok == IdSymbolic[i]) {
4478         HwReg.Id = i;
4479         break;
4480       }
4481     }
4482     Parser.Lex();
4483   } else {
4484     HwReg.IsSymbolic = false;
4485     if (getLexer().isNot(AsmToken::Integer))
4486       return true;
4487     if (getParser().parseAbsoluteExpression(HwReg.Id))
4488       return true;
4489   }
4490 
4491   if (getLexer().is(AsmToken::RParen)) {
4492     Parser.Lex();
4493     return false;
4494   }
4495 
4496   // Optional parameters: bit offset and bit width.
4497   if (getLexer().isNot(AsmToken::Comma))
4498     return true;
4499   Parser.Lex();
4500 
4501   if (getLexer().isNot(AsmToken::Integer))
4502     return true;
4503   if (getParser().parseAbsoluteExpression(Offset))
4504     return true;
4505 
4506   if (getLexer().isNot(AsmToken::Comma))
4507     return true;
4508   Parser.Lex();
4509 
4510   if (getLexer().isNot(AsmToken::Integer))
4511     return true;
4512   if (getParser().parseAbsoluteExpression(Width))
4513     return true;
4514 
4515   if (getLexer().isNot(AsmToken::RParen))
4516     return true;
4517   Parser.Lex();
4518 
4519   return false;
4520 }
4521 
4522 OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
4523   using namespace llvm::AMDGPU::Hwreg;
4524 
4525   int64_t Imm16Val = 0;
4526   SMLoc S = Parser.getTok().getLoc();
4527 
4528   switch(getLexer().getKind()) {
4529     default: return MatchOperand_NoMatch;
4530     case AsmToken::Integer:
4531       // The operand can be an integer value.
4532       if (getParser().parseAbsoluteExpression(Imm16Val))
4533         return MatchOperand_NoMatch;
4534       if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
4535         Error(S, "invalid immediate: only 16-bit values are legal");
4536         // Do not return an error code; create an imm operand anyway and proceed
4537         // to the next operand, if any. That avoids unnecessary error messages.
4538       }
4539       break;
4540 
4541     case AsmToken::Identifier: {
4542         OperandInfoTy HwReg(ID_UNKNOWN_);
4543         int64_t Offset = OFFSET_DEFAULT_;
4544         int64_t Width = WIDTH_M1_DEFAULT_ + 1;
4545         if (parseHwregConstruct(HwReg, Offset, Width))
4546           return MatchOperand_ParseFail;
4547         if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) {
4548           if (HwReg.IsSymbolic)
4549             Error(S, "invalid symbolic name of hardware register");
4550           else
4551             Error(S, "invalid code of hardware register: only 6-bit values are legal");
4552         }
4553         if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset))
4554           Error(S, "invalid bit offset: only 5-bit values are legal");
4555         if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1))
4556           Error(S, "invalid bitfield width: only values from 1 to 32 are legal");
4557         Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_);
4558       }
4559       break;
4560   }
4561   Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
4562   return MatchOperand_Success;
4563 }
4564 
4565 bool AMDGPUOperand::isSWaitCnt() const {
4566   return isImm();
4567 }
4568 
4569 bool AMDGPUOperand::isHwreg() const {
4570   return isImmTy(ImmTyHwreg);
4571 }
4572 
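// Parse a "sendmsg(<msg>[, <operation>[, <stream id>]])" construct, e.g.
// "sendmsg(MSG_GS, GS_OP_EMIT, 0)". The operation is only parsed for GS,
// GS_DONE and SYSMSG messages, and the stream id only for GS and GS_DONE.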
4573 bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) {
4574   using namespace llvm::AMDGPU::SendMsg;
4575 
4576   if (Parser.getTok().getString() != "sendmsg")
4577     return true;
4578   Parser.Lex();
4579 
4580   if (getLexer().isNot(AsmToken::LParen))
4581     return true;
4582   Parser.Lex();
4583 
4584   if (getLexer().is(AsmToken::Identifier)) {
4585     Msg.IsSymbolic = true;
4586     Msg.Id = ID_UNKNOWN_;
4587     const std::string tok = Parser.getTok().getString();
4588     for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
4589       switch(i) {
4590         default: continue; // Omit gaps.
4591         case ID_GS_ALLOC_REQ:
4592           if (isSI() || isCI() || isVI())
4593             continue;
4594           break;
4595         case ID_INTERRUPT: case ID_GS: case ID_GS_DONE:
4596         case ID_SYSMSG: break;
4597       }
4598       if (tok == IdSymbolic[i]) {
4599         Msg.Id = i;
4600         break;
4601       }
4602     }
4603     Parser.Lex();
4604   } else {
4605     Msg.IsSymbolic = false;
4606     if (getLexer().isNot(AsmToken::Integer))
4607       return true;
4608     if (getParser().parseAbsoluteExpression(Msg.Id))
4609       return true;
4610     if (getLexer().is(AsmToken::Integer))
4611       if (getParser().parseAbsoluteExpression(Msg.Id))
4612         Msg.Id = ID_UNKNOWN_;
4613   }
4614   if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest.
4615     return false;
4616 
4617   if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) {
4618     if (getLexer().isNot(AsmToken::RParen))
4619       return true;
4620     Parser.Lex();
4621     return false;
4622   }
4623 
4624   if (getLexer().isNot(AsmToken::Comma))
4625     return true;
4626   Parser.Lex();
4627 
4628   assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG);
4629   Operation.Id = ID_UNKNOWN_;
4630   if (getLexer().is(AsmToken::Identifier)) {
4631     Operation.IsSymbolic = true;
4632     const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
4633     const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
4634     const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
4635     const StringRef Tok = Parser.getTok().getString();
4636     for (int i = F; i < L; ++i) {
4637       if (Tok == S[i]) {
4638         Operation.Id = i;
4639         break;
4640       }
4641     }
4642     Parser.Lex();
4643   } else {
4644     Operation.IsSymbolic = false;
4645     if (getLexer().isNot(AsmToken::Integer))
4646       return true;
4647     if (getParser().parseAbsoluteExpression(Operation.Id))
4648       return true;
4649   }
4650 
4651   if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
4652     // Stream id is optional.
4653     if (getLexer().is(AsmToken::RParen)) {
4654       Parser.Lex();
4655       return false;
4656     }
4657 
4658     if (getLexer().isNot(AsmToken::Comma))
4659       return true;
4660     Parser.Lex();
4661 
4662     if (getLexer().isNot(AsmToken::Integer))
4663       return true;
4664     if (getParser().parseAbsoluteExpression(StreamId))
4665       return true;
4666   }
4667 
4668   if (getLexer().isNot(AsmToken::RParen))
4669     return true;
4670   Parser.Lex();
4671   return false;
4672 }
4673 
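// Parses a VINTRP slot operand: "p10", "p20" and "p0" map to slots 0, 1 and 2.
// Illustrative example (not from the original comments):
//   v_interp_mov_f32 v0, p10, attr0.x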
4674 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
4675   if (getLexer().getKind() != AsmToken::Identifier)
4676     return MatchOperand_NoMatch;
4677 
4678   StringRef Str = Parser.getTok().getString();
4679   int Slot = StringSwitch<int>(Str)
4680     .Case("p10", 0)
4681     .Case("p20", 1)
4682     .Case("p0", 2)
4683     .Default(-1);
4684 
4685   SMLoc S = Parser.getTok().getLoc();
4686   if (Slot == -1)
4687     return MatchOperand_ParseFail;
4688 
4689   Parser.Lex();
4690   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
4691                                               AMDGPUOperand::ImmTyInterpSlot));
4692   return MatchOperand_Success;
4693 }
4694 
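// Parses an interpolation attribute of the form "attr<N>.<chan>".
// Illustrative example (not from the original comments): "attr32.y" yields
// Attr = 32 and AttrChan = 1.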
4695 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
4696   if (getLexer().getKind() != AsmToken::Identifier)
4697     return MatchOperand_NoMatch;
4698 
4699   StringRef Str = Parser.getTok().getString();
4700   if (!Str.startswith("attr"))
4701     return MatchOperand_NoMatch;
4702 
4703   StringRef Chan = Str.take_back(2);
4704   int AttrChan = StringSwitch<int>(Chan)
4705     .Case(".x", 0)
4706     .Case(".y", 1)
4707     .Case(".z", 2)
4708     .Case(".w", 3)
4709     .Default(-1);
4710   if (AttrChan == -1)
4711     return MatchOperand_ParseFail;
4712 
4713   Str = Str.drop_back(2).drop_front(4);
4714 
4715   uint8_t Attr;
4716   if (Str.getAsInteger(10, Attr))
4717     return MatchOperand_ParseFail;
4718 
4719   SMLoc S = Parser.getTok().getLoc();
4720   Parser.Lex();
4721   if (Attr > 63) {
4722     Error(S, "out of bounds attr");
4723     return MatchOperand_Success;
4724   }
4725 
4726   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
4727 
4728   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
4729                                               AMDGPUOperand::ImmTyInterpAttr));
4730   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
4731                                               AMDGPUOperand::ImmTyAttrChan));
4732   return MatchOperand_Success;
4733 }
4734 
4735 void AMDGPUAsmParser::errorExpTgt() {
4736   Error(Parser.getTok().getLoc(), "invalid exp target");
4737 }
4738 
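// Decodes an export target string into its numeric encoding. The mapping
// implemented below is: "mrt0".."mrt7" -> 0..7, "mrtz" -> 8, "null" -> 9,
// "pos0".."pos3" -> 12..15, "param0".."param31" -> 32..63.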
4739 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
4740                                                       uint8_t &Val) {
4741   if (Str == "null") {
4742     Val = 9;
4743     return MatchOperand_Success;
4744   }
4745 
4746   if (Str.startswith("mrt")) {
4747     Str = Str.drop_front(3);
4748     if (Str == "z") { // == mrtz
4749       Val = 8;
4750       return MatchOperand_Success;
4751     }
4752 
4753     if (Str.getAsInteger(10, Val))
4754       return MatchOperand_ParseFail;
4755 
4756     if (Val > 7)
4757       errorExpTgt();
4758 
4759     return MatchOperand_Success;
4760   }
4761 
4762   if (Str.startswith("pos")) {
4763     Str = Str.drop_front(3);
4764     if (Str.getAsInteger(10, Val))
4765       return MatchOperand_ParseFail;
4766 
4767     if (Val > 3)
4768       errorExpTgt();
4769 
4770     Val += 12;
4771     return MatchOperand_Success;
4772   }
4773 
4774   if (Str.startswith("param")) {
4775     Str = Str.drop_front(5);
4776     if (Str.getAsInteger(10, Val))
4777       return MatchOperand_ParseFail;
4778 
4779     if (Val >= 32)
4780       errorExpTgt();
4781 
4782     Val += 32;
4783     return MatchOperand_Success;
4784   }
4785 
4786   if (Str.startswith("invalid_target_")) {
4787     Str = Str.drop_front(15);
4788     if (Str.getAsInteger(10, Val))
4789       return MatchOperand_ParseFail;
4790 
4791     errorExpTgt();
4792     return MatchOperand_Success;
4793   }
4794 
4795   return MatchOperand_NoMatch;
4796 }
4797 
4798 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
4799   uint8_t Val;
4800   StringRef Str = Parser.getTok().getString();
4801 
4802   auto Res = parseExpTgtImpl(Str, Val);
4803   if (Res != MatchOperand_Success)
4804     return Res;
4805 
4806   SMLoc S = Parser.getTok().getLoc();
4807   Parser.Lex();
4808 
4809   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
4810                                               AMDGPUOperand::ImmTyExpTgt));
4811   return MatchOperand_Success;
4812 }
4813 
4814 OperandMatchResultTy
4815 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
4816   using namespace llvm::AMDGPU::SendMsg;
4817 
4818   int64_t Imm16Val = 0;
4819   SMLoc S = Parser.getTok().getLoc();
4820 
4821   switch(getLexer().getKind()) {
4822   default:
4823     return MatchOperand_NoMatch;
4824   case AsmToken::Integer:
4825     // The operand can be an integer value.
4826     if (getParser().parseAbsoluteExpression(Imm16Val))
4827       return MatchOperand_NoMatch;
4828     if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
4829       Error(S, "invalid immediate: only 16-bit values are legal");
4830       // Do not return an error code, but create an imm operand anyway and proceed
4831       // to the next operand, if any. That avoids unnecessary error messages.
4832     }
4833     break;
4834   case AsmToken::Identifier: {
4835       OperandInfoTy Msg(ID_UNKNOWN_);
4836       OperandInfoTy Operation(OP_UNKNOWN_);
4837       int64_t StreamId = STREAM_ID_DEFAULT_;
4838       if (parseSendMsgConstruct(Msg, Operation, StreamId))
4839         return MatchOperand_ParseFail;
4840       do {
4841         // Validate and encode message ID.
4842         if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE)
4843                 || (Msg.Id == ID_GS_ALLOC_REQ && !isSI() && !isCI() && !isVI())
4844                 || Msg.Id == ID_SYSMSG)) {
4845           if (Msg.IsSymbolic)
4846             Error(S, "invalid/unsupported symbolic name of message");
4847           else
4848             Error(S, "invalid/unsupported code of message");
4849           break;
4850         }
4851         Imm16Val = (Msg.Id << ID_SHIFT_);
4852         // Validate and encode operation ID.
4853         if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) {
4854           if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) {
4855             if (Operation.IsSymbolic)
4856               Error(S, "invalid symbolic name of GS_OP");
4857             else
4858               Error(S, "invalid code of GS_OP: only 2-bit values are legal");
4859             break;
4860           }
4861           if (Operation.Id == OP_GS_NOP
4862               && Msg.Id != ID_GS_DONE) {
4863             Error(S, "invalid GS_OP: NOP is for GS_DONE only");
4864             break;
4865           }
4866           Imm16Val |= (Operation.Id << OP_SHIFT_);
4867         }
4868         if (Msg.Id == ID_SYSMSG) {
4869           if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) {
4870             if (Operation.IsSymbolic)
4871               Error(S, "invalid/unsupported symbolic name of SYSMSG_OP");
4872             else
4873               Error(S, "invalid/unsupported code of SYSMSG_OP");
4874             break;
4875           }
4876           Imm16Val |= (Operation.Id << OP_SHIFT_);
4877         }
4878         // Validate and encode stream ID.
4879         if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
4880           if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) {
4881             Error(S, "invalid stream id: only 2-bit values are legal");
4882             break;
4883           }
4884           Imm16Val |= (StreamId << STREAM_ID_SHIFT_);
4885         }
4886       } while (false);
4887     }
4888     break;
4889   }
4890   Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
4891   return MatchOperand_Success;
4892 }
4893 
4894 bool AMDGPUOperand::isSendMsg() const {
4895   return isImmTy(ImmTySendMsg);
4896 }
4897 
4898 //===----------------------------------------------------------------------===//
4899 // parser helpers
4900 //===----------------------------------------------------------------------===//
4901 
4902 bool
4903 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
4904   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
4905 }
4906 
4907 bool
4908 AMDGPUAsmParser::isId(const StringRef Id) const {
4909   return isId(getToken(), Id);
4910 }
4911 
4912 bool
4913 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
4914   return getTokenKind() == Kind;
4915 }
4916 
4917 bool
4918 AMDGPUAsmParser::trySkipId(const StringRef Id) {
4919   if (isId(Id)) {
4920     lex();
4921     return true;
4922   }
4923   return false;
4924 }
4925 
4926 bool
4927 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
4928   if (isToken(Kind)) {
4929     lex();
4930     return true;
4931   }
4932   return false;
4933 }
4934 
4935 bool
4936 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
4937                            const StringRef ErrMsg) {
4938   if (!trySkipToken(Kind)) {
4939     Error(getLoc(), ErrMsg);
4940     return false;
4941   }
4942   return true;
4943 }
4944 
4945 bool
4946 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
4947   return !getParser().parseAbsoluteExpression(Imm);
4948 }
4949 
4950 bool
4951 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
4952   if (isToken(AsmToken::String)) {
4953     Val = getToken().getStringContents();
4954     lex();
4955     return true;
4956   } else {
4957     Error(getLoc(), ErrMsg);
4958     return false;
4959   }
4960 }
4961 
4962 AsmToken
4963 AMDGPUAsmParser::getToken() const {
4964   return Parser.getTok();
4965 }
4966 
4967 AsmToken
4968 AMDGPUAsmParser::peekToken() {
4969   return getLexer().peekTok();
4970 }
4971 
4972 void
4973 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
4974   auto TokCount = getLexer().peekTokens(Tokens);
4975 
4976   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
4977     Tokens[Idx] = AsmToken(AsmToken::Error, "");
4978 }
4979 
4980 AsmToken::TokenKind
4981 AMDGPUAsmParser::getTokenKind() const {
4982   return getLexer().getKind();
4983 }
4984 
4985 SMLoc
4986 AMDGPUAsmParser::getLoc() const {
4987   return getToken().getLoc();
4988 }
4989 
4990 StringRef
4991 AMDGPUAsmParser::getTokenStr() const {
4992   return getToken().getString();
4993 }
4994 
4995 void
4996 AMDGPUAsmParser::lex() {
4997   Parser.Lex();
4998 }
4999 
5000 //===----------------------------------------------------------------------===//
5001 // swizzle
5002 //===----------------------------------------------------------------------===//
5003 
5004 LLVM_READNONE
5005 static unsigned
5006 encodeBitmaskPerm(const unsigned AndMask,
5007                   const unsigned OrMask,
5008                   const unsigned XorMask) {
5009   using namespace llvm::AMDGPU::Swizzle;
5010 
5011   return BITMASK_PERM_ENC |
5012          (AndMask << BITMASK_AND_SHIFT) |
5013          (OrMask  << BITMASK_OR_SHIFT)  |
5014          (XorMask << BITMASK_XOR_SHIFT);
5015 }
5016 
5017 bool
5018 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5019                                       const unsigned MinVal,
5020                                       const unsigned MaxVal,
5021                                       const StringRef ErrMsg) {
5022   for (unsigned i = 0; i < OpNum; ++i) {
5023     if (!skipToken(AsmToken::Comma, "expected a comma")) {
5024       return false;
5025     }
5026     SMLoc ExprLoc = Parser.getTok().getLoc();
5027     if (!parseExpr(Op[i])) {
5028       return false;
5029     }
5030     if (Op[i] < MinVal || Op[i] > MaxVal) {
5031       Error(ExprLoc, ErrMsg);
5032       return false;
5033     }
5034   }
5035 
5036   return true;
5037 }
5038 
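// Parses the operands of the QUAD_PERM swizzle macro. Illustrative example
// (not from the original comments):
//   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)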
5039 bool
5040 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5041   using namespace llvm::AMDGPU::Swizzle;
5042 
5043   int64_t Lane[LANE_NUM];
5044   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5045                            "expected a 2-bit lane id")) {
5046     Imm = QUAD_PERM_ENC;
5047     for (unsigned I = 0; I < LANE_NUM; ++I) {
5048       Imm |= Lane[I] << (LANE_SHIFT * I);
5049     }
5050     return true;
5051   }
5052   return false;
5053 }
5054 
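// Parses the BROADCAST swizzle macro. Illustrative example (not from the
// original comments):
//   ds_swizzle_b32 v0, v1 offset:swizzle(BROADCAST, 8, 0)
// where 8 is the group size and 0 is the lane broadcast within each group.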
5055 bool
5056 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5057   using namespace llvm::AMDGPU::Swizzle;
5058 
5059   SMLoc S = Parser.getTok().getLoc();
5060   int64_t GroupSize;
5061   int64_t LaneIdx;
5062 
5063   if (!parseSwizzleOperands(1, &GroupSize,
5064                             2, 32,
5065                             "group size must be in the interval [2,32]")) {
5066     return false;
5067   }
5068   if (!isPowerOf2_64(GroupSize)) {
5069     Error(S, "group size must be a power of two");
5070     return false;
5071   }
5072   if (parseSwizzleOperands(1, &LaneIdx,
5073                            0, GroupSize - 1,
5074                            "lane id must be in the interval [0,group size - 1]")) {
5075     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5076     return true;
5077   }
5078   return false;
5079 }
5080 
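// Parses the REVERSE swizzle macro, which reverses the lane order within each
// group. Illustrative example (not from the original comments):
//   ds_swizzle_b32 v0, v1 offset:swizzle(REVERSE, 8)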
5081 bool
5082 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
5083   using namespace llvm::AMDGPU::Swizzle;
5084 
5085   SMLoc S = Parser.getTok().getLoc();
5086   int64_t GroupSize;
5087 
5088   if (!parseSwizzleOperands(1, &GroupSize,
5089       2, 32, "group size must be in the interval [2,32]")) {
5090     return false;
5091   }
5092   if (!isPowerOf2_64(GroupSize)) {
5093     Error(S, "group size must be a power of two");
5094     return false;
5095   }
5096 
5097   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
5098   return true;
5099 }
5100 
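// Parses the SWAP swizzle macro, which swaps neighboring groups of lanes by
// xor-ing the lane id with the group size. Illustrative example (not from the
// original comments):
//   ds_swizzle_b32 v0, v1 offset:swizzle(SWAP, 4)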
5101 bool
5102 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5103   using namespace llvm::AMDGPU::Swizzle;
5104 
5105   SMLoc S = Parser.getTok().getLoc();
5106   int64_t GroupSize;
5107 
5108   if (!parseSwizzleOperands(1, &GroupSize,
5109       1, 16, "group size must be in the interval [1,16]")) {
5110     return false;
5111   }
5112   if (!isPowerOf2_64(GroupSize)) {
5113     Error(S, "group size must be a power of two");
5114     return false;
5115   }
5116 
5117   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5118   return true;
5119 }
5120 
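// Parses the BITMASK_PERM swizzle macro. The 5-character control string is
// read MSB first; '0'/'1' force a bit, 'p' preserves it and 'i' inverts it,
// as handled in the switch below. Illustrative example (not from the
// original comments):
//   ds_swizzle_b32 v0, v1 offset:swizzle(BITMASK_PERM, "01pi0")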
5121 bool
5122 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5123   using namespace llvm::AMDGPU::Swizzle;
5124 
5125   if (!skipToken(AsmToken::Comma, "expected a comma")) {
5126     return false;
5127   }
5128 
5129   StringRef Ctl;
5130   SMLoc StrLoc = Parser.getTok().getLoc();
5131   if (!parseString(Ctl)) {
5132     return false;
5133   }
5134   if (Ctl.size() != BITMASK_WIDTH) {
5135     Error(StrLoc, "expected a 5-character mask");
5136     return false;
5137   }
5138 
5139   unsigned AndMask = 0;
5140   unsigned OrMask = 0;
5141   unsigned XorMask = 0;
5142 
5143   for (size_t i = 0; i < Ctl.size(); ++i) {
5144     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5145     switch(Ctl[i]) {
5146     default:
5147       Error(StrLoc, "invalid mask");
5148       return false;
5149     case '0':
5150       break;
5151     case '1':
5152       OrMask |= Mask;
5153       break;
5154     case 'p':
5155       AndMask |= Mask;
5156       break;
5157     case 'i':
5158       AndMask |= Mask;
5159       XorMask |= Mask;
5160       break;
5161     }
5162   }
5163 
5164   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
5165   return true;
5166 }
5167 
5168 bool
5169 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5170 
5171   SMLoc OffsetLoc = Parser.getTok().getLoc();
5172 
5173   if (!parseExpr(Imm)) {
5174     return false;
5175   }
5176   if (!isUInt<16>(Imm)) {
5177     Error(OffsetLoc, "expected a 16-bit offset");
5178     return false;
5179   }
5180   return true;
5181 }
5182 
5183 bool
5184 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
5185   using namespace llvm::AMDGPU::Swizzle;
5186 
5187   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
5188 
5189     SMLoc ModeLoc = Parser.getTok().getLoc();
5190     bool Ok = false;
5191 
5192     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
5193       Ok = parseSwizzleQuadPerm(Imm);
5194     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
5195       Ok = parseSwizzleBitmaskPerm(Imm);
5196     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
5197       Ok = parseSwizzleBroadcast(Imm);
5198     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
5199       Ok = parseSwizzleSwap(Imm);
5200     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
5201       Ok = parseSwizzleReverse(Imm);
5202     } else {
5203       Error(ModeLoc, "expected a swizzle mode");
5204     }
5205 
5206     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
5207   }
5208 
5209   return false;
5210 }
5211 
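// Parses the "offset:" operand of ds_swizzle_b32, which is either a raw
// 16-bit value or a swizzle() macro. Illustrative examples (not from the
// original comments):
//   ds_swizzle_b32 v0, v1 offset:0xffff
//   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)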
5212 OperandMatchResultTy
5213 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
5214   SMLoc S = Parser.getTok().getLoc();
5215   int64_t Imm = 0;
5216 
5217   if (trySkipId("offset")) {
5218 
5219     bool Ok = false;
5220     if (skipToken(AsmToken::Colon, "expected a colon")) {
5221       if (trySkipId("swizzle")) {
5222         Ok = parseSwizzleMacro(Imm);
5223       } else {
5224         Ok = parseSwizzleOffset(Imm);
5225       }
5226     }
5227 
5228     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
5229 
5230     return Ok ? MatchOperand_Success : MatchOperand_ParseFail;
5231   } else {
5232     // Swizzle "offset" operand is optional.
5233     // If it is omitted, try parsing other optional operands.
5234     return parseOptionalOpr(Operands);
5235   }
5236 }
5237 
5238 bool
5239 AMDGPUOperand::isSwizzle() const {
5240   return isImmTy(ImmTySwizzle);
5241 }
5242 
5243 //===----------------------------------------------------------------------===//
5244 // VGPR Index Mode
5245 //===----------------------------------------------------------------------===//
5246 
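// Parses the body of a "gpr_idx(...)" macro listing VGPR index modes.
// Illustrative example (not from the original comments):
//   s_set_gpr_idx_on s0, gpr_idx(SRC0, DST)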
5247 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
5248 
5249   using namespace llvm::AMDGPU::VGPRIndexMode;
5250 
5251   if (trySkipToken(AsmToken::RParen)) {
5252     return OFF;
5253   }
5254 
5255   int64_t Imm = 0;
5256 
5257   while (true) {
5258     unsigned Mode = 0;
5259     SMLoc S = Parser.getTok().getLoc();
5260 
5261     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
5262       if (trySkipId(IdSymbolic[ModeId])) {
5263         Mode = 1 << ModeId;
5264         break;
5265       }
5266     }
5267 
5268     if (Mode == 0) {
5269       Error(S, (Imm == 0) ?
5270                "expected a VGPR index mode or a closing parenthesis" :
5271                "expected a VGPR index mode");
5272       break;
5273     }
5274 
5275     if (Imm & Mode) {
5276       Error(S, "duplicate VGPR index mode");
5277       break;
5278     }
5279     Imm |= Mode;
5280 
5281     if (trySkipToken(AsmToken::RParen))
5282       break;
5283     if (!skipToken(AsmToken::Comma,
5284                    "expected a comma or a closing parenthesis"))
5285       break;
5286   }
5287 
5288   return Imm;
5289 }
5290 
5291 OperandMatchResultTy
5292 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
5293 
5294   int64_t Imm = 0;
5295   SMLoc S = Parser.getTok().getLoc();
5296 
5297   if (getLexer().getKind() == AsmToken::Identifier &&
5298       Parser.getTok().getString() == "gpr_idx" &&
5299       getLexer().peekTok().is(AsmToken::LParen)) {
5300 
5301     Parser.Lex();
5302     Parser.Lex();
5303 
5304     // If the parse failed, trigger an error but do not return an error code
5305     // to avoid excessive error messages.
5306     Imm = parseGPRIdxMacro();
5307 
5308   } else {
5309     if (getParser().parseAbsoluteExpression(Imm))
5310       return MatchOperand_NoMatch;
5311     if (Imm < 0 || !isUInt<4>(Imm)) {
5312       Error(S, "invalid immediate: only 4-bit values are legal");
5313     }
5314   }
5315 
5316   Operands.push_back(
5317       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
5318   return MatchOperand_Success;
5319 }
5320 
5321 bool AMDGPUOperand::isGPRIdxMode() const {
5322   return isImmTy(ImmTyGprIdxMode);
5323 }
5324 
5325 //===----------------------------------------------------------------------===//
5326 // sopp branch targets
5327 //===----------------------------------------------------------------------===//
5328 
5329 OperandMatchResultTy
5330 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
5331   SMLoc S = Parser.getTok().getLoc();
5332 
5333   switch (getLexer().getKind()) {
5334     default: return MatchOperand_ParseFail;
5335     case AsmToken::Integer: {
5336       int64_t Imm;
5337       if (getParser().parseAbsoluteExpression(Imm))
5338         return MatchOperand_ParseFail;
5339       Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
5340       return MatchOperand_Success;
5341     }
5342 
5343     case AsmToken::Identifier:
5344       Operands.push_back(AMDGPUOperand::CreateExpr(this,
5345           MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
5346                                   Parser.getTok().getString()), getContext()), S));
5347       Parser.Lex();
5348       return MatchOperand_Success;
5349   }
5350 }
5351 
5352 //===----------------------------------------------------------------------===//
5353 // mubuf
5354 //===----------------------------------------------------------------------===//
5355 
5356 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
5357   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
5358 }
5359 
5360 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
5361   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
5362 }
5363 
5364 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
5365   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
5366 }
5367 
5368 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
5369                                const OperandVector &Operands,
5370                                bool IsAtomic,
5371                                bool IsAtomicReturn,
5372                                bool IsLds) {
5373   bool IsLdsOpcode = IsLds;
5374   bool HasLdsModifier = false;
5375   OptionalImmIndexMap OptionalIdx;
5376   assert(!IsAtomicReturn || IsAtomic);
5377   unsigned FirstOperandIdx = 1;
5378 
5379   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
5380     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5381 
5382     // Add the register arguments
5383     if (Op.isReg()) {
5384       Op.addRegOperands(Inst, 1);
5385       // Insert a tied src for atomic return dst.
5386       // This cannot be postponed as subsequent calls to
5387       // addImmOperands rely on correct number of MC operands.
5388       if (IsAtomicReturn && i == FirstOperandIdx)
5389         Op.addRegOperands(Inst, 1);
5390       continue;
5391     }
5392 
5393     // Handle the case where soffset is an immediate
5394     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5395       Op.addImmOperands(Inst, 1);
5396       continue;
5397     }
5398 
5399     HasLdsModifier |= Op.isLDS();
5400 
5401     // Handle tokens like 'offen' which are sometimes hard-coded into the
5402     // asm string.  There are no MCInst operands for these.
5403     if (Op.isToken()) {
5404       continue;
5405     }
5406     assert(Op.isImm());
5407 
5408     // Handle optional arguments
5409     OptionalIdx[Op.getImmTy()] = i;
5410   }
5411 
5412   // This is a workaround for an llvm quirk which may result in an
5413   // incorrect instruction selection. Lds and non-lds versions of
5414   // MUBUF instructions are identical except that lds versions
5415   // have a mandatory 'lds' modifier. However, this modifier follows
5416   // the optional modifiers, and the llvm asm matcher regards this
5417   // 'lds' modifier as an optional one. As a result, an lds version
5418   // of an opcode may be selected even if it has no 'lds' modifier.
5419   if (IsLdsOpcode && !HasLdsModifier) {
5420     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
5421     if (NoLdsOpcode != -1) { // Got lds version - correct it.
5422       Inst.setOpcode(NoLdsOpcode);
5423       IsLdsOpcode = false;
5424     }
5425   }
5426 
5427   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
5428   if (!IsAtomic) { // glc is hard-coded.
5429     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5430   }
5431   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5432 
5433   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
5434     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5435   }
5436 
5437   if (isGFX10())
5438     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5439 }
5440 
5441 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
5442   OptionalImmIndexMap OptionalIdx;
5443 
5444   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5445     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5446 
5447     // Add the register arguments
5448     if (Op.isReg()) {
5449       Op.addRegOperands(Inst, 1);
5450       continue;
5451     }
5452 
5453     // Handle the case where soffset is an immediate
5454     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5455       Op.addImmOperands(Inst, 1);
5456       continue;
5457     }
5458 
5459     // Handle tokens like 'offen' which are sometimes hard-coded into the
5460     // asm string.  There are no MCInst operands for these.
5461     if (Op.isToken()) {
5462       continue;
5463     }
5464     assert(Op.isImm());
5465 
5466     // Handle optional arguments
5467     OptionalIdx[Op.getImmTy()] = i;
5468   }
5469 
5470   addOptionalImmOperand(Inst, Operands, OptionalIdx,
5471                         AMDGPUOperand::ImmTyOffset);
5472   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
5473   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5474   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5475   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5476 
5477   if (isGFX10())
5478     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5479 }
5480 
5481 //===----------------------------------------------------------------------===//
5482 // mimg
5483 //===----------------------------------------------------------------------===//
5484 
5485 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
5486                               bool IsAtomic) {
5487   unsigned I = 1;
5488   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5489   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5490     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5491   }
5492 
5493   if (IsAtomic) {
5494     // Add src, same as dst
5495     assert(Desc.getNumDefs() == 1);
5496     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
5497   }
5498 
5499   OptionalImmIndexMap OptionalIdx;
5500 
5501   for (unsigned E = Operands.size(); I != E; ++I) {
5502     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5503 
5504     // Add the register arguments
5505     if (Op.isReg()) {
5506       Op.addRegOperands(Inst, 1);
5507     } else if (Op.isImmModifier()) {
5508       OptionalIdx[Op.getImmTy()] = I;
5509     } else if (!Op.isToken()) {
5510       llvm_unreachable("unexpected operand type");
5511     }
5512   }
5513 
5514   bool IsGFX10 = isGFX10();
5515 
5516   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
5517   if (IsGFX10)
5518     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
5519   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
5520   if (IsGFX10)
5521     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5522   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5523   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5524   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
5525   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5526   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
5527   if (!IsGFX10)
5528     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
5529   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
5530 }
5531 
5532 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
5533   cvtMIMG(Inst, Operands, true);
5534 }
5535 
5536 //===----------------------------------------------------------------------===//
5537 // smrd
5538 //===----------------------------------------------------------------------===//
5539 
5540 bool AMDGPUOperand::isSMRDOffset8() const {
5541   return isImm() && isUInt<8>(getImm());
5542 }
5543 
5544 bool AMDGPUOperand::isSMRDOffset20() const {
5545   return isImm() && isUInt<20>(getImm());
5546 }
5547 
5548 bool AMDGPUOperand::isSMRDLiteralOffset() const {
5549   // 32-bit literals are only supported on CI, and we only want to use them
5550   // when the offset is > 8 bits.
5551   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
5552 }
5553 
5554 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
5555   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5556 }
5557 
5558 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
5559   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5560 }
5561 
5562 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
5563   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5564 }
5565 
5566 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
5567   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5568 }
5569 
5570 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
5571   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5572 }
5573 
5574 //===----------------------------------------------------------------------===//
5575 // vop3
5576 //===----------------------------------------------------------------------===//
5577 
5578 static bool ConvertOmodMul(int64_t &Mul) {
5579   if (Mul != 1 && Mul != 2 && Mul != 4)
5580     return false;
5581 
5582   Mul >>= 1;
5583   return true;
5584 }
5585 
5586 static bool ConvertOmodDiv(int64_t &Div) {
5587   if (Div == 1) {
5588     Div = 0;
5589     return true;
5590   }
5591 
5592   if (Div == 2) {
5593     Div = 3;
5594     return true;
5595   }
5596 
5597   return false;
5598 }
5599 
5600 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
5601   if (BoundCtrl == 0) {
5602     BoundCtrl = 1;
5603     return true;
5604   }
5605 
5606   if (BoundCtrl == -1) {
5607     BoundCtrl = 0;
5608     return true;
5609   }
5610 
5611   return false;
5612 }
5613 
5614 // Note: the order in this table matches the order of operands in AsmString.
5615 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
5616   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
5617   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
5618   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
5619   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
5620   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
5621   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
5622   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
5623   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
5624   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
5625   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
5626   {"format",  AMDGPUOperand::ImmTyFORMAT, false, nullptr},
5627   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
5628   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
5629   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
5630   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
5631   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
5632   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
5633   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
5634   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
5635   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
5636   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
5637   {"a16",     AMDGPUOperand::ImmTyR128A16,  true, nullptr},
5638   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
5639   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
5640   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
5641   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
5642   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
5643   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
5644   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
5645   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
5646   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
5647   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
5648   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
5649   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
5650   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
5651   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
5652   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
5653   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
5654   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
5655 };
5656 
5657 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
5658   unsigned size = Operands.size();
5659   assert(size > 0);
5660 
5661   OperandMatchResultTy res = parseOptionalOpr(Operands);
5662 
5663   // This is a hack to enable hardcoded mandatory operands which follow
5664   // optional operands.
5665   //
5666   // The current design assumes that all operands after the first optional
5667   // operand are also optional. However, the implementation of some instructions
5668   // violates this rule (see e.g. flat/global atomics which have hardcoded 'glc').
5669   //
5670   // To alleviate this problem, we have to (implicitly) parse extra operands
5671   // to make sure the autogenerated parser of custom operands never hits
5672   // hardcoded mandatory operands.
5673 
5674   if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
5675 
5676     // We have parsed the first optional operand.
5677     // Parse as many operands as necessary to skip all mandatory operands.
5678 
5679     for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
5680       if (res != MatchOperand_Success ||
5681           getLexer().is(AsmToken::EndOfStatement)) break;
5682       if (getLexer().is(AsmToken::Comma)) Parser.Lex();
5683       res = parseOptionalOpr(Operands);
5684     }
5685   }
5686 
5687   return res;
5688 }
5689 
5690 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
5691   OperandMatchResultTy res;
5692   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
5693     // try to parse any optional operand here
5694     if (Op.IsBit) {
5695       res = parseNamedBit(Op.Name, Operands, Op.Type);
5696     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
5697       res = parseOModOperand(Operands);
5698     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
5699                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
5700                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
5701       res = parseSDWASel(Operands, Op.Name, Op.Type);
5702     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
5703       res = parseSDWADstUnused(Operands);
5704     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
5705                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
5706                Op.Type == AMDGPUOperand::ImmTyNegLo ||
5707                Op.Type == AMDGPUOperand::ImmTyNegHi) {
5708       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
5709                                         Op.ConvertResult);
5710     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
5711       res = parseDim(Operands);
5712     } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
5713       res = parseDfmtNfmt(Operands);
5714     } else {
5715       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
5716     }
5717     if (res != MatchOperand_NoMatch) {
5718       return res;
5719     }
5720   }
5721   return MatchOperand_NoMatch;
5722 }
5723 
5724 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
5725   StringRef Name = Parser.getTok().getString();
5726   if (Name == "mul") {
5727     return parseIntWithPrefix("mul", Operands,
5728                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
5729   }
5730 
5731   if (Name == "div") {
5732     return parseIntWithPrefix("div", Operands,
5733                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
5734   }
5735 
5736   return MatchOperand_NoMatch;
5737 }
5738 
5739 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
5740   cvtVOP3P(Inst, Operands);
5741 
5742   int Opc = Inst.getOpcode();
5743 
5744   int SrcNum;
5745   const int Ops[] = { AMDGPU::OpName::src0,
5746                       AMDGPU::OpName::src1,
5747                       AMDGPU::OpName::src2 };
5748   for (SrcNum = 0;
5749        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
5750        ++SrcNum);
5751   assert(SrcNum > 0);
5752 
5753   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
5754   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
5755 
5756   if ((OpSel & (1 << SrcNum)) != 0) {
5757     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
5758     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
5759     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
5760   }
5761 }
5762 
5763 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
5764       // 1. This operand is an input modifier
5765   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
5766       // 2. This is not the last operand
5767       && Desc.NumOperands > (OpNum + 1)
5768       // 3. The next operand is a register class
5769       && Desc.OpInfo[OpNum + 1].RegClass != -1
5770       // 4. The next register is not tied to any other operand
5771       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
5772 }
5773 
5774 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
5775 {
5776   OptionalImmIndexMap OptionalIdx;
5777   unsigned Opc = Inst.getOpcode();
5778 
5779   unsigned I = 1;
5780   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5781   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5782     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5783   }
5784 
5785   for (unsigned E = Operands.size(); I != E; ++I) {
5786     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5787     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5788       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
5789     } else if (Op.isInterpSlot() ||
5790                Op.isInterpAttr() ||
5791                Op.isAttrChan()) {
5792       Inst.addOperand(MCOperand::createImm(Op.getImm()));
5793     } else if (Op.isImmModifier()) {
5794       OptionalIdx[Op.getImmTy()] = I;
5795     } else {
5796       llvm_unreachable("unhandled operand type");
5797     }
5798   }
5799 
5800   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
5801     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
5802   }
5803 
5804   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
5805     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
5806   }
5807 
5808   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
5809     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
5810   }
5811 }
5812 
5813 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
5814                               OptionalImmIndexMap &OptionalIdx) {
5815   unsigned Opc = Inst.getOpcode();
5816 
5817   unsigned I = 1;
5818   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5819   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5820     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5821   }
5822 
5823   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
5824     // This instruction has src modifiers
5825     for (unsigned E = Operands.size(); I != E; ++I) {
5826       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5827       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5828         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
5829       } else if (Op.isImmModifier()) {
5830         OptionalIdx[Op.getImmTy()] = I;
5831       } else if (Op.isRegOrImm()) {
5832         Op.addRegOrImmOperands(Inst, 1);
5833       } else {
5834         llvm_unreachable("unhandled operand type");
5835       }
5836     }
5837   } else {
5838     // No src modifiers
5839     for (unsigned E = Operands.size(); I != E; ++I) {
5840       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5841       if (Op.isMod()) {
5842         OptionalIdx[Op.getImmTy()] = I;
5843       } else {
5844         Op.addRegOrImmOperands(Inst, 1);
5845       }
5846     }
5847   }
5848 
5849   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
5850     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
5851   }
5852 
5853   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
5854     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
5855   }
5856 
5857   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
5858   // they have a src2 register operand that is tied to the dst operand.
5859   // We do not allow modifiers for this operand in the assembler, so
5860   // src2_modifiers should be 0.
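  // For illustration (example not present in the original comment):
  // "v_mac_f32 v0, v1, v2" in VOP3 form gets src2_modifiers = 0 and a src2
  // operand equal to the dst register (v0) inserted below.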
5861   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
5862       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
5863       Opc == AMDGPU::V_MAC_F32_e64_vi ||
5864       Opc == AMDGPU::V_MAC_F16_e64_vi ||
5865       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
5866       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
5867       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
5868     auto it = Inst.begin();
5869     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
5870     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
5871     ++it;
5872     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
5873   }
5874 }
5875 
5876 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
5877   OptionalImmIndexMap OptionalIdx;
5878   cvtVOP3(Inst, Operands, OptionalIdx);
5879 }
5880 
5881 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
5882                                const OperandVector &Operands) {
5883   OptionalImmIndexMap OptIdx;
5884   const int Opc = Inst.getOpcode();
5885   const MCInstrDesc &Desc = MII.get(Opc);
5886 
5887   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
5888 
5889   cvtVOP3(Inst, Operands, OptIdx);
5890 
5891   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
5892     assert(!IsPacked);
5893     Inst.addOperand(Inst.getOperand(0));
5894   }
5895 
5896   // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
5897   // instruction, and then figure out where to actually put the modifiers.
5898 
5899   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
5900 
5901   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
5902   if (OpSelHiIdx != -1) {
5903     int DefaultVal = IsPacked ? -1 : 0;
5904     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
5905                           DefaultVal);
5906   }
5907 
5908   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
5909   if (NegLoIdx != -1) {
5910     assert(IsPacked);
5911     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
5912     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
5913   }
5914 
5915   const int Ops[] = { AMDGPU::OpName::src0,
5916                       AMDGPU::OpName::src1,
5917                       AMDGPU::OpName::src2 };
5918   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
5919                          AMDGPU::OpName::src1_modifiers,
5920                          AMDGPU::OpName::src2_modifiers };
5921 
5922   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
5923 
5924   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
5925   unsigned OpSelHi = 0;
5926   unsigned NegLo = 0;
5927   unsigned NegHi = 0;
5928 
5929   if (OpSelHiIdx != -1) {
5930     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
5931   }
5932 
5933   if (NegLoIdx != -1) {
5934     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
5935     NegLo = Inst.getOperand(NegLoIdx).getImm();
5936     NegHi = Inst.getOperand(NegHiIdx).getImm();
5937   }
5938 
5939   for (int J = 0; J < 3; ++J) {
5940     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
5941     if (OpIdx == -1)
5942       break;
5943 
5944     uint32_t ModVal = 0;
5945 
5946     if ((OpSel & (1 << J)) != 0)
5947       ModVal |= SISrcMods::OP_SEL_0;
5948 
5949     if ((OpSelHi & (1 << J)) != 0)
5950       ModVal |= SISrcMods::OP_SEL_1;
5951 
5952     if ((NegLo & (1 << J)) != 0)
5953       ModVal |= SISrcMods::NEG;
5954 
5955     if ((NegHi & (1 << J)) != 0)
5956       ModVal |= SISrcMods::NEG_HI;
5957 
5958     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
5959 
5960     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
5961   }
5962 }
5963 
5964 //===----------------------------------------------------------------------===//
5965 // dpp
5966 //===----------------------------------------------------------------------===//
5967 
5968 bool AMDGPUOperand::isDPPCtrl() const {
5969   using namespace AMDGPU::DPP;
5970 
5971   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
5972   if (result) {
5973     int64_t Imm = getImm();
5974     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
5975            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
5976            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
5977            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
5978            (Imm == DppCtrl::WAVE_SHL1) ||
5979            (Imm == DppCtrl::WAVE_ROL1) ||
5980            (Imm == DppCtrl::WAVE_SHR1) ||
5981            (Imm == DppCtrl::WAVE_ROR1) ||
5982            (Imm == DppCtrl::ROW_MIRROR) ||
5983            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
5984            (Imm == DppCtrl::BCAST15) ||
5985            (Imm == DppCtrl::BCAST31);
5986   }
5987   return false;
5988 }
5989 
5990 bool AMDGPUOperand::isS16Imm() const {
5991   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
5992 }
5993 
5994 bool AMDGPUOperand::isU16Imm() const {
5995   return isImm() && isUInt<16>(getImm());
5996 }
5997 
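// Parses the MIMG "dim" operand on GFX10. Illustrative example (not from the
// original comments):
//   image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D
// The short form "dim:1D" is also accepted; see the integer-token handling
// below.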
5998 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
5999   if (!isGFX10())
6000     return MatchOperand_NoMatch;
6001 
6002   SMLoc S = Parser.getTok().getLoc();
6003 
6004   if (getLexer().isNot(AsmToken::Identifier))
6005     return MatchOperand_NoMatch;
6006   if (getLexer().getTok().getString() != "dim")
6007     return MatchOperand_NoMatch;
6008 
6009   Parser.Lex();
6010   if (getLexer().isNot(AsmToken::Colon))
6011     return MatchOperand_ParseFail;
6012 
6013   Parser.Lex();
6014 
6015   // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
6016   // integer.
6017   std::string Token;
6018   if (getLexer().is(AsmToken::Integer)) {
6019     SMLoc Loc = getLexer().getTok().getEndLoc();
6020     Token = getLexer().getTok().getString();
6021     Parser.Lex();
6022     if (getLexer().getTok().getLoc() != Loc)
6023       return MatchOperand_ParseFail;
6024   }
6025   if (getLexer().isNot(AsmToken::Identifier))
6026     return MatchOperand_ParseFail;
6027   Token += getLexer().getTok().getString();
6028 
6029   StringRef DimId = Token;
6030   if (DimId.startswith("SQ_RSRC_IMG_"))
6031     DimId = DimId.substr(12);
6032 
6033   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
6034   if (!DimInfo)
6035     return MatchOperand_ParseFail;
6036 
6037   Parser.Lex();
6038 
6039   Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
6040                                               AMDGPUOperand::ImmTyDim));
6041   return MatchOperand_Success;
6042 }
6043 
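// Parses a dpp_ctrl operand. Illustrative examples of the accepted forms
// (not from the original comments):
//   quad_perm:[0,1,2,3]  row_shl:1  row_ror:15  wave_shl:1
//   row_mirror  row_half_mirror  row_bcast:15  row_bcast:31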
6044 OperandMatchResultTy
6045 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
6046   using namespace AMDGPU::DPP;
6047 
6048   SMLoc S = Parser.getTok().getLoc();
6049   StringRef Prefix;
6050   int64_t Int;
6051 
6052   if (getLexer().getKind() == AsmToken::Identifier) {
6053     Prefix = Parser.getTok().getString();
6054   } else {
6055     return MatchOperand_NoMatch;
6056   }
6057 
6058   if (Prefix == "row_mirror") {
6059     Int = DppCtrl::ROW_MIRROR;
6060     Parser.Lex();
6061   } else if (Prefix == "row_half_mirror") {
6062     Int = DppCtrl::ROW_HALF_MIRROR;
6063     Parser.Lex();
6064   } else {
6065     // Check to prevent parseDPPCtrlOps from eating invalid tokens
6066     if (Prefix != "quad_perm"
6067         && Prefix != "row_shl"
6068         && Prefix != "row_shr"
6069         && Prefix != "row_ror"
6070         && Prefix != "wave_shl"
6071         && Prefix != "wave_rol"
6072         && Prefix != "wave_shr"
6073         && Prefix != "wave_ror"
6074         && Prefix != "row_bcast") {
6075       return MatchOperand_NoMatch;
6076     }
6077 
6078     Parser.Lex();
6079     if (getLexer().isNot(AsmToken::Colon))
6080       return MatchOperand_ParseFail;
6081 
6082     if (Prefix == "quad_perm") {
6083       // quad_perm:[%d,%d,%d,%d]
6084       Parser.Lex();
6085       if (getLexer().isNot(AsmToken::LBrac))
6086         return MatchOperand_ParseFail;
6087       Parser.Lex();
6088 
6089       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <= 3))
6090         return MatchOperand_ParseFail;
6091 
6092       for (int i = 0; i < 3; ++i) {
6093         if (getLexer().isNot(AsmToken::Comma))
6094           return MatchOperand_ParseFail;
6095         Parser.Lex();
6096 
6097         int64_t Temp;
6098         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <= 3))
6099           return MatchOperand_ParseFail;
6100         const int shift = i*2 + 2;
6101         Int += (Temp << shift);
6102       }
6103 
6104       if (getLexer().isNot(AsmToken::RBrac))
6105         return MatchOperand_ParseFail;
6106       Parser.Lex();
6107     } else {
6108       // sel:%d
6109       Parser.Lex();
6110       if (getParser().parseAbsoluteExpression(Int))
6111         return MatchOperand_ParseFail;
6112 
6113       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
6114         Int |= DppCtrl::ROW_SHL0;
6115       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
6116         Int |= DppCtrl::ROW_SHR0;
6117       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
6118         Int |= DppCtrl::ROW_ROR0;
6119       } else if (Prefix == "wave_shl" && 1 == Int) {
6120         Int = DppCtrl::WAVE_SHL1;
6121       } else if (Prefix == "wave_rol" && 1 == Int) {
6122         Int = DppCtrl::WAVE_ROL1;
6123       } else if (Prefix == "wave_shr" && 1 == Int) {
6124         Int = DppCtrl::WAVE_SHR1;
6125       } else if (Prefix == "wave_ror" && 1 == Int) {
6126         Int = DppCtrl::WAVE_ROR1;
6127       } else if (Prefix == "row_bcast") {
6128         if (Int == 15) {
6129           Int = DppCtrl::BCAST15;
6130         } else if (Int == 31) {
6131           Int = DppCtrl::BCAST31;
6132         } else {
6133           return MatchOperand_ParseFail;
6134         }
6135       } else {
6136         return MatchOperand_ParseFail;
6137       }
6138     }
6139   }
6140 
6141   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
6142   return MatchOperand_Success;
6143 }
6144 
6145 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
6146   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
6147 }
6148 
6149 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
6150   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
6151 }
6152 
6153 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
6154   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
6155 }
6156 
6157 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
6158   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
6159 }
6160 
6161 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
6162   OptionalImmIndexMap OptionalIdx;
6163 
6164   unsigned I = 1;
6165   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6166   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6167     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6168   }
6169 
6170   for (unsigned E = Operands.size(); I != E; ++I) {
6171     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
6172                                             MCOI::TIED_TO);
6173     if (TiedTo != -1) {
6174       assert((unsigned)TiedTo < Inst.getNumOperands());
6175       // handle tied old or src2 for MAC instructions
6176       Inst.addOperand(Inst.getOperand(TiedTo));
6177     }
6178     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6179     // Add the register arguments
6180     if (Op.isReg() && Op.getReg() == AMDGPU::VCC) {
6181       // VOP2b (v_add_u32, v_sub_u32 ...) dpp uses the "vcc" token.
6182       // Skip it.
6183       continue;
6184     }
6185     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6186       Op.addRegWithFPInputModsOperands(Inst, 2);
6187     } else if (Op.isDPPCtrl()) {
6188       Op.addImmOperands(Inst, 1);
6189     } else if (Op.isImm()) {
6190       // Handle optional arguments
6191       OptionalIdx[Op.getImmTy()] = I;
6192     } else {
6193       llvm_unreachable("Invalid operand type");
6194     }
6195   }
6196 
6197   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
6198   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
6199   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
6200 }
6201 
6202 //===----------------------------------------------------------------------===//
6203 // sdwa
6204 //===----------------------------------------------------------------------===//
6205 
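// Parses an SDWA select operand (dst_sel, src0_sel or src1_sel). Illustrative
// example (not from the original comments):
//   v_mov_b32_sdwa v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1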
6206 OperandMatchResultTy
6207 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
6208                               AMDGPUOperand::ImmTy Type) {
6209   using namespace llvm::AMDGPU::SDWA;
6210 
6211   SMLoc S = Parser.getTok().getLoc();
6212   StringRef Value;
6213   OperandMatchResultTy res;
6214 
6215   res = parseStringWithPrefix(Prefix, Value);
6216   if (res != MatchOperand_Success) {
6217     return res;
6218   }
6219 
6220   int64_t Int;
6221   Int = StringSwitch<int64_t>(Value)
6222         .Case("BYTE_0", SdwaSel::BYTE_0)
6223         .Case("BYTE_1", SdwaSel::BYTE_1)
6224         .Case("BYTE_2", SdwaSel::BYTE_2)
6225         .Case("BYTE_3", SdwaSel::BYTE_3)
6226         .Case("WORD_0", SdwaSel::WORD_0)
6227         .Case("WORD_1", SdwaSel::WORD_1)
6228         .Case("DWORD", SdwaSel::DWORD)
6229         .Default(0xffffffff);
6230   Parser.Lex(); // eat last token
6231 
6232   if (Int == 0xffffffff) {
6233     return MatchOperand_ParseFail;
6234   }
6235 
6236   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
6237   return MatchOperand_Success;
6238 }
6239 
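// Parse the dst_unused:<MODE> SDWA operand, where MODE is one of UNUSED_PAD,
// UNUSED_SEXT or UNUSED_PRESERVE.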
6240 OperandMatchResultTy
6241 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
6242   using namespace llvm::AMDGPU::SDWA;
6243 
6244   SMLoc S = Parser.getTok().getLoc();
6245   StringRef Value;
6246   OperandMatchResultTy res;
6247 
6248   res = parseStringWithPrefix("dst_unused", Value);
6249   if (res != MatchOperand_Success) {
6250     return res;
6251   }
6252 
6253   int64_t Int;
6254   Int = StringSwitch<int64_t>(Value)
6255         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
6256         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
6257         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
6258         .Default(0xffffffff);
6259   Parser.Lex(); // eat last token
6260 
6261   if (Int == 0xffffffff) {
6262     return MatchOperand_ParseFail;
6263   }
6264 
6265   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
6266   return MatchOperand_Success;
6267 }
6268 
6269 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
6270   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
6271 }
6272 
6273 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
6274   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
6275 }
6276 
6277 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
6278   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
6279 }
6280 
6281 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
6282   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
6283 }
6284 
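// Convert the parsed operands of an SDWA instruction into the MCInst: the
// "vcc" dst token of VOP2b/VOPC forms may be skipped, explicit operands are
// copied in order, and any optional SDWA operands (clamp, omod, dst_sel,
// dst_unused, src0_sel, src1_sel) that were not written get their defaults.
// A typical source line looks roughly like:
//   v_mov_b32_sdwa v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1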
6285 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
6286                               uint64_t BasicInstType, bool skipVcc) {
6287   using namespace llvm::AMDGPU::SDWA;
6288 
6289   OptionalImmIndexMap OptionalIdx;
6290   bool skippedVcc = false;
6291 
6292   unsigned I = 1;
6293   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6294   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6295     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6296   }
6297 
6298   for (unsigned E = Operands.size(); I != E; ++I) {
6299     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6300     if (skipVcc && !skippedVcc && Op.isReg() && Op.getReg() == AMDGPU::VCC) {
      // VOP2b instructions (v_add_u32, v_sub_u32, ...) use the "vcc" token as
      // dst in their SDWA form. Skip it if it is the 2nd operand
      // (e.g. v_add_i32_sdwa v1, vcc, v2, v3) or the 4th operand
      // (v_addc_u32_sdwa v1, vcc, v2, v3, vcc).
      // Skip VCC only if we didn't skip it on the previous iteration.
6305       if (BasicInstType == SIInstrFlags::VOP2 &&
6306           (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
6307         skippedVcc = true;
6308         continue;
6309       } else if (BasicInstType == SIInstrFlags::VOPC &&
6310                  Inst.getNumOperands() == 0) {
6311         skippedVcc = true;
6312         continue;
6313       }
6314     }
6315     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6316       Op.addRegOrImmWithInputModsOperands(Inst, 2);
6317     } else if (Op.isImm()) {
6318       // Handle optional arguments
6319       OptionalIdx[Op.getImmTy()] = I;
6320     } else {
6321       llvm_unreachable("Invalid operand type");
6322     }
6323     skippedVcc = false;
6324   }
6325 
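  // Append defaults for any optional SDWA operands that were not specified;
  // which operands are present depends on the basic instruction type
  // (VOP1/VOP2/VOPC), and v_nop has none at all.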
6326   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
6327       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
6328       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments.
6330     switch (BasicInstType) {
6331     case SIInstrFlags::VOP1:
6332       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6333       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6334         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6335       }
6336       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6337       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6338       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6339       break;
6340 
6341     case SIInstrFlags::VOP2:
6342       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6343       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6344         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6345       }
6346       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6347       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6348       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6349       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6350       break;
6351 
6352     case SIInstrFlags::VOPC:
6353       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
6354         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6355       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6356       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6357       break;
6358 
6359     default:
6360       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
6361     }
6362   }
6363 
  // Special case for v_mac_{f16, f32}:
  // they have a src2 register operand that is tied to the dst operand.
6366   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
6367       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
6368     auto it = Inst.begin();
6369     std::advance(
6370       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
6371     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6372   }
6373 }
6374 
6375 /// Force static initialization.
6376 extern "C" void LLVMInitializeAMDGPUAsmParser() {
6377   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
6378   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
6379 }
6380 
6381 #define GET_REGISTER_MATCHER
6382 #define GET_MATCHER_IMPLEMENTATION
6383 #define GET_MNEMONIC_SPELL_CHECKER
6384 #include "AMDGPUGenAsmMatcher.inc"
6385 
// This function must be defined after the auto-generated include so that the
// MatchClassKind enum is defined.
6388 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
6389                                                      unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects such a token and fails to validate the
  // operand. This method checks whether we were given an immediate operand
  // where the corresponding token was expected.
6394   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
6395   switch (Kind) {
6396   case MCK_addr64:
6397     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
6398   case MCK_gds:
6399     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
6400   case MCK_lds:
6401     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
6402   case MCK_glc:
6403     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
6404   case MCK_idxen:
6405     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
6406   case MCK_offen:
6407     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
6408   case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token when isToken returns true; if the name of
    // the expression is not a valid token, the match fails, so we need to
    // handle that case here.
6415     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
6416   case MCK_SSrcF32:
6417     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
6418   case MCK_SoppBrTarget:
6419     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
6420   case MCK_VReg32OrOff:
6421     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
6422   case MCK_InterpSlot:
6423     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
6424   case MCK_Attr:
6425     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
6426   case MCK_AttrChan:
6427     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
6428   default:
6429     return Match_InvalidOperand;
6430   }
6431 }
6432 
6433 //===----------------------------------------------------------------------===//
6434 // endpgm
6435 //===----------------------------------------------------------------------===//
6436 
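// Parse the optional immediate operand of s_endpgm. If no expression follows
// the mnemonic, the value defaults to 0; when present it must fit in 16 bits,
// so e.g. both "s_endpgm" and "s_endpgm 0" are accepted.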
6437 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
6438   SMLoc S = Parser.getTok().getLoc();
6439   int64_t Imm = 0;
6440 
6441   if (!parseExpr(Imm)) {
    // The operand is optional; if it is not present, default to 0.
6443     Imm = 0;
6444   }
6445 
6446   if (!isUInt<16>(Imm)) {
6447     Error(S, "expected a 16-bit value");
6448     return MatchOperand_ParseFail;
6449   }
6450 
6451   Operands.push_back(
6452       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
6453   return MatchOperand_Success;
6454 }
6455 
6456 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
6457