1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "Utils/AMDGPUAsmUtils.h"
16 #include "Utils/AMDGPUBaseInfo.h"
17 #include "Utils/AMDKernelCodeTUtils.h"
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SmallBitVector.h"
23 #include "llvm/ADT/SmallString.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/ADT/StringSwitch.h"
26 #include "llvm/ADT/Twine.h"
27 #include "llvm/BinaryFormat/ELF.h"
28 #include "llvm/MC/MCAsmInfo.h"
29 #include "llvm/MC/MCContext.h"
30 #include "llvm/MC/MCExpr.h"
31 #include "llvm/MC/MCInst.h"
32 #include "llvm/MC/MCInstrDesc.h"
33 #include "llvm/MC/MCInstrInfo.h"
34 #include "llvm/MC/MCParser/MCAsmLexer.h"
35 #include "llvm/MC/MCParser/MCAsmParser.h"
36 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
37 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
38 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
39 #include "llvm/MC/MCRegisterInfo.h"
40 #include "llvm/MC/MCStreamer.h"
41 #include "llvm/MC/MCSubtargetInfo.h"
42 #include "llvm/MC/MCSymbol.h"
43 #include "llvm/Support/AMDGPUMetadata.h"
44 #include "llvm/Support/AMDHSAKernelDescriptor.h"
45 #include "llvm/Support/Casting.h"
46 #include "llvm/Support/Compiler.h"
47 #include "llvm/Support/ErrorHandling.h"
48 #include "llvm/Support/MachineValueType.h"
49 #include "llvm/Support/MathExtras.h"
50 #include "llvm/Support/SMLoc.h"
51 #include "llvm/Support/TargetParser.h"
52 #include "llvm/Support/TargetRegistry.h"
53 #include "llvm/Support/raw_ostream.h"
54 #include <algorithm>
55 #include <cassert>
56 #include <cstdint>
57 #include <cstring>
58 #include <iterator>
59 #include <map>
60 #include <memory>
61 #include <string>
62 
63 using namespace llvm;
64 using namespace llvm::AMDGPU;
65 using namespace llvm::amdhsa;
66 
67 namespace {
68 
69 class AMDGPUAsmParser;
70 
71 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };
72 
73 //===----------------------------------------------------------------------===//
74 // Operand
75 //===----------------------------------------------------------------------===//
76 
77 class AMDGPUOperand : public MCParsedAsmOperand {
78   enum KindTy {
79     Token,
80     Immediate,
81     Register,
82     Expression
83   } Kind;
84 
85   SMLoc StartLoc, EndLoc;
86   const AMDGPUAsmParser *AsmParser;
87 
88 public:
89   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
90     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
91 
92   using Ptr = std::unique_ptr<AMDGPUOperand>;
93 
94   struct Modifiers {
95     bool Abs = false;
96     bool Neg = false;
97     bool Sext = false;
98 
99     bool hasFPModifiers() const { return Abs || Neg; }
100     bool hasIntModifiers() const { return Sext; }
101     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
102 
103     int64_t getFPModifiersOperand() const {
104       int64_t Operand = 0;
105       Operand |= Abs ? SISrcMods::ABS : 0;
106       Operand |= Neg ? SISrcMods::NEG : 0;
107       return Operand;
108     }
109 
110     int64_t getIntModifiersOperand() const {
111       int64_t Operand = 0;
112       Operand |= Sext ? SISrcMods::SEXT : 0;
113       return Operand;
114     }
115 
116     int64_t getModifiersOperand() const {
117       assert(!(hasFPModifiers() && hasIntModifiers())
118            && "fp and int modifiers should not be used simultaneously");
119       if (hasFPModifiers()) {
120         return getFPModifiersOperand();
121       } else if (hasIntModifiers()) {
122         return getIntModifiersOperand();
123       } else {
124         return 0;
125       }
126     }
127 
128     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
129   };
130 
131   enum ImmTy {
132     ImmTyNone,
133     ImmTyGDS,
134     ImmTyLDS,
135     ImmTyOffen,
136     ImmTyIdxen,
137     ImmTyAddr64,
138     ImmTyOffset,
139     ImmTyInstOffset,
140     ImmTyOffset0,
141     ImmTyOffset1,
142     ImmTyGLC,
143     ImmTySLC,
144     ImmTyTFE,
145     ImmTyD16,
146     ImmTyClampSI,
147     ImmTyOModSI,
148     ImmTyDppCtrl,
149     ImmTyDppRowMask,
150     ImmTyDppBankMask,
151     ImmTyDppBoundCtrl,
152     ImmTySdwaDstSel,
153     ImmTySdwaSrc0Sel,
154     ImmTySdwaSrc1Sel,
155     ImmTySdwaDstUnused,
156     ImmTyDMask,
157     ImmTyUNorm,
158     ImmTyDA,
159     ImmTyR128A16,
160     ImmTyLWE,
161     ImmTyExpTgt,
162     ImmTyExpCompr,
163     ImmTyExpVM,
164     ImmTyFORMAT,
165     ImmTyHwreg,
166     ImmTyOff,
167     ImmTySendMsg,
168     ImmTyInterpSlot,
169     ImmTyInterpAttr,
170     ImmTyAttrChan,
171     ImmTyOpSel,
172     ImmTyOpSelHi,
173     ImmTyNegLo,
174     ImmTyNegHi,
175     ImmTySwizzle,
176     ImmTyHigh
177   };
178 
179   struct TokOp {
180     const char *Data;
181     unsigned Length;
182   };
183 
184   struct ImmOp {
185     int64_t Val;
186     ImmTy Type;
187     bool IsFPImm;
188     Modifiers Mods;
189   };
190 
191   struct RegOp {
192     unsigned RegNo;
193     bool IsForcedVOP3;
194     Modifiers Mods;
195   };
196 
197   union {
198     TokOp Tok;
199     ImmOp Imm;
200     RegOp Reg;
201     const MCExpr *Expr;
202   };
203 
204   bool isToken() const override {
205     if (Kind == Token)
206       return true;
207 
208     if (Kind != Expression || !Expr)
209       return false;
210 
211     // When parsing operands, we can't always tell if something was meant to be
212     // a token, like 'gds', or an expression that references a global variable.
213     // In this case, we assume the string is an expression, and if we need to
214     // interpret is a token, then we treat the symbol name as the token.
215     return isa<MCSymbolRefExpr>(Expr);
216   }
217 
218   bool isImm() const override {
219     return Kind == Immediate;
220   }
221 
222   bool isInlinableImm(MVT type) const;
223   bool isLiteralImm(MVT type) const;
224 
225   bool isRegKind() const {
226     return Kind == Register;
227   }
228 
229   bool isReg() const override {
230     return isRegKind() && !hasModifiers();
231   }
232 
233   bool isRegOrImmWithInputMods(MVT type) const {
234     return isRegKind() || isInlinableImm(type);
235   }
236 
237   bool isRegOrImmWithInt16InputMods() const {
238     return isRegOrImmWithInputMods(MVT::i16);
239   }
240 
241   bool isRegOrImmWithInt32InputMods() const {
242     return isRegOrImmWithInputMods(MVT::i32);
243   }
244 
245   bool isRegOrImmWithInt64InputMods() const {
246     return isRegOrImmWithInputMods(MVT::i64);
247   }
248 
249   bool isRegOrImmWithFP16InputMods() const {
250     return isRegOrImmWithInputMods(MVT::f16);
251   }
252 
253   bool isRegOrImmWithFP32InputMods() const {
254     return isRegOrImmWithInputMods(MVT::f32);
255   }
256 
257   bool isRegOrImmWithFP64InputMods() const {
258     return isRegOrImmWithInputMods(MVT::f64);
259   }
260 
261   bool isVReg() const {
262     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
263            isRegClass(AMDGPU::VReg_64RegClassID) ||
264            isRegClass(AMDGPU::VReg_96RegClassID) ||
265            isRegClass(AMDGPU::VReg_128RegClassID) ||
266            isRegClass(AMDGPU::VReg_256RegClassID) ||
267            isRegClass(AMDGPU::VReg_512RegClassID);
268   }
269 
270   bool isVReg32OrOff() const {
271     return isOff() || isRegClass(AMDGPU::VGPR_32RegClassID);
272   }
273 
274   bool isSDWAOperand(MVT type) const;
275   bool isSDWAFP16Operand() const;
276   bool isSDWAFP32Operand() const;
277   bool isSDWAInt16Operand() const;
278   bool isSDWAInt32Operand() const;
279 
280   bool isImmTy(ImmTy ImmT) const {
281     return isImm() && Imm.Type == ImmT;
282   }
283 
284   bool isImmModifier() const {
285     return isImm() && Imm.Type != ImmTyNone;
286   }
287 
288   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
289   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
290   bool isDMask() const { return isImmTy(ImmTyDMask); }
291   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
292   bool isDA() const { return isImmTy(ImmTyDA); }
293   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
294   bool isLWE() const { return isImmTy(ImmTyLWE); }
295   bool isOff() const { return isImmTy(ImmTyOff); }
296   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
297   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
298   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
299   bool isOffen() const { return isImmTy(ImmTyOffen); }
300   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
301   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
302   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
303   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<16>(getImm()); }
304   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
305 
306   bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
307   bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
308   bool isGDS() const { return isImmTy(ImmTyGDS); }
309   bool isLDS() const { return isImmTy(ImmTyLDS); }
310   bool isGLC() const { return isImmTy(ImmTyGLC); }
311   bool isSLC() const { return isImmTy(ImmTySLC); }
312   bool isTFE() const { return isImmTy(ImmTyTFE); }
313   bool isD16() const { return isImmTy(ImmTyD16); }
314   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
315   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
316   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
317   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
318   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
319   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
320   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
321   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
322   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
323   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
324   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
325   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
326   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
327   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
328   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
329   bool isHigh() const { return isImmTy(ImmTyHigh); }
330 
331   bool isMod() const {
332     return isClampSI() || isOModSI();
333   }
334 
335   bool isRegOrImm() const {
336     return isReg() || isImm();
337   }
338 
339   bool isRegClass(unsigned RCID) const;
340 
341   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
342     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
343   }
344 
345   bool isSCSrcB16() const {
346     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
347   }
348 
349   bool isSCSrcV2B16() const {
350     return isSCSrcB16();
351   }
352 
353   bool isSCSrcB32() const {
354     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
355   }
356 
357   bool isSCSrcB64() const {
358     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
359   }
360 
361   bool isSCSrcF16() const {
362     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
363   }
364 
365   bool isSCSrcV2F16() const {
366     return isSCSrcF16();
367   }
368 
369   bool isSCSrcF32() const {
370     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
371   }
372 
373   bool isSCSrcF64() const {
374     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
375   }
376 
377   bool isSSrcB32() const {
378     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
379   }
380 
381   bool isSSrcB16() const {
382     return isSCSrcB16() || isLiteralImm(MVT::i16);
383   }
384 
385   bool isSSrcV2B16() const {
386     llvm_unreachable("cannot happen");
387     return isSSrcB16();
388   }
389 
390   bool isSSrcB64() const {
391     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
392     // See isVSrc64().
393     return isSCSrcB64() || isLiteralImm(MVT::i64);
394   }
395 
396   bool isSSrcF32() const {
397     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
398   }
399 
400   bool isSSrcF64() const {
401     return isSCSrcB64() || isLiteralImm(MVT::f64);
402   }
403 
404   bool isSSrcF16() const {
405     return isSCSrcB16() || isLiteralImm(MVT::f16);
406   }
407 
408   bool isSSrcV2F16() const {
409     llvm_unreachable("cannot happen");
410     return isSSrcF16();
411   }
412 
413   bool isVCSrcB32() const {
414     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
415   }
416 
417   bool isVCSrcB64() const {
418     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
419   }
420 
421   bool isVCSrcB16() const {
422     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
423   }
424 
425   bool isVCSrcV2B16() const {
426     return isVCSrcB16();
427   }
428 
429   bool isVCSrcF32() const {
430     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
431   }
432 
433   bool isVCSrcF64() const {
434     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
435   }
436 
437   bool isVCSrcF16() const {
438     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
439   }
440 
441   bool isVCSrcV2F16() const {
442     return isVCSrcF16();
443   }
444 
445   bool isVSrcB32() const {
446     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
447   }
448 
449   bool isVSrcB64() const {
450     return isVCSrcF64() || isLiteralImm(MVT::i64);
451   }
452 
453   bool isVSrcB16() const {
454     return isVCSrcF16() || isLiteralImm(MVT::i16);
455   }
456 
457   bool isVSrcV2B16() const {
458     llvm_unreachable("cannot happen");
459     return isVSrcB16();
460   }
461 
462   bool isVSrcF32() const {
463     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
464   }
465 
466   bool isVSrcF64() const {
467     return isVCSrcF64() || isLiteralImm(MVT::f64);
468   }
469 
470   bool isVSrcF16() const {
471     return isVCSrcF16() || isLiteralImm(MVT::f16);
472   }
473 
474   bool isVSrcV2F16() const {
475     llvm_unreachable("cannot happen");
476     return isVSrcF16();
477   }
478 
479   bool isKImmFP32() const {
480     return isLiteralImm(MVT::f32);
481   }
482 
483   bool isKImmFP16() const {
484     return isLiteralImm(MVT::f16);
485   }
486 
487   bool isMem() const override {
488     return false;
489   }
490 
491   bool isExpr() const {
492     return Kind == Expression;
493   }
494 
495   bool isSoppBrTarget() const {
496     return isExpr() || isImm();
497   }
498 
499   bool isSWaitCnt() const;
500   bool isHwreg() const;
501   bool isSendMsg() const;
502   bool isSwizzle() const;
503   bool isSMRDOffset8() const;
504   bool isSMRDOffset20() const;
505   bool isSMRDLiteralOffset() const;
506   bool isDPPCtrl() const;
507   bool isGPRIdxMode() const;
508   bool isS16Imm() const;
509   bool isU16Imm() const;
510 
511   StringRef getExpressionAsToken() const {
512     assert(isExpr());
513     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
514     return S->getSymbol().getName();
515   }
516 
517   StringRef getToken() const {
518     assert(isToken());
519 
520     if (Kind == Expression)
521       return getExpressionAsToken();
522 
523     return StringRef(Tok.Data, Tok.Length);
524   }
525 
526   int64_t getImm() const {
527     assert(isImm());
528     return Imm.Val;
529   }
530 
531   ImmTy getImmTy() const {
532     assert(isImm());
533     return Imm.Type;
534   }
535 
536   unsigned getReg() const override {
537     return Reg.RegNo;
538   }
539 
540   SMLoc getStartLoc() const override {
541     return StartLoc;
542   }
543 
544   SMLoc getEndLoc() const override {
545     return EndLoc;
546   }
547 
548   SMRange getLocRange() const {
549     return SMRange(StartLoc, EndLoc);
550   }
551 
552   Modifiers getModifiers() const {
553     assert(isRegKind() || isImmTy(ImmTyNone));
554     return isRegKind() ? Reg.Mods : Imm.Mods;
555   }
556 
557   void setModifiers(Modifiers Mods) {
558     assert(isRegKind() || isImmTy(ImmTyNone));
559     if (isRegKind())
560       Reg.Mods = Mods;
561     else
562       Imm.Mods = Mods;
563   }
564 
565   bool hasModifiers() const {
566     return getModifiers().hasModifiers();
567   }
568 
569   bool hasFPModifiers() const {
570     return getModifiers().hasFPModifiers();
571   }
572 
573   bool hasIntModifiers() const {
574     return getModifiers().hasIntModifiers();
575   }
576 
577   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
578 
579   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
580 
581   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
582 
583   template <unsigned Bitwidth>
584   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
585 
586   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
587     addKImmFPOperands<16>(Inst, N);
588   }
589 
590   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
591     addKImmFPOperands<32>(Inst, N);
592   }
593 
594   void addRegOperands(MCInst &Inst, unsigned N) const;
595 
596   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
597     if (isRegKind())
598       addRegOperands(Inst, N);
599     else if (isExpr())
600       Inst.addOperand(MCOperand::createExpr(Expr));
601     else
602       addImmOperands(Inst, N);
603   }
604 
605   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
606     Modifiers Mods = getModifiers();
607     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
608     if (isRegKind()) {
609       addRegOperands(Inst, N);
610     } else {
611       addImmOperands(Inst, N, false);
612     }
613   }
614 
615   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
616     assert(!hasIntModifiers());
617     addRegOrImmWithInputModsOperands(Inst, N);
618   }
619 
620   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
621     assert(!hasFPModifiers());
622     addRegOrImmWithInputModsOperands(Inst, N);
623   }
624 
625   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
626     Modifiers Mods = getModifiers();
627     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
628     assert(isRegKind());
629     addRegOperands(Inst, N);
630   }
631 
632   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
633     assert(!hasIntModifiers());
634     addRegWithInputModsOperands(Inst, N);
635   }
636 
637   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
638     assert(!hasFPModifiers());
639     addRegWithInputModsOperands(Inst, N);
640   }
641 
642   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
643     if (isImm())
644       addImmOperands(Inst, N);
645     else {
646       assert(isExpr());
647       Inst.addOperand(MCOperand::createExpr(Expr));
648     }
649   }
650 
651   static void printImmTy(raw_ostream& OS, ImmTy Type) {
652     switch (Type) {
653     case ImmTyNone: OS << "None"; break;
654     case ImmTyGDS: OS << "GDS"; break;
655     case ImmTyLDS: OS << "LDS"; break;
656     case ImmTyOffen: OS << "Offen"; break;
657     case ImmTyIdxen: OS << "Idxen"; break;
658     case ImmTyAddr64: OS << "Addr64"; break;
659     case ImmTyOffset: OS << "Offset"; break;
660     case ImmTyInstOffset: OS << "InstOffset"; break;
661     case ImmTyOffset0: OS << "Offset0"; break;
662     case ImmTyOffset1: OS << "Offset1"; break;
663     case ImmTyGLC: OS << "GLC"; break;
664     case ImmTySLC: OS << "SLC"; break;
665     case ImmTyTFE: OS << "TFE"; break;
666     case ImmTyD16: OS << "D16"; break;
667     case ImmTyFORMAT: OS << "FORMAT"; break;
668     case ImmTyClampSI: OS << "ClampSI"; break;
669     case ImmTyOModSI: OS << "OModSI"; break;
670     case ImmTyDppCtrl: OS << "DppCtrl"; break;
671     case ImmTyDppRowMask: OS << "DppRowMask"; break;
672     case ImmTyDppBankMask: OS << "DppBankMask"; break;
673     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
674     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
675     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
676     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
677     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
678     case ImmTyDMask: OS << "DMask"; break;
679     case ImmTyUNorm: OS << "UNorm"; break;
680     case ImmTyDA: OS << "DA"; break;
681     case ImmTyR128A16: OS << "R128A16"; break;
682     case ImmTyLWE: OS << "LWE"; break;
683     case ImmTyOff: OS << "Off"; break;
684     case ImmTyExpTgt: OS << "ExpTgt"; break;
685     case ImmTyExpCompr: OS << "ExpCompr"; break;
686     case ImmTyExpVM: OS << "ExpVM"; break;
687     case ImmTyHwreg: OS << "Hwreg"; break;
688     case ImmTySendMsg: OS << "SendMsg"; break;
689     case ImmTyInterpSlot: OS << "InterpSlot"; break;
690     case ImmTyInterpAttr: OS << "InterpAttr"; break;
691     case ImmTyAttrChan: OS << "AttrChan"; break;
692     case ImmTyOpSel: OS << "OpSel"; break;
693     case ImmTyOpSelHi: OS << "OpSelHi"; break;
694     case ImmTyNegLo: OS << "NegLo"; break;
695     case ImmTyNegHi: OS << "NegHi"; break;
696     case ImmTySwizzle: OS << "Swizzle"; break;
697     case ImmTyHigh: OS << "High"; break;
698     }
699   }
700 
701   void print(raw_ostream &OS) const override {
702     switch (Kind) {
703     case Register:
704       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
705       break;
706     case Immediate:
707       OS << '<' << getImm();
708       if (getImmTy() != ImmTyNone) {
709         OS << " type: "; printImmTy(OS, getImmTy());
710       }
711       OS << " mods: " << Imm.Mods << '>';
712       break;
713     case Token:
714       OS << '\'' << getToken() << '\'';
715       break;
716     case Expression:
717       OS << "<expr " << *Expr << '>';
718       break;
719     }
720   }
721 
722   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
723                                       int64_t Val, SMLoc Loc,
724                                       ImmTy Type = ImmTyNone,
725                                       bool IsFPImm = false) {
726     auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
727     Op->Imm.Val = Val;
728     Op->Imm.IsFPImm = IsFPImm;
729     Op->Imm.Type = Type;
730     Op->Imm.Mods = Modifiers();
731     Op->StartLoc = Loc;
732     Op->EndLoc = Loc;
733     return Op;
734   }
735 
736   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
737                                         StringRef Str, SMLoc Loc,
738                                         bool HasExplicitEncodingSize = true) {
739     auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
740     Res->Tok.Data = Str.data();
741     Res->Tok.Length = Str.size();
742     Res->StartLoc = Loc;
743     Res->EndLoc = Loc;
744     return Res;
745   }
746 
747   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
748                                       unsigned RegNo, SMLoc S,
749                                       SMLoc E,
750                                       bool ForceVOP3) {
751     auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
752     Op->Reg.RegNo = RegNo;
753     Op->Reg.Mods = Modifiers();
754     Op->Reg.IsForcedVOP3 = ForceVOP3;
755     Op->StartLoc = S;
756     Op->EndLoc = E;
757     return Op;
758   }
759 
760   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
761                                        const class MCExpr *Expr, SMLoc S) {
762     auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
763     Op->Expr = Expr;
764     Op->StartLoc = S;
765     Op->EndLoc = S;
766     return Op;
767   }
768 };
769 
770 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
771   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
772   return OS;
773 }
774 
775 //===----------------------------------------------------------------------===//
776 // AsmParser
777 //===----------------------------------------------------------------------===//
778 
779 // Holds info related to the current kernel, e.g. count of SGPRs used.
780 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
781 // .amdgpu_hsa_kernel or at EOF.
782 class KernelScopeInfo {
783   int SgprIndexUnusedMin = -1;
784   int VgprIndexUnusedMin = -1;
785   MCContext *Ctx = nullptr;
786 
787   void usesSgprAt(int i) {
788     if (i >= SgprIndexUnusedMin) {
789       SgprIndexUnusedMin = ++i;
790       if (Ctx) {
791         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
792         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
793       }
794     }
795   }
796 
797   void usesVgprAt(int i) {
798     if (i >= VgprIndexUnusedMin) {
799       VgprIndexUnusedMin = ++i;
800       if (Ctx) {
801         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
802         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
803       }
804     }
805   }
806 
807 public:
808   KernelScopeInfo() = default;
809 
810   void initialize(MCContext &Context) {
811     Ctx = &Context;
812     usesSgprAt(SgprIndexUnusedMin = -1);
813     usesVgprAt(VgprIndexUnusedMin = -1);
814   }
815 
816   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
817     switch (RegKind) {
818       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
819       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
820       default: break;
821     }
822   }
823 };
824 
825 class AMDGPUAsmParser : public MCTargetAsmParser {
826   MCAsmParser &Parser;
827 
828   // Number of extra operands parsed after the first optional operand.
829   // This may be necessary to skip hardcoded mandatory operands.
830   static const unsigned MAX_OPR_LOOKAHEAD = 8;
831 
832   unsigned ForcedEncodingSize = 0;
833   bool ForcedDPP = false;
834   bool ForcedSDWA = false;
835   KernelScopeInfo KernelScope;
836 
837   /// @name Auto-generated Match Functions
838   /// {
839 
840 #define GET_ASSEMBLER_HEADER
841 #include "AMDGPUGenAsmMatcher.inc"
842 
843   /// }
844 
845 private:
846   bool ParseAsAbsoluteExpression(uint32_t &Ret);
847   bool OutOfRangeError(SMRange Range);
848   /// Calculate VGPR/SGPR blocks required for given target, reserved
849   /// registers, and user-specified NextFreeXGPR values.
850   ///
851   /// \param Features [in] Target features, used for bug corrections.
852   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
853   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
854   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
855   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
856   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
857   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
858   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
859   /// \param VGPRBlocks [out] Result VGPR block count.
860   /// \param SGPRBlocks [out] Result SGPR block count.
861   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
862                           bool FlatScrUsed, bool XNACKUsed,
863                           unsigned NextFreeVGPR, SMRange VGPRRange,
864                           unsigned NextFreeSGPR, SMRange SGPRRange,
865                           unsigned &VGPRBlocks, unsigned &SGPRBlocks);
866   bool ParseDirectiveAMDGCNTarget();
867   bool ParseDirectiveAMDHSAKernel();
868   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
869   bool ParseDirectiveHSACodeObjectVersion();
870   bool ParseDirectiveHSACodeObjectISA();
871   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
872   bool ParseDirectiveAMDKernelCodeT();
873   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
874   bool ParseDirectiveAMDGPUHsaKernel();
875 
876   bool ParseDirectiveISAVersion();
877   bool ParseDirectiveHSAMetadata();
878   bool ParseDirectivePALMetadata();
879 
880   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
881                              RegisterKind RegKind, unsigned Reg1,
882                              unsigned RegNum);
883   bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
884                            unsigned& RegNum, unsigned& RegWidth,
885                            unsigned *DwordRegIndex);
886   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
887   void initializeGprCountSymbol(RegisterKind RegKind);
888   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
889                              unsigned RegWidth);
890   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
891                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
892   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
893                  bool IsGdsHardcoded);
894 
895 public:
896   enum AMDGPUMatchResultTy {
897     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
898   };
899 
900   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
901 
902   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
903                const MCInstrInfo &MII,
904                const MCTargetOptions &Options)
905       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
906     MCAsmParserExtension::Initialize(Parser);
907 
908     if (getFeatureBits().none()) {
909       // Set default features.
910       copySTI().ToggleFeature("SOUTHERN_ISLANDS");
911     }
912 
913     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
914 
915     {
916       // TODO: make those pre-defined variables read-only.
917       // Currently there is none suitable machinery in the core llvm-mc for this.
918       // MCSymbol::isRedefinable is intended for another purpose, and
919       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
920       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
921       MCContext &Ctx = getContext();
922       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
923         MCSymbol *Sym =
924             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
925         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
926       } else {
927         MCSymbol *Sym =
928             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
929         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
930         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
931         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
932         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
933         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
934       }
935       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
936         initializeGprCountSymbol(IS_VGPR);
937         initializeGprCountSymbol(IS_SGPR);
938       } else
939         KernelScope.initialize(getContext());
940     }
941   }
942 
943   bool hasXNACK() const {
944     return AMDGPU::hasXNACK(getSTI());
945   }
946 
947   bool hasMIMG_R128() const {
948     return AMDGPU::hasMIMG_R128(getSTI());
949   }
950 
951   bool hasPackedD16() const {
952     return AMDGPU::hasPackedD16(getSTI());
953   }
954 
955   bool isSI() const {
956     return AMDGPU::isSI(getSTI());
957   }
958 
959   bool isCI() const {
960     return AMDGPU::isCI(getSTI());
961   }
962 
963   bool isVI() const {
964     return AMDGPU::isVI(getSTI());
965   }
966 
967   bool isGFX9() const {
968     return AMDGPU::isGFX9(getSTI());
969   }
970 
971   bool hasInv2PiInlineImm() const {
972     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
973   }
974 
975   bool hasFlatOffsets() const {
976     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
977   }
978 
979   bool hasSGPR102_SGPR103() const {
980     return !isVI();
981   }
982 
983   bool hasIntClamp() const {
984     return getFeatureBits()[AMDGPU::FeatureIntClamp];
985   }
986 
987   AMDGPUTargetStreamer &getTargetStreamer() {
988     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
989     return static_cast<AMDGPUTargetStreamer &>(TS);
990   }
991 
992   const MCRegisterInfo *getMRI() const {
993     // We need this const_cast because for some reason getContext() is not const
994     // in MCAsmParser.
995     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
996   }
997 
998   const MCInstrInfo *getMII() const {
999     return &MII;
1000   }
1001 
1002   const FeatureBitset &getFeatureBits() const {
1003     return getSTI().getFeatureBits();
1004   }
1005 
1006   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1007   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1008   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1009 
1010   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1011   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1012   bool isForcedDPP() const { return ForcedDPP; }
1013   bool isForcedSDWA() const { return ForcedSDWA; }
1014   ArrayRef<unsigned> getMatchedVariants() const;
1015 
1016   std::unique_ptr<AMDGPUOperand> parseRegister();
1017   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1018   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1019   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1020                                       unsigned Kind) override;
1021   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1022                                OperandVector &Operands, MCStreamer &Out,
1023                                uint64_t &ErrorInfo,
1024                                bool MatchingInlineAsm) override;
1025   bool ParseDirective(AsmToken DirectiveID) override;
1026   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
1027   StringRef parseMnemonicSuffix(StringRef Name);
1028   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1029                         SMLoc NameLoc, OperandVector &Operands) override;
1030   //bool ProcessInstruction(MCInst &Inst);
1031 
1032   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1033 
1034   OperandMatchResultTy
1035   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1036                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1037                      bool (*ConvertResult)(int64_t &) = nullptr);
1038 
1039   OperandMatchResultTy parseOperandArrayWithPrefix(
1040     const char *Prefix,
1041     OperandVector &Operands,
1042     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1043     bool (*ConvertResult)(int64_t&) = nullptr);
1044 
1045   OperandMatchResultTy
1046   parseNamedBit(const char *Name, OperandVector &Operands,
1047                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1048   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1049                                              StringRef &Value);
1050 
1051   bool parseAbsoluteExpr(int64_t &Val, bool AbsMod = false);
1052   OperandMatchResultTy parseImm(OperandVector &Operands, bool AbsMod = false);
1053   OperandMatchResultTy parseReg(OperandVector &Operands);
1054   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool AbsMod = false);
1055   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1056   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1057   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1058   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1059   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1060   OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1061 
1062   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1063   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1064   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1065   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1066 
1067   bool parseCnt(int64_t &IntVal);
1068   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1069   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1070 
1071 private:
1072   struct OperandInfoTy {
1073     int64_t Id;
1074     bool IsSymbolic = false;
1075 
1076     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1077   };
1078 
1079   bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
1080   bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1081 
1082   void errorExpTgt();
1083   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1084 
1085   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
1086   bool validateSOPLiteral(const MCInst &Inst) const;
1087   bool validateConstantBusLimitations(const MCInst &Inst);
1088   bool validateEarlyClobberLimitations(const MCInst &Inst);
1089   bool validateIntClampSupported(const MCInst &Inst);
1090   bool validateMIMGAtomicDMask(const MCInst &Inst);
1091   bool validateMIMGGatherDMask(const MCInst &Inst);
1092   bool validateMIMGDataSize(const MCInst &Inst);
1093   bool validateMIMGD16(const MCInst &Inst);
1094   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1095   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1096   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1097 
1098   bool trySkipId(const StringRef Id);
1099   bool trySkipToken(const AsmToken::TokenKind Kind);
1100   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1101   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1102   bool parseExpr(int64_t &Imm);
1103 
1104 public:
1105   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1106   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1107 
1108   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1109   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1110   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1111   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1112   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1113 
1114   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1115                             const unsigned MinVal,
1116                             const unsigned MaxVal,
1117                             const StringRef ErrMsg);
1118   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1119   bool parseSwizzleOffset(int64_t &Imm);
1120   bool parseSwizzleMacro(int64_t &Imm);
1121   bool parseSwizzleQuadPerm(int64_t &Imm);
1122   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1123   bool parseSwizzleBroadcast(int64_t &Imm);
1124   bool parseSwizzleSwap(int64_t &Imm);
1125   bool parseSwizzleReverse(int64_t &Imm);
1126 
1127   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1128   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1129   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1130   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1131   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1132 
1133   AMDGPUOperand::Ptr defaultGLC() const;
1134   AMDGPUOperand::Ptr defaultSLC() const;
1135 
1136   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1137   AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1138   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1139   AMDGPUOperand::Ptr defaultOffsetU12() const;
1140   AMDGPUOperand::Ptr defaultOffsetS13() const;
1141 
1142   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1143 
1144   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1145                OptionalImmIndexMap &OptionalIdx);
1146   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1147   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1148   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1149 
1150   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1151 
1152   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1153                bool IsAtomic = false);
1154   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1155 
1156   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1157   AMDGPUOperand::Ptr defaultRowMask() const;
1158   AMDGPUOperand::Ptr defaultBankMask() const;
1159   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1160   void cvtDPP(MCInst &Inst, const OperandVector &Operands);
1161 
1162   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1163                                     AMDGPUOperand::ImmTy Type);
1164   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1165   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1166   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1167   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1168   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1169   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1170                 uint64_t BasicInstType, bool skipVcc = false);
1171 };
1172 
1173 struct OptionalOperand {
1174   const char *Name;
1175   AMDGPUOperand::ImmTy Type;
1176   bool IsBit;
1177   bool (*ConvertResult)(int64_t&);
1178 };
1179 
1180 } // end anonymous namespace
1181 
1182 // May be called with integer type with equivalent bitwidth.
1183 static const fltSemantics *getFltSemantics(unsigned Size) {
1184   switch (Size) {
1185   case 4:
1186     return &APFloat::IEEEsingle();
1187   case 8:
1188     return &APFloat::IEEEdouble();
1189   case 2:
1190     return &APFloat::IEEEhalf();
1191   default:
1192     llvm_unreachable("unsupported fp type");
1193   }
1194 }
1195 
1196 static const fltSemantics *getFltSemantics(MVT VT) {
1197   return getFltSemantics(VT.getSizeInBits() / 8);
1198 }
1199 
1200 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1201   switch (OperandType) {
1202   case AMDGPU::OPERAND_REG_IMM_INT32:
1203   case AMDGPU::OPERAND_REG_IMM_FP32:
1204   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1205   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1206     return &APFloat::IEEEsingle();
1207   case AMDGPU::OPERAND_REG_IMM_INT64:
1208   case AMDGPU::OPERAND_REG_IMM_FP64:
1209   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1210   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1211     return &APFloat::IEEEdouble();
1212   case AMDGPU::OPERAND_REG_IMM_INT16:
1213   case AMDGPU::OPERAND_REG_IMM_FP16:
1214   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1215   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1216   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1217   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1218     return &APFloat::IEEEhalf();
1219   default:
1220     llvm_unreachable("unsupported fp type");
1221   }
1222 }
1223 
1224 //===----------------------------------------------------------------------===//
1225 // Operand
1226 //===----------------------------------------------------------------------===//
1227 
1228 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1229   bool Lost;
1230 
1231   // Convert literal to single precision
1232   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1233                                                APFloat::rmNearestTiesToEven,
1234                                                &Lost);
1235   // We allow precision lost but not overflow or underflow
1236   if (Status != APFloat::opOK &&
1237       Lost &&
1238       ((Status & APFloat::opOverflow)  != 0 ||
1239        (Status & APFloat::opUnderflow) != 0)) {
1240     return false;
1241   }
1242 
1243   return true;
1244 }
1245 
1246 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1247   if (!isImmTy(ImmTyNone)) {
1248     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1249     return false;
1250   }
1251   // TODO: We should avoid using host float here. It would be better to
1252   // check the float bit values which is what a few other places do.
1253   // We've had bot failures before due to weird NaN support on mips hosts.
1254 
1255   APInt Literal(64, Imm.Val);
1256 
1257   if (Imm.IsFPImm) { // We got fp literal token
1258     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1259       return AMDGPU::isInlinableLiteral64(Imm.Val,
1260                                           AsmParser->hasInv2PiInlineImm());
1261     }
1262 
1263     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1264     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1265       return false;
1266 
1267     if (type.getScalarSizeInBits() == 16) {
1268       return AMDGPU::isInlinableLiteral16(
1269         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1270         AsmParser->hasInv2PiInlineImm());
1271     }
1272 
1273     // Check if single precision literal is inlinable
1274     return AMDGPU::isInlinableLiteral32(
1275       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1276       AsmParser->hasInv2PiInlineImm());
1277   }
1278 
1279   // We got int literal token.
1280   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1281     return AMDGPU::isInlinableLiteral64(Imm.Val,
1282                                         AsmParser->hasInv2PiInlineImm());
1283   }
1284 
1285   if (type.getScalarSizeInBits() == 16) {
1286     return AMDGPU::isInlinableLiteral16(
1287       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1288       AsmParser->hasInv2PiInlineImm());
1289   }
1290 
1291   return AMDGPU::isInlinableLiteral32(
1292     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1293     AsmParser->hasInv2PiInlineImm());
1294 }
1295 
1296 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1297   // Check that this immediate can be added as literal
1298   if (!isImmTy(ImmTyNone)) {
1299     return false;
1300   }
1301 
1302   if (!Imm.IsFPImm) {
1303     // We got int literal token.
1304 
1305     if (type == MVT::f64 && hasFPModifiers()) {
1306       // Cannot apply fp modifiers to int literals preserving the same semantics
1307       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1308       // disable these cases.
1309       return false;
1310     }
1311 
1312     unsigned Size = type.getSizeInBits();
1313     if (Size == 64)
1314       Size = 32;
1315 
1316     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1317     // types.
1318     return isUIntN(Size, Imm.Val) || isIntN(Size, Imm.Val);
1319   }
1320 
1321   // We got fp literal token
1322   if (type == MVT::f64) { // Expected 64-bit fp operand
1323     // We would set low 64-bits of literal to zeroes but we accept this literals
1324     return true;
1325   }
1326 
1327   if (type == MVT::i64) { // Expected 64-bit int operand
1328     // We don't allow fp literals in 64-bit integer instructions. It is
1329     // unclear how we should encode them.
1330     return false;
1331   }
1332 
1333   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1334   return canLosslesslyConvertToFPType(FPLiteral, type);
1335 }
1336 
1337 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1338   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1339 }
1340 
1341 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1342   if (AsmParser->isVI())
1343     return isVReg();
1344   else if (AsmParser->isGFX9())
1345     return isRegKind() || isInlinableImm(type);
1346   else
1347     return false;
1348 }
1349 
1350 bool AMDGPUOperand::isSDWAFP16Operand() const {
1351   return isSDWAOperand(MVT::f16);
1352 }
1353 
1354 bool AMDGPUOperand::isSDWAFP32Operand() const {
1355   return isSDWAOperand(MVT::f32);
1356 }
1357 
1358 bool AMDGPUOperand::isSDWAInt16Operand() const {
1359   return isSDWAOperand(MVT::i16);
1360 }
1361 
1362 bool AMDGPUOperand::isSDWAInt32Operand() const {
1363   return isSDWAOperand(MVT::i32);
1364 }
1365 
1366 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1367 {
1368   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1369   assert(Size == 2 || Size == 4 || Size == 8);
1370 
1371   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1372 
1373   if (Imm.Mods.Abs) {
1374     Val &= ~FpSignMask;
1375   }
1376   if (Imm.Mods.Neg) {
1377     Val ^= FpSignMask;
1378   }
1379 
1380   return Val;
1381 }
1382 
1383 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1384   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1385                              Inst.getNumOperands())) {
1386     addLiteralImmOperand(Inst, Imm.Val,
1387                          ApplyModifiers &
1388                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1389   } else {
1390     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1391     Inst.addOperand(MCOperand::createImm(Imm.Val));
1392   }
1393 }
1394 
1395 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1396   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1397   auto OpNum = Inst.getNumOperands();
1398   // Check that this operand accepts literals
1399   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1400 
1401   if (ApplyModifiers) {
1402     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1403     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1404     Val = applyInputFPModifiers(Val, Size);
1405   }
1406 
1407   APInt Literal(64, Val);
1408   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1409 
1410   if (Imm.IsFPImm) { // We got fp literal token
1411     switch (OpTy) {
1412     case AMDGPU::OPERAND_REG_IMM_INT64:
1413     case AMDGPU::OPERAND_REG_IMM_FP64:
1414     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1415     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1416       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1417                                        AsmParser->hasInv2PiInlineImm())) {
1418         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1419         return;
1420       }
1421 
1422       // Non-inlineable
1423       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1424         // For fp operands we check if low 32 bits are zeros
1425         if (Literal.getLoBits(32) != 0) {
1426           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1427           "Can't encode literal as exact 64-bit floating-point operand. "
1428           "Low 32-bits will be set to zero");
1429         }
1430 
1431         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1432         return;
1433       }
1434 
1435       // We don't allow fp literals in 64-bit integer instructions. It is
1436       // unclear how we should encode them. This case should be checked earlier
1437       // in predicate methods (isLiteralImm())
1438       llvm_unreachable("fp literal in 64-bit integer instruction.");
1439 
1440     case AMDGPU::OPERAND_REG_IMM_INT32:
1441     case AMDGPU::OPERAND_REG_IMM_FP32:
1442     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1443     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1444     case AMDGPU::OPERAND_REG_IMM_INT16:
1445     case AMDGPU::OPERAND_REG_IMM_FP16:
1446     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1447     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1448     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1449     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1450       bool lost;
1451       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1452       // Convert literal to single precision
1453       FPLiteral.convert(*getOpFltSemantics(OpTy),
1454                         APFloat::rmNearestTiesToEven, &lost);
1455       // We allow precision lost but not overflow or underflow. This should be
1456       // checked earlier in isLiteralImm()
1457 
1458       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1459       if (OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
1460           OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
1461         ImmVal |= (ImmVal << 16);
1462       }
1463 
1464       Inst.addOperand(MCOperand::createImm(ImmVal));
1465       return;
1466     }
1467     default:
1468       llvm_unreachable("invalid operand size");
1469     }
1470 
1471     return;
1472   }
1473 
1474    // We got int literal token.
1475   // Only sign extend inline immediates.
1476   // FIXME: No errors on truncation
1477   switch (OpTy) {
1478   case AMDGPU::OPERAND_REG_IMM_INT32:
1479   case AMDGPU::OPERAND_REG_IMM_FP32:
1480   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1481   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1482     if (isInt<32>(Val) &&
1483         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1484                                      AsmParser->hasInv2PiInlineImm())) {
1485       Inst.addOperand(MCOperand::createImm(Val));
1486       return;
1487     }
1488 
1489     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1490     return;
1491 
1492   case AMDGPU::OPERAND_REG_IMM_INT64:
1493   case AMDGPU::OPERAND_REG_IMM_FP64:
1494   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1495   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1496     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1497       Inst.addOperand(MCOperand::createImm(Val));
1498       return;
1499     }
1500 
1501     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1502     return;
1503 
1504   case AMDGPU::OPERAND_REG_IMM_INT16:
1505   case AMDGPU::OPERAND_REG_IMM_FP16:
1506   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1507   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1508     if (isInt<16>(Val) &&
1509         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1510                                      AsmParser->hasInv2PiInlineImm())) {
1511       Inst.addOperand(MCOperand::createImm(Val));
1512       return;
1513     }
1514 
1515     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1516     return;
1517 
1518   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1519   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1520     auto LiteralVal = static_cast<uint16_t>(Literal.getLoBits(16).getZExtValue());
1521     assert(AMDGPU::isInlinableLiteral16(LiteralVal,
1522                                         AsmParser->hasInv2PiInlineImm()));
1523 
1524     uint32_t ImmVal = static_cast<uint32_t>(LiteralVal) << 16 |
1525                       static_cast<uint32_t>(LiteralVal);
1526     Inst.addOperand(MCOperand::createImm(ImmVal));
1527     return;
1528   }
1529   default:
1530     llvm_unreachable("invalid operand size");
1531   }
1532 }
1533 
1534 template <unsigned Bitwidth>
1535 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1536   APInt Literal(64, Imm.Val);
1537 
1538   if (!Imm.IsFPImm) {
1539     // We got int literal token.
1540     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1541     return;
1542   }
1543 
1544   bool Lost;
1545   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1546   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1547                     APFloat::rmNearestTiesToEven, &Lost);
1548   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1549 }
1550 
1551 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1552   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1553 }
1554 
1555 //===----------------------------------------------------------------------===//
1556 // AsmParser
1557 //===----------------------------------------------------------------------===//
1558 
1559 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1560   if (Is == IS_VGPR) {
1561     switch (RegWidth) {
1562       default: return -1;
1563       case 1: return AMDGPU::VGPR_32RegClassID;
1564       case 2: return AMDGPU::VReg_64RegClassID;
1565       case 3: return AMDGPU::VReg_96RegClassID;
1566       case 4: return AMDGPU::VReg_128RegClassID;
1567       case 8: return AMDGPU::VReg_256RegClassID;
1568       case 16: return AMDGPU::VReg_512RegClassID;
1569     }
1570   } else if (Is == IS_TTMP) {
1571     switch (RegWidth) {
1572       default: return -1;
1573       case 1: return AMDGPU::TTMP_32RegClassID;
1574       case 2: return AMDGPU::TTMP_64RegClassID;
1575       case 4: return AMDGPU::TTMP_128RegClassID;
1576       case 8: return AMDGPU::TTMP_256RegClassID;
1577       case 16: return AMDGPU::TTMP_512RegClassID;
1578     }
1579   } else if (Is == IS_SGPR) {
1580     switch (RegWidth) {
1581       default: return -1;
1582       case 1: return AMDGPU::SGPR_32RegClassID;
1583       case 2: return AMDGPU::SGPR_64RegClassID;
1584       case 4: return AMDGPU::SGPR_128RegClassID;
1585       case 8: return AMDGPU::SGPR_256RegClassID;
1586       case 16: return AMDGPU::SGPR_512RegClassID;
1587     }
1588   }
1589   return -1;
1590 }
1591 
1592 static unsigned getSpecialRegForName(StringRef RegName) {
1593   return StringSwitch<unsigned>(RegName)
1594     .Case("exec", AMDGPU::EXEC)
1595     .Case("vcc", AMDGPU::VCC)
1596     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1597     .Case("xnack_mask", AMDGPU::XNACK_MASK)
1598     .Case("m0", AMDGPU::M0)
1599     .Case("scc", AMDGPU::SCC)
1600     .Case("tba", AMDGPU::TBA)
1601     .Case("tma", AMDGPU::TMA)
1602     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1603     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1604     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1605     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1606     .Case("vcc_lo", AMDGPU::VCC_LO)
1607     .Case("vcc_hi", AMDGPU::VCC_HI)
1608     .Case("exec_lo", AMDGPU::EXEC_LO)
1609     .Case("exec_hi", AMDGPU::EXEC_HI)
1610     .Case("tma_lo", AMDGPU::TMA_LO)
1611     .Case("tma_hi", AMDGPU::TMA_HI)
1612     .Case("tba_lo", AMDGPU::TBA_LO)
1613     .Case("tba_hi", AMDGPU::TBA_HI)
1614     .Default(0);
1615 }
1616 
1617 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1618                                     SMLoc &EndLoc) {
1619   auto R = parseRegister();
1620   if (!R) return true;
1621   assert(R->isReg());
1622   RegNo = R->getReg();
1623   StartLoc = R->getStartLoc();
1624   EndLoc = R->getEndLoc();
1625   return false;
1626 }
1627 
1628 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1629                                             RegisterKind RegKind, unsigned Reg1,
1630                                             unsigned RegNum) {
1631   switch (RegKind) {
1632   case IS_SPECIAL:
1633     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1634       Reg = AMDGPU::EXEC;
1635       RegWidth = 2;
1636       return true;
1637     }
1638     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1639       Reg = AMDGPU::FLAT_SCR;
1640       RegWidth = 2;
1641       return true;
1642     }
1643     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
1644       Reg = AMDGPU::XNACK_MASK;
1645       RegWidth = 2;
1646       return true;
1647     }
1648     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1649       Reg = AMDGPU::VCC;
1650       RegWidth = 2;
1651       return true;
1652     }
1653     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1654       Reg = AMDGPU::TBA;
1655       RegWidth = 2;
1656       return true;
1657     }
1658     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
1659       Reg = AMDGPU::TMA;
1660       RegWidth = 2;
1661       return true;
1662     }
1663     return false;
1664   case IS_VGPR:
1665   case IS_SGPR:
1666   case IS_TTMP:
1667     if (Reg1 != Reg + RegWidth) {
1668       return false;
1669     }
1670     RegWidth++;
1671     return true;
1672   default:
1673     llvm_unreachable("unexpected register kind");
1674   }
1675 }
1676 
1677 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1678                                           unsigned &RegNum, unsigned &RegWidth,
1679                                           unsigned *DwordRegIndex) {
1680   if (DwordRegIndex) { *DwordRegIndex = 0; }
1681   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
1682   if (getLexer().is(AsmToken::Identifier)) {
1683     StringRef RegName = Parser.getTok().getString();
1684     if ((Reg = getSpecialRegForName(RegName))) {
1685       Parser.Lex();
1686       RegKind = IS_SPECIAL;
1687     } else {
1688       unsigned RegNumIndex = 0;
1689       if (RegName[0] == 'v') {
1690         RegNumIndex = 1;
1691         RegKind = IS_VGPR;
1692       } else if (RegName[0] == 's') {
1693         RegNumIndex = 1;
1694         RegKind = IS_SGPR;
1695       } else if (RegName.startswith("ttmp")) {
1696         RegNumIndex = strlen("ttmp");
1697         RegKind = IS_TTMP;
1698       } else {
1699         return false;
1700       }
1701       if (RegName.size() > RegNumIndex) {
1702         // Single 32-bit register: vXX.
1703         if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
1704           return false;
1705         Parser.Lex();
1706         RegWidth = 1;
1707       } else {
1708         // Range of registers: v[XX:YY]. ":YY" is optional.
1709         Parser.Lex();
1710         int64_t RegLo, RegHi;
1711         if (getLexer().isNot(AsmToken::LBrac))
1712           return false;
1713         Parser.Lex();
1714 
1715         if (getParser().parseAbsoluteExpression(RegLo))
1716           return false;
1717 
1718         const bool isRBrace = getLexer().is(AsmToken::RBrac);
1719         if (!isRBrace && getLexer().isNot(AsmToken::Colon))
1720           return false;
1721         Parser.Lex();
1722 
1723         if (isRBrace) {
1724           RegHi = RegLo;
1725         } else {
1726           if (getParser().parseAbsoluteExpression(RegHi))
1727             return false;
1728 
1729           if (getLexer().isNot(AsmToken::RBrac))
1730             return false;
1731           Parser.Lex();
1732         }
1733         RegNum = (unsigned) RegLo;
1734         RegWidth = (RegHi - RegLo) + 1;
1735       }
1736     }
1737   } else if (getLexer().is(AsmToken::LBrac)) {
1738     // List of consecutive registers: [s0,s1,s2,s3]
1739     Parser.Lex();
1740     if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
1741       return false;
1742     if (RegWidth != 1)
1743       return false;
1744     RegisterKind RegKind1;
1745     unsigned Reg1, RegNum1, RegWidth1;
1746     do {
1747       if (getLexer().is(AsmToken::Comma)) {
1748         Parser.Lex();
1749       } else if (getLexer().is(AsmToken::RBrac)) {
1750         Parser.Lex();
1751         break;
1752       } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
1753         if (RegWidth1 != 1) {
1754           return false;
1755         }
1756         if (RegKind1 != RegKind) {
1757           return false;
1758         }
1759         if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
1760           return false;
1761         }
1762       } else {
1763         return false;
1764       }
1765     } while (true);
1766   } else {
1767     return false;
1768   }
1769   switch (RegKind) {
1770   case IS_SPECIAL:
1771     RegNum = 0;
1772     RegWidth = 1;
1773     break;
1774   case IS_VGPR:
1775   case IS_SGPR:
1776   case IS_TTMP:
1777   {
1778     unsigned Size = 1;
1779     if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
1780       // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
1781       Size = std::min(RegWidth, 4u);
1782     }
1783     if (RegNum % Size != 0)
1784       return false;
1785     if (DwordRegIndex) { *DwordRegIndex = RegNum; }
1786     RegNum = RegNum / Size;
1787     int RCID = getRegClass(RegKind, RegWidth);
1788     if (RCID == -1)
1789       return false;
1790     const MCRegisterClass RC = TRI->getRegClass(RCID);
1791     if (RegNum >= RC.getNumRegs())
1792       return false;
1793     Reg = RC.getRegister(RegNum);
1794     break;
1795   }
1796 
1797   default:
1798     llvm_unreachable("unexpected register kind");
1799   }
1800 
1801   if (!subtargetHasRegister(*TRI, Reg))
1802     return false;
1803   return true;
1804 }
1805 
1806 Optional<StringRef>
1807 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
1808   switch (RegKind) {
1809   case IS_VGPR:
1810     return StringRef(".amdgcn.next_free_vgpr");
1811   case IS_SGPR:
1812     return StringRef(".amdgcn.next_free_sgpr");
1813   default:
1814     return None;
1815   }
1816 }
1817 
1818 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
1819   auto SymbolName = getGprCountSymbolName(RegKind);
1820   assert(SymbolName && "initializing invalid register kind");
1821   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
1822   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
1823 }
1824 
1825 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
1826                                             unsigned DwordRegIndex,
1827                                             unsigned RegWidth) {
1828   // Symbols are only defined for GCN targets
1829   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
1830     return true;
1831 
1832   auto SymbolName = getGprCountSymbolName(RegKind);
1833   if (!SymbolName)
1834     return true;
1835   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
1836 
1837   int64_t NewMax = DwordRegIndex + RegWidth - 1;
1838   int64_t OldCount;
1839 
1840   if (!Sym->isVariable())
1841     return !Error(getParser().getTok().getLoc(),
1842                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
1843   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
1844     return !Error(
1845         getParser().getTok().getLoc(),
1846         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
1847 
1848   if (OldCount <= NewMax)
1849     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
1850 
1851   return true;
1852 }
1853 
1854 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
1855   const auto &Tok = Parser.getTok();
1856   SMLoc StartLoc = Tok.getLoc();
1857   SMLoc EndLoc = Tok.getEndLoc();
1858   RegisterKind RegKind;
1859   unsigned Reg, RegNum, RegWidth, DwordRegIndex;
1860 
1861   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
1862     return nullptr;
1863   }
1864   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1865     if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
1866       return nullptr;
1867   } else
1868     KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
1869   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false);
1870 }
1871 
1872 bool
1873 AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool AbsMod) {
1874   if (AbsMod && getLexer().peekTok().is(AsmToken::Pipe) &&
1875       (getLexer().getKind() == AsmToken::Integer ||
1876        getLexer().getKind() == AsmToken::Real)) {
1877     // This is a workaround for handling operands like these:
1878     //     |1.0|
1879     //     |-1|
1880     // This syntax is not compatible with syntax of standard
1881     // MC expressions (due to the trailing '|').
1882 
1883     SMLoc EndLoc;
1884     const MCExpr *Expr;
1885 
1886     if (getParser().parsePrimaryExpr(Expr, EndLoc)) {
1887       return true;
1888     }
1889 
1890     return !Expr->evaluateAsAbsolute(Val);
1891   }
1892 
1893   return getParser().parseAbsoluteExpression(Val);
1894 }
1895 
1896 OperandMatchResultTy
1897 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) {
1898   // TODO: add syntactic sugar for 1/(2*PI)
1899   bool Minus = false;
1900   if (getLexer().getKind() == AsmToken::Minus) {
1901     const AsmToken NextToken = getLexer().peekTok();
1902     if (!NextToken.is(AsmToken::Integer) &&
1903         !NextToken.is(AsmToken::Real)) {
1904         return MatchOperand_NoMatch;
1905     }
1906     Minus = true;
1907     Parser.Lex();
1908   }
1909 
1910   SMLoc S = Parser.getTok().getLoc();
1911   switch(getLexer().getKind()) {
1912   case AsmToken::Integer: {
1913     int64_t IntVal;
1914     if (parseAbsoluteExpr(IntVal, AbsMod))
1915       return MatchOperand_ParseFail;
1916     if (Minus)
1917       IntVal *= -1;
1918     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
1919     return MatchOperand_Success;
1920   }
1921   case AsmToken::Real: {
1922     int64_t IntVal;
1923     if (parseAbsoluteExpr(IntVal, AbsMod))
1924       return MatchOperand_ParseFail;
1925 
1926     APFloat F(BitsToDouble(IntVal));
1927     if (Minus)
1928       F.changeSign();
1929     Operands.push_back(
1930         AMDGPUOperand::CreateImm(this, F.bitcastToAPInt().getZExtValue(), S,
1931                                  AMDGPUOperand::ImmTyNone, true));
1932     return MatchOperand_Success;
1933   }
1934   default:
1935     return MatchOperand_NoMatch;
1936   }
1937 }
1938 
1939 OperandMatchResultTy
1940 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
1941   if (auto R = parseRegister()) {
1942     assert(R->isReg());
1943     R->Reg.IsForcedVOP3 = isForcedVOP3();
1944     Operands.push_back(std::move(R));
1945     return MatchOperand_Success;
1946   }
1947   return MatchOperand_NoMatch;
1948 }
1949 
1950 OperandMatchResultTy
1951 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool AbsMod) {
1952   auto res = parseImm(Operands, AbsMod);
1953   if (res != MatchOperand_NoMatch) {
1954     return res;
1955   }
1956 
1957   return parseReg(Operands);
1958 }
1959 
1960 OperandMatchResultTy
1961 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
1962                                               bool AllowImm) {
1963   bool Negate = false, Negate2 = false, Abs = false, Abs2 = false;
1964 
1965   if (getLexer().getKind()== AsmToken::Minus) {
1966     const AsmToken NextToken = getLexer().peekTok();
1967 
1968     // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
1969     if (NextToken.is(AsmToken::Minus)) {
1970       Error(Parser.getTok().getLoc(), "invalid syntax, expected 'neg' modifier");
1971       return MatchOperand_ParseFail;
1972     }
1973 
1974     // '-' followed by an integer literal N should be interpreted as integer
1975     // negation rather than a floating-point NEG modifier applied to N.
1976     // Beside being contr-intuitive, such use of floating-point NEG modifier
1977     // results in different meaning of integer literals used with VOP1/2/C
1978     // and VOP3, for example:
1979     //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
1980     //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
1981     // Negative fp literals should be handled likewise for unifomtity
1982     if (!NextToken.is(AsmToken::Integer) && !NextToken.is(AsmToken::Real)) {
1983       Parser.Lex();
1984       Negate = true;
1985     }
1986   }
1987 
1988   if (getLexer().getKind() == AsmToken::Identifier &&
1989       Parser.getTok().getString() == "neg") {
1990     if (Negate) {
1991       Error(Parser.getTok().getLoc(), "expected register or immediate");
1992       return MatchOperand_ParseFail;
1993     }
1994     Parser.Lex();
1995     Negate2 = true;
1996     if (getLexer().isNot(AsmToken::LParen)) {
1997       Error(Parser.getTok().getLoc(), "expected left paren after neg");
1998       return MatchOperand_ParseFail;
1999     }
2000     Parser.Lex();
2001   }
2002 
2003   if (getLexer().getKind() == AsmToken::Identifier &&
2004       Parser.getTok().getString() == "abs") {
2005     Parser.Lex();
2006     Abs2 = true;
2007     if (getLexer().isNot(AsmToken::LParen)) {
2008       Error(Parser.getTok().getLoc(), "expected left paren after abs");
2009       return MatchOperand_ParseFail;
2010     }
2011     Parser.Lex();
2012   }
2013 
2014   if (getLexer().getKind() == AsmToken::Pipe) {
2015     if (Abs2) {
2016       Error(Parser.getTok().getLoc(), "expected register or immediate");
2017       return MatchOperand_ParseFail;
2018     }
2019     Parser.Lex();
2020     Abs = true;
2021   }
2022 
2023   OperandMatchResultTy Res;
2024   if (AllowImm) {
2025     Res = parseRegOrImm(Operands, Abs);
2026   } else {
2027     Res = parseReg(Operands);
2028   }
2029   if (Res != MatchOperand_Success) {
2030     return Res;
2031   }
2032 
2033   AMDGPUOperand::Modifiers Mods;
2034   if (Abs) {
2035     if (getLexer().getKind() != AsmToken::Pipe) {
2036       Error(Parser.getTok().getLoc(), "expected vertical bar");
2037       return MatchOperand_ParseFail;
2038     }
2039     Parser.Lex();
2040     Mods.Abs = true;
2041   }
2042   if (Abs2) {
2043     if (getLexer().isNot(AsmToken::RParen)) {
2044       Error(Parser.getTok().getLoc(), "expected closing parentheses");
2045       return MatchOperand_ParseFail;
2046     }
2047     Parser.Lex();
2048     Mods.Abs = true;
2049   }
2050 
2051   if (Negate) {
2052     Mods.Neg = true;
2053   } else if (Negate2) {
2054     if (getLexer().isNot(AsmToken::RParen)) {
2055       Error(Parser.getTok().getLoc(), "expected closing parentheses");
2056       return MatchOperand_ParseFail;
2057     }
2058     Parser.Lex();
2059     Mods.Neg = true;
2060   }
2061 
2062   if (Mods.hasFPModifiers()) {
2063     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2064     Op.setModifiers(Mods);
2065   }
2066   return MatchOperand_Success;
2067 }
2068 
2069 OperandMatchResultTy
2070 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2071                                                bool AllowImm) {
2072   bool Sext = false;
2073 
2074   if (getLexer().getKind() == AsmToken::Identifier &&
2075       Parser.getTok().getString() == "sext") {
2076     Parser.Lex();
2077     Sext = true;
2078     if (getLexer().isNot(AsmToken::LParen)) {
2079       Error(Parser.getTok().getLoc(), "expected left paren after sext");
2080       return MatchOperand_ParseFail;
2081     }
2082     Parser.Lex();
2083   }
2084 
2085   OperandMatchResultTy Res;
2086   if (AllowImm) {
2087     Res = parseRegOrImm(Operands);
2088   } else {
2089     Res = parseReg(Operands);
2090   }
2091   if (Res != MatchOperand_Success) {
2092     return Res;
2093   }
2094 
2095   AMDGPUOperand::Modifiers Mods;
2096   if (Sext) {
2097     if (getLexer().isNot(AsmToken::RParen)) {
2098       Error(Parser.getTok().getLoc(), "expected closing parentheses");
2099       return MatchOperand_ParseFail;
2100     }
2101     Parser.Lex();
2102     Mods.Sext = true;
2103   }
2104 
2105   if (Mods.hasIntModifiers()) {
2106     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2107     Op.setModifiers(Mods);
2108   }
2109 
2110   return MatchOperand_Success;
2111 }
2112 
2113 OperandMatchResultTy
2114 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2115   return parseRegOrImmWithFPInputMods(Operands, false);
2116 }
2117 
2118 OperandMatchResultTy
2119 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2120   return parseRegOrImmWithIntInputMods(Operands, false);
2121 }
2122 
2123 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2124   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2125   if (Reg) {
2126     Operands.push_back(std::move(Reg));
2127     return MatchOperand_Success;
2128   }
2129 
2130   const AsmToken &Tok = Parser.getTok();
2131   if (Tok.getString() == "off") {
2132     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Tok.getLoc(),
2133                                                 AMDGPUOperand::ImmTyOff, false));
2134     Parser.Lex();
2135     return MatchOperand_Success;
2136   }
2137 
2138   return MatchOperand_NoMatch;
2139 }
2140 
2141 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2142   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2143 
2144   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2145       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2146       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2147       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2148     return Match_InvalidOperand;
2149 
2150   if ((TSFlags & SIInstrFlags::VOP3) &&
2151       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2152       getForcedEncodingSize() != 64)
2153     return Match_PreferE32;
2154 
2155   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2156       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2157     // v_mac_f32/16 allow only dst_sel == DWORD;
2158     auto OpNum =
2159         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2160     const auto &Op = Inst.getOperand(OpNum);
2161     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2162       return Match_InvalidOperand;
2163     }
2164   }
2165 
2166   if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) {
2167     // FIXME: Produces error without correct column reported.
2168     auto OpNum =
2169         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
2170     const auto &Op = Inst.getOperand(OpNum);
2171     if (Op.getImm() != 0)
2172       return Match_InvalidOperand;
2173   }
2174 
2175   return Match_Success;
2176 }
2177 
2178 // What asm variants we should check
2179 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2180   if (getForcedEncodingSize() == 32) {
2181     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2182     return makeArrayRef(Variants);
2183   }
2184 
2185   if (isForcedVOP3()) {
2186     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2187     return makeArrayRef(Variants);
2188   }
2189 
2190   if (isForcedSDWA()) {
2191     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2192                                         AMDGPUAsmVariants::SDWA9};
2193     return makeArrayRef(Variants);
2194   }
2195 
2196   if (isForcedDPP()) {
2197     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2198     return makeArrayRef(Variants);
2199   }
2200 
2201   static const unsigned Variants[] = {
2202     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2203     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2204   };
2205 
2206   return makeArrayRef(Variants);
2207 }
2208 
2209 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2210   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2211   const unsigned Num = Desc.getNumImplicitUses();
2212   for (unsigned i = 0; i < Num; ++i) {
2213     unsigned Reg = Desc.ImplicitUses[i];
2214     switch (Reg) {
2215     case AMDGPU::FLAT_SCR:
2216     case AMDGPU::VCC:
2217     case AMDGPU::M0:
2218       return Reg;
2219     default:
2220       break;
2221     }
2222   }
2223   return AMDGPU::NoRegister;
2224 }
2225 
2226 // NB: This code is correct only when used to check constant
2227 // bus limitations because GFX7 support no f16 inline constants.
2228 // Note that there are no cases when a GFX7 opcode violates
2229 // constant bus limitations due to the use of an f16 constant.
2230 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2231                                        unsigned OpIdx) const {
2232   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2233 
2234   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2235     return false;
2236   }
2237 
2238   const MCOperand &MO = Inst.getOperand(OpIdx);
2239 
2240   int64_t Val = MO.getImm();
2241   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2242 
2243   switch (OpSize) { // expected operand size
2244   case 8:
2245     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2246   case 4:
2247     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2248   case 2: {
2249     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2250     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2251         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
2252       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2253     } else {
2254       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2255     }
2256   }
2257   default:
2258     llvm_unreachable("invalid operand size");
2259   }
2260 }
2261 
2262 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2263   const MCOperand &MO = Inst.getOperand(OpIdx);
2264   if (MO.isImm()) {
2265     return !isInlineConstant(Inst, OpIdx);
2266   }
2267   return !MO.isReg() ||
2268          isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2269 }
2270 
2271 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2272   const unsigned Opcode = Inst.getOpcode();
2273   const MCInstrDesc &Desc = MII.get(Opcode);
2274   unsigned ConstantBusUseCount = 0;
2275 
2276   if (Desc.TSFlags &
2277       (SIInstrFlags::VOPC |
2278        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2279        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2280        SIInstrFlags::SDWA)) {
2281     // Check special imm operands (used by madmk, etc)
2282     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2283       ++ConstantBusUseCount;
2284     }
2285 
2286     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2287     if (SGPRUsed != AMDGPU::NoRegister) {
2288       ++ConstantBusUseCount;
2289     }
2290 
2291     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2292     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2293     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2294 
2295     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2296 
2297     for (int OpIdx : OpIndices) {
2298       if (OpIdx == -1) break;
2299 
2300       const MCOperand &MO = Inst.getOperand(OpIdx);
2301       if (usesConstantBus(Inst, OpIdx)) {
2302         if (MO.isReg()) {
2303           const unsigned Reg = mc2PseudoReg(MO.getReg());
2304           // Pairs of registers with a partial intersections like these
2305           //   s0, s[0:1]
2306           //   flat_scratch_lo, flat_scratch
2307           //   flat_scratch_lo, flat_scratch_hi
2308           // are theoretically valid but they are disabled anyway.
2309           // Note that this code mimics SIInstrInfo::verifyInstruction
2310           if (Reg != SGPRUsed) {
2311             ++ConstantBusUseCount;
2312           }
2313           SGPRUsed = Reg;
2314         } else { // Expression or a literal
2315           ++ConstantBusUseCount;
2316         }
2317       }
2318     }
2319   }
2320 
2321   return ConstantBusUseCount <= 1;
2322 }
2323 
2324 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2325   const unsigned Opcode = Inst.getOpcode();
2326   const MCInstrDesc &Desc = MII.get(Opcode);
2327 
2328   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2329   if (DstIdx == -1 ||
2330       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2331     return true;
2332   }
2333 
2334   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2335 
2336   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2337   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2338   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2339 
2340   assert(DstIdx != -1);
2341   const MCOperand &Dst = Inst.getOperand(DstIdx);
2342   assert(Dst.isReg());
2343   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2344 
2345   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2346 
2347   for (int SrcIdx : SrcIndices) {
2348     if (SrcIdx == -1) break;
2349     const MCOperand &Src = Inst.getOperand(SrcIdx);
2350     if (Src.isReg()) {
2351       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2352       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2353         return false;
2354       }
2355     }
2356   }
2357 
2358   return true;
2359 }
2360 
2361 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2362 
2363   const unsigned Opc = Inst.getOpcode();
2364   const MCInstrDesc &Desc = MII.get(Opc);
2365 
2366   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2367     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2368     assert(ClampIdx != -1);
2369     return Inst.getOperand(ClampIdx).getImm() == 0;
2370   }
2371 
2372   return true;
2373 }
2374 
2375 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2376 
2377   const unsigned Opc = Inst.getOpcode();
2378   const MCInstrDesc &Desc = MII.get(Opc);
2379 
2380   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2381     return true;
2382 
2383   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2384   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2385   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2386 
2387   assert(VDataIdx != -1);
2388   assert(DMaskIdx != -1);
2389   assert(TFEIdx != -1);
2390 
2391   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2392   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2393   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2394   if (DMask == 0)
2395     DMask = 1;
2396 
2397   unsigned DataSize =
2398     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2399   if (hasPackedD16()) {
2400     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2401     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2402       DataSize = (DataSize + 1) / 2;
2403   }
2404 
2405   return (VDataSize / 4) == DataSize + TFESize;
2406 }
2407 
2408 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
2409 
2410   const unsigned Opc = Inst.getOpcode();
2411   const MCInstrDesc &Desc = MII.get(Opc);
2412 
2413   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2414     return true;
2415   if (!Desc.mayLoad() || !Desc.mayStore())
2416     return true; // Not atomic
2417 
2418   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2419   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2420 
2421   // This is an incomplete check because image_atomic_cmpswap
2422   // may only use 0x3 and 0xf while other atomic operations
2423   // may use 0x1 and 0x3. However these limitations are
2424   // verified when we check that dmask matches dst size.
2425   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
2426 }
2427 
2428 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
2429 
2430   const unsigned Opc = Inst.getOpcode();
2431   const MCInstrDesc &Desc = MII.get(Opc);
2432 
2433   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
2434     return true;
2435 
2436   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2437   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2438 
2439   // GATHER4 instructions use dmask in a different fashion compared to
2440   // other MIMG instructions. The only useful DMASK values are
2441   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
2442   // (red,red,red,red) etc.) The ISA document doesn't mention
2443   // this.
2444   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
2445 }
2446 
2447 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
2448 
2449   const unsigned Opc = Inst.getOpcode();
2450   const MCInstrDesc &Desc = MII.get(Opc);
2451 
2452   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2453     return true;
2454 
2455   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2456   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
2457     if (isCI() || isSI())
2458       return false;
2459   }
2460 
2461   return true;
2462 }
2463 
2464 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
2465   unsigned Opcode = Inst.getOpcode();
2466   const MCInstrDesc &Desc = MII.get(Opcode);
2467   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
2468     return true;
2469 
2470   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2471   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2472 
2473   const int OpIndices[] = { Src0Idx, Src1Idx };
2474 
2475   unsigned NumLiterals = 0;
2476   uint32_t LiteralValue;
2477 
2478   for (int OpIdx : OpIndices) {
2479     if (OpIdx == -1) break;
2480 
2481     const MCOperand &MO = Inst.getOperand(OpIdx);
2482     if (MO.isImm() &&
2483         // Exclude special imm operands (like that used by s_set_gpr_idx_on)
2484         AMDGPU::isSISrcOperand(Desc, OpIdx) &&
2485         !isInlineConstant(Inst, OpIdx)) {
2486       uint32_t Value = static_cast<uint32_t>(MO.getImm());
2487       if (NumLiterals == 0 || LiteralValue != Value) {
2488         LiteralValue = Value;
2489         ++NumLiterals;
2490       }
2491     }
2492   }
2493 
2494   return NumLiterals <= 1;
2495 }
2496 
2497 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
2498                                           const SMLoc &IDLoc) {
2499   if (!validateSOPLiteral(Inst)) {
2500     Error(IDLoc,
2501       "only one literal operand is allowed");
2502     return false;
2503   }
2504   if (!validateConstantBusLimitations(Inst)) {
2505     Error(IDLoc,
2506       "invalid operand (violates constant bus restrictions)");
2507     return false;
2508   }
2509   if (!validateEarlyClobberLimitations(Inst)) {
2510     Error(IDLoc,
2511       "destination must be different than all sources");
2512     return false;
2513   }
2514   if (!validateIntClampSupported(Inst)) {
2515     Error(IDLoc,
2516       "integer clamping is not supported on this GPU");
2517     return false;
2518   }
2519   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
2520   if (!validateMIMGD16(Inst)) {
2521     Error(IDLoc,
2522       "d16 modifier is not supported on this GPU");
2523     return false;
2524   }
2525   if (!validateMIMGDataSize(Inst)) {
2526     Error(IDLoc,
2527       "image data size does not match dmask and tfe");
2528     return false;
2529   }
2530   if (!validateMIMGAtomicDMask(Inst)) {
2531     Error(IDLoc,
2532       "invalid atomic image dmask");
2533     return false;
2534   }
2535   if (!validateMIMGGatherDMask(Inst)) {
2536     Error(IDLoc,
2537       "invalid image_gather dmask: only one bit must be set");
2538     return false;
2539   }
2540 
2541   return true;
2542 }
2543 
2544 static std::string AMDGPUMnemonicSpellCheck(StringRef S, uint64_t FBS,
2545                                             unsigned VariantID = 0);
2546 
2547 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2548                                               OperandVector &Operands,
2549                                               MCStreamer &Out,
2550                                               uint64_t &ErrorInfo,
2551                                               bool MatchingInlineAsm) {
2552   MCInst Inst;
2553   unsigned Result = Match_Success;
2554   for (auto Variant : getMatchedVariants()) {
2555     uint64_t EI;
2556     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
2557                                   Variant);
2558     // We order match statuses from least to most specific. We use most specific
2559     // status as resulting
2560     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
2561     if ((R == Match_Success) ||
2562         (R == Match_PreferE32) ||
2563         (R == Match_MissingFeature && Result != Match_PreferE32) ||
2564         (R == Match_InvalidOperand && Result != Match_MissingFeature
2565                                    && Result != Match_PreferE32) ||
2566         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
2567                                    && Result != Match_MissingFeature
2568                                    && Result != Match_PreferE32)) {
2569       Result = R;
2570       ErrorInfo = EI;
2571     }
2572     if (R == Match_Success)
2573       break;
2574   }
2575 
2576   switch (Result) {
2577   default: break;
2578   case Match_Success:
2579     if (!validateInstruction(Inst, IDLoc)) {
2580       return true;
2581     }
2582     Inst.setLoc(IDLoc);
2583     Out.EmitInstruction(Inst, getSTI());
2584     return false;
2585 
2586   case Match_MissingFeature:
2587     return Error(IDLoc, "instruction not supported on this GPU");
2588 
2589   case Match_MnemonicFail: {
2590     uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
2591     std::string Suggestion = AMDGPUMnemonicSpellCheck(
2592         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
2593     return Error(IDLoc, "invalid instruction" + Suggestion,
2594                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
2595   }
2596 
2597   case Match_InvalidOperand: {
2598     SMLoc ErrorLoc = IDLoc;
2599     if (ErrorInfo != ~0ULL) {
2600       if (ErrorInfo >= Operands.size()) {
2601         return Error(IDLoc, "too few operands for instruction");
2602       }
2603       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
2604       if (ErrorLoc == SMLoc())
2605         ErrorLoc = IDLoc;
2606     }
2607     return Error(ErrorLoc, "invalid operand for instruction");
2608   }
2609 
2610   case Match_PreferE32:
2611     return Error(IDLoc, "internal error: instruction without _e64 suffix "
2612                         "should be encoded as e32");
2613   }
2614   llvm_unreachable("Implement any new match types added!");
2615 }
2616 
2617 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
2618   int64_t Tmp = -1;
2619   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
2620     return true;
2621   }
2622   if (getParser().parseAbsoluteExpression(Tmp)) {
2623     return true;
2624   }
2625   Ret = static_cast<uint32_t>(Tmp);
2626   return false;
2627 }
2628 
2629 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
2630                                                uint32_t &Minor) {
2631   if (ParseAsAbsoluteExpression(Major))
2632     return TokError("invalid major version");
2633 
2634   if (getLexer().isNot(AsmToken::Comma))
2635     return TokError("minor version number required, comma expected");
2636   Lex();
2637 
2638   if (ParseAsAbsoluteExpression(Minor))
2639     return TokError("invalid minor version");
2640 
2641   return false;
2642 }
2643 
2644 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
2645   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
2646     return TokError("directive only supported for amdgcn architecture");
2647 
2648   std::string Target;
2649 
2650   SMLoc TargetStart = getTok().getLoc();
2651   if (getParser().parseEscapedString(Target))
2652     return true;
2653   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
2654 
2655   std::string ExpectedTarget;
2656   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
2657   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
2658 
2659   if (Target != ExpectedTargetOS.str())
2660     return getParser().Error(TargetRange.Start, "target must match options",
2661                              TargetRange);
2662 
2663   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
2664   return false;
2665 }
2666 
2667 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
2668   return getParser().Error(Range.Start, "value out of range", Range);
2669 }
2670 
2671 bool AMDGPUAsmParser::calculateGPRBlocks(
2672     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
2673     bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange,
2674     unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks,
2675     unsigned &SGPRBlocks) {
2676   // TODO(scott.linder): These calculations are duplicated from
2677   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
2678   IsaVersion Version = getIsaVersion(getSTI().getCPU());
2679 
2680   unsigned NumVGPRs = NextFreeVGPR;
2681   unsigned NumSGPRs = NextFreeSGPR;
2682   unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(&getSTI());
2683 
2684   if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
2685       NumSGPRs > MaxAddressableNumSGPRs)
2686     return OutOfRangeError(SGPRRange);
2687 
2688   NumSGPRs +=
2689       IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
2690 
2691   if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
2692       NumSGPRs > MaxAddressableNumSGPRs)
2693     return OutOfRangeError(SGPRRange);
2694 
2695   if (Features.test(FeatureSGPRInitBug))
2696     NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
2697 
2698   VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs);
2699   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
2700 
2701   return false;
2702 }
2703 
2704 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
2705   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
2706     return TokError("directive only supported for amdgcn architecture");
2707 
2708   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
2709     return TokError("directive only supported for amdhsa OS");
2710 
2711   StringRef KernelName;
2712   if (getParser().parseIdentifier(KernelName))
2713     return true;
2714 
2715   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor();
2716 
2717   StringSet<> Seen;
2718 
2719   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
2720 
2721   SMRange VGPRRange;
2722   uint64_t NextFreeVGPR = 0;
2723   SMRange SGPRRange;
2724   uint64_t NextFreeSGPR = 0;
2725   unsigned UserSGPRCount = 0;
2726   bool ReserveVCC = true;
2727   bool ReserveFlatScr = true;
2728   bool ReserveXNACK = hasXNACK();
2729 
2730   while (true) {
2731     while (getLexer().is(AsmToken::EndOfStatement))
2732       Lex();
2733 
2734     if (getLexer().isNot(AsmToken::Identifier))
2735       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
2736 
2737     StringRef ID = getTok().getIdentifier();
2738     SMRange IDRange = getTok().getLocRange();
2739     Lex();
2740 
2741     if (ID == ".end_amdhsa_kernel")
2742       break;
2743 
2744     if (Seen.find(ID) != Seen.end())
2745       return TokError(".amdhsa_ directives cannot be repeated");
2746     Seen.insert(ID);
2747 
2748     SMLoc ValStart = getTok().getLoc();
2749     int64_t IVal;
2750     if (getParser().parseAbsoluteExpression(IVal))
2751       return true;
2752     SMLoc ValEnd = getTok().getLoc();
2753     SMRange ValRange = SMRange(ValStart, ValEnd);
2754 
2755     if (IVal < 0)
2756       return OutOfRangeError(ValRange);
2757 
2758     uint64_t Val = IVal;
2759 
2760 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
2761   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
2762     return OutOfRangeError(RANGE);                                             \
2763   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
2764 
2765     if (ID == ".amdhsa_group_segment_fixed_size") {
2766       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
2767         return OutOfRangeError(ValRange);
2768       KD.group_segment_fixed_size = Val;
2769     } else if (ID == ".amdhsa_private_segment_fixed_size") {
2770       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
2771         return OutOfRangeError(ValRange);
2772       KD.private_segment_fixed_size = Val;
2773     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
2774       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2775                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
2776                        Val, ValRange);
2777       UserSGPRCount++;
2778     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
2779       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2780                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
2781                        ValRange);
2782       UserSGPRCount++;
2783     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
2784       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2785                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
2786                        ValRange);
2787       UserSGPRCount++;
2788     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
2789       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2790                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
2791                        Val, ValRange);
2792       UserSGPRCount++;
2793     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
2794       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2795                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
2796                        ValRange);
2797       UserSGPRCount++;
2798     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
2799       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2800                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
2801                        ValRange);
2802       UserSGPRCount++;
2803     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
2804       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2805                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
2806                        Val, ValRange);
2807       UserSGPRCount++;
2808     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
2809       PARSE_BITS_ENTRY(
2810           KD.compute_pgm_rsrc2,
2811           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
2812           ValRange);
2813     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
2814       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2815                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
2816                        ValRange);
2817     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
2818       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2819                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
2820                        ValRange);
2821     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
2822       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2823                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
2824                        ValRange);
2825     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
2826       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2827                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
2828                        ValRange);
2829     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
2830       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2831                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
2832                        ValRange);
2833     } else if (ID == ".amdhsa_next_free_vgpr") {
2834       VGPRRange = ValRange;
2835       NextFreeVGPR = Val;
2836     } else if (ID == ".amdhsa_next_free_sgpr") {
2837       SGPRRange = ValRange;
2838       NextFreeSGPR = Val;
2839     } else if (ID == ".amdhsa_reserve_vcc") {
2840       if (!isUInt<1>(Val))
2841         return OutOfRangeError(ValRange);
2842       ReserveVCC = Val;
2843     } else if (ID == ".amdhsa_reserve_flat_scratch") {
2844       if (IVersion.Major < 7)
2845         return getParser().Error(IDRange.Start, "directive requires gfx7+",
2846                                  IDRange);
2847       if (!isUInt<1>(Val))
2848         return OutOfRangeError(ValRange);
2849       ReserveFlatScr = Val;
2850     } else if (ID == ".amdhsa_reserve_xnack_mask") {
2851       if (IVersion.Major < 8)
2852         return getParser().Error(IDRange.Start, "directive requires gfx8+",
2853                                  IDRange);
2854       if (!isUInt<1>(Val))
2855         return OutOfRangeError(ValRange);
2856       ReserveXNACK = Val;
2857     } else if (ID == ".amdhsa_float_round_mode_32") {
2858       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
2859                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
2860     } else if (ID == ".amdhsa_float_round_mode_16_64") {
2861       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
2862                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
2863     } else if (ID == ".amdhsa_float_denorm_mode_32") {
2864       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
2865                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
2866     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
2867       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
2868                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
2869                        ValRange);
2870     } else if (ID == ".amdhsa_dx10_clamp") {
2871       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
2872                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
2873     } else if (ID == ".amdhsa_ieee_mode") {
2874       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
2875                        Val, ValRange);
2876     } else if (ID == ".amdhsa_fp16_overflow") {
2877       if (IVersion.Major < 9)
2878         return getParser().Error(IDRange.Start, "directive requires gfx9+",
2879                                  IDRange);
2880       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
2881                        ValRange);
2882     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
2883       PARSE_BITS_ENTRY(
2884           KD.compute_pgm_rsrc2,
2885           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
2886           ValRange);
2887     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
2888       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2889                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
2890                        Val, ValRange);
2891     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
2892       PARSE_BITS_ENTRY(
2893           KD.compute_pgm_rsrc2,
2894           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
2895           ValRange);
2896     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
2897       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2898                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
2899                        Val, ValRange);
2900     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
2901       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2902                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
2903                        Val, ValRange);
2904     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
2905       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2906                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
2907                        Val, ValRange);
2908     } else if (ID == ".amdhsa_exception_int_div_zero") {
2909       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2910                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
2911                        Val, ValRange);
2912     } else {
2913       return getParser().Error(IDRange.Start,
2914                                "unknown .amdhsa_kernel directive", IDRange);
2915     }
2916 
2917 #undef PARSE_BITS_ENTRY
2918   }
2919 
2920   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
2921     return TokError(".amdhsa_next_free_vgpr directive is required");
2922 
2923   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
2924     return TokError(".amdhsa_next_free_sgpr directive is required");
2925 
2926   unsigned VGPRBlocks;
2927   unsigned SGPRBlocks;
2928   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
2929                          ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR,
2930                          SGPRRange, VGPRBlocks, SGPRBlocks))
2931     return true;
2932 
2933   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
2934           VGPRBlocks))
2935     return OutOfRangeError(VGPRRange);
2936   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
2937                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
2938 
2939   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
2940           SGPRBlocks))
2941     return OutOfRangeError(SGPRRange);
2942   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
2943                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
2944                   SGPRBlocks);
2945 
2946   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
2947     return TokError("too many user SGPRs enabled");
2948   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
2949                   UserSGPRCount);
2950 
2951   getTargetStreamer().EmitAmdhsaKernelDescriptor(
2952       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
2953       ReserveFlatScr, ReserveXNACK);
2954   return false;
2955 }
2956 
2957 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
2958   uint32_t Major;
2959   uint32_t Minor;
2960 
2961   if (ParseDirectiveMajorMinor(Major, Minor))
2962     return true;
2963 
2964   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
2965   return false;
2966 }
2967 
2968 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
2969   uint32_t Major;
2970   uint32_t Minor;
2971   uint32_t Stepping;
2972   StringRef VendorName;
2973   StringRef ArchName;
2974 
2975   // If this directive has no arguments, then use the ISA version for the
2976   // targeted GPU.
2977   if (getLexer().is(AsmToken::EndOfStatement)) {
2978     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
2979     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
2980                                                       ISA.Stepping,
2981                                                       "AMD", "AMDGPU");
2982     return false;
2983   }
2984 
2985   if (ParseDirectiveMajorMinor(Major, Minor))
2986     return true;
2987 
2988   if (getLexer().isNot(AsmToken::Comma))
2989     return TokError("stepping version number required, comma expected");
2990   Lex();
2991 
2992   if (ParseAsAbsoluteExpression(Stepping))
2993     return TokError("invalid stepping version");
2994 
2995   if (getLexer().isNot(AsmToken::Comma))
2996     return TokError("vendor name required, comma expected");
2997   Lex();
2998 
2999   if (getLexer().isNot(AsmToken::String))
3000     return TokError("invalid vendor name");
3001 
3002   VendorName = getLexer().getTok().getStringContents();
3003   Lex();
3004 
3005   if (getLexer().isNot(AsmToken::Comma))
3006     return TokError("arch name required, comma expected");
3007   Lex();
3008 
3009   if (getLexer().isNot(AsmToken::String))
3010     return TokError("invalid arch name");
3011 
3012   ArchName = getLexer().getTok().getStringContents();
3013   Lex();
3014 
3015   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
3016                                                     VendorName, ArchName);
3017   return false;
3018 }
3019 
3020 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
3021                                                amd_kernel_code_t &Header) {
3022   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
3023   // assembly for backwards compatibility.
3024   if (ID == "max_scratch_backing_memory_byte_size") {
3025     Parser.eatToEndOfStatement();
3026     return false;
3027   }
3028 
3029   SmallString<40> ErrStr;
3030   raw_svector_ostream Err(ErrStr);
3031   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
3032     return TokError(Err.str());
3033   }
3034   Lex();
3035   return false;
3036 }
3037 
3038 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
3039   amd_kernel_code_t Header;
3040   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
3041 
3042   while (true) {
3043     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
3044     // will set the current token to EndOfStatement.
3045     while(getLexer().is(AsmToken::EndOfStatement))
3046       Lex();
3047 
3048     if (getLexer().isNot(AsmToken::Identifier))
3049       return TokError("expected value identifier or .end_amd_kernel_code_t");
3050 
3051     StringRef ID = getLexer().getTok().getIdentifier();
3052     Lex();
3053 
3054     if (ID == ".end_amd_kernel_code_t")
3055       break;
3056 
3057     if (ParseAMDKernelCodeTValue(ID, Header))
3058       return true;
3059   }
3060 
3061   getTargetStreamer().EmitAMDKernelCodeT(Header);
3062 
3063   return false;
3064 }
3065 
3066 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
3067   if (getLexer().isNot(AsmToken::Identifier))
3068     return TokError("expected symbol name");
3069 
3070   StringRef KernelName = Parser.getTok().getString();
3071 
3072   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
3073                                            ELF::STT_AMDGPU_HSA_KERNEL);
3074   Lex();
3075   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
3076     KernelScope.initialize(getContext());
3077   return false;
3078 }
3079 
3080 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
3081   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
3082     return Error(getParser().getTok().getLoc(),
3083                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
3084                  "architectures");
3085   }
3086 
3087   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
3088 
3089   std::string ISAVersionStringFromSTI;
3090   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
3091   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
3092 
3093   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
3094     return Error(getParser().getTok().getLoc(),
3095                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
3096                  "arguments specified through the command line");
3097   }
3098 
3099   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
3100   Lex();
3101 
3102   return false;
3103 }
3104 
3105 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
3106   const char *AssemblerDirectiveBegin;
3107   const char *AssemblerDirectiveEnd;
3108   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
3109       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
3110           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
3111                             HSAMD::V3::AssemblerDirectiveEnd)
3112           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
3113                             HSAMD::AssemblerDirectiveEnd);
3114 
3115   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
3116     return Error(getParser().getTok().getLoc(),
3117                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
3118                  "not available on non-amdhsa OSes")).str());
3119   }
3120 
3121   std::string HSAMetadataString;
3122   raw_string_ostream YamlStream(HSAMetadataString);
3123 
3124   getLexer().setSkipSpace(false);
3125 
3126   bool FoundEnd = false;
3127   while (!getLexer().is(AsmToken::Eof)) {
3128     while (getLexer().is(AsmToken::Space)) {
3129       YamlStream << getLexer().getTok().getString();
3130       Lex();
3131     }
3132 
3133     if (getLexer().is(AsmToken::Identifier)) {
3134       StringRef ID = getLexer().getTok().getIdentifier();
3135       if (ID == AssemblerDirectiveEnd) {
3136         Lex();
3137         FoundEnd = true;
3138         break;
3139       }
3140     }
3141 
3142     YamlStream << Parser.parseStringToEndOfStatement()
3143                << getContext().getAsmInfo()->getSeparatorString();
3144 
3145     Parser.eatToEndOfStatement();
3146   }
3147 
3148   getLexer().setSkipSpace(true);
3149 
3150   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
3151     return TokError(Twine("expected directive ") +
3152                     Twine(HSAMD::AssemblerDirectiveEnd) + Twine(" not found"));
3153   }
3154 
3155   YamlStream.flush();
3156 
3157   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
3158     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
3159       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3160   } else {
3161     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
3162       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3163   }
3164 
3165   return false;
3166 }
3167 
3168 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
3169   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
3170     return Error(getParser().getTok().getLoc(),
3171                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
3172                  "not available on non-amdpal OSes")).str());
3173   }
3174 
3175   PALMD::Metadata PALMetadata;
3176   for (;;) {
3177     uint32_t Value;
3178     if (ParseAsAbsoluteExpression(Value)) {
3179       return TokError(Twine("invalid value in ") +
3180                       Twine(PALMD::AssemblerDirective));
3181     }
3182     PALMetadata.push_back(Value);
3183     if (getLexer().isNot(AsmToken::Comma))
3184       break;
3185     Lex();
3186   }
3187   getTargetStreamer().EmitPALMetadata(PALMetadata);
3188   return false;
3189 }
3190 
3191 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
3192   StringRef IDVal = DirectiveID.getString();
3193 
3194   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
3195     if (IDVal == ".amdgcn_target")
3196       return ParseDirectiveAMDGCNTarget();
3197 
3198     if (IDVal == ".amdhsa_kernel")
3199       return ParseDirectiveAMDHSAKernel();
3200 
3201     // TODO: Restructure/combine with PAL metadata directive.
3202     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
3203       return ParseDirectiveHSAMetadata();
3204   } else {
3205     if (IDVal == ".hsa_code_object_version")
3206       return ParseDirectiveHSACodeObjectVersion();
3207 
3208     if (IDVal == ".hsa_code_object_isa")
3209       return ParseDirectiveHSACodeObjectISA();
3210 
3211     if (IDVal == ".amd_kernel_code_t")
3212       return ParseDirectiveAMDKernelCodeT();
3213 
3214     if (IDVal == ".amdgpu_hsa_kernel")
3215       return ParseDirectiveAMDGPUHsaKernel();
3216 
3217     if (IDVal == ".amd_amdgpu_isa")
3218       return ParseDirectiveISAVersion();
3219 
3220     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
3221       return ParseDirectiveHSAMetadata();
3222   }
3223 
3224   if (IDVal == PALMD::AssemblerDirective)
3225     return ParseDirectivePALMetadata();
3226 
3227   return true;
3228 }
3229 
3230 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
3231                                            unsigned RegNo) const {
3232 
3233   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
3234        R.isValid(); ++R) {
3235     if (*R == RegNo)
3236       return isGFX9();
3237   }
3238 
3239   switch (RegNo) {
3240   case AMDGPU::TBA:
3241   case AMDGPU::TBA_LO:
3242   case AMDGPU::TBA_HI:
3243   case AMDGPU::TMA:
3244   case AMDGPU::TMA_LO:
3245   case AMDGPU::TMA_HI:
3246     return !isGFX9();
3247   case AMDGPU::XNACK_MASK:
3248   case AMDGPU::XNACK_MASK_LO:
3249   case AMDGPU::XNACK_MASK_HI:
3250     return !isCI() && !isSI() && hasXNACK();
3251   default:
3252     break;
3253   }
3254 
3255   if (isCI())
3256     return true;
3257 
3258   if (isSI()) {
3259     // No flat_scr
3260     switch (RegNo) {
3261     case AMDGPU::FLAT_SCR:
3262     case AMDGPU::FLAT_SCR_LO:
3263     case AMDGPU::FLAT_SCR_HI:
3264       return false;
3265     default:
3266       return true;
3267     }
3268   }
3269 
3270   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
3271   // SI/CI have.
3272   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
3273        R.isValid(); ++R) {
3274     if (*R == RegNo)
3275       return false;
3276   }
3277 
3278   return true;
3279 }
3280 
3281 OperandMatchResultTy
3282 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
3283   // Try to parse with a custom parser
3284   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
3285 
3286   // If we successfully parsed the operand or if there as an error parsing,
3287   // we are done.
3288   //
3289   // If we are parsing after we reach EndOfStatement then this means we
3290   // are appending default values to the Operands list.  This is only done
3291   // by custom parser, so we shouldn't continue on to the generic parsing.
3292   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
3293       getLexer().is(AsmToken::EndOfStatement))
3294     return ResTy;
3295 
3296   ResTy = parseRegOrImm(Operands);
3297 
3298   if (ResTy == MatchOperand_Success)
3299     return ResTy;
3300 
3301   const auto &Tok = Parser.getTok();
3302   SMLoc S = Tok.getLoc();
3303 
3304   const MCExpr *Expr = nullptr;
3305   if (!Parser.parseExpression(Expr)) {
3306     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3307     return MatchOperand_Success;
3308   }
3309 
3310   // Possibly this is an instruction flag like 'gds'.
3311   if (Tok.getKind() == AsmToken::Identifier) {
3312     Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S));
3313     Parser.Lex();
3314     return MatchOperand_Success;
3315   }
3316 
3317   return MatchOperand_NoMatch;
3318 }
3319 
3320 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
3321   // Clear any forced encodings from the previous instruction.
3322   setForcedEncodingSize(0);
3323   setForcedDPP(false);
3324   setForcedSDWA(false);
3325 
3326   if (Name.endswith("_e64")) {
3327     setForcedEncodingSize(64);
3328     return Name.substr(0, Name.size() - 4);
3329   } else if (Name.endswith("_e32")) {
3330     setForcedEncodingSize(32);
3331     return Name.substr(0, Name.size() - 4);
3332   } else if (Name.endswith("_dpp")) {
3333     setForcedDPP(true);
3334     return Name.substr(0, Name.size() - 4);
3335   } else if (Name.endswith("_sdwa")) {
3336     setForcedSDWA(true);
3337     return Name.substr(0, Name.size() - 5);
3338   }
3339   return Name;
3340 }
3341 
3342 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
3343                                        StringRef Name,
3344                                        SMLoc NameLoc, OperandVector &Operands) {
3345   // Add the instruction mnemonic
3346   Name = parseMnemonicSuffix(Name);
3347   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
3348 
3349   while (!getLexer().is(AsmToken::EndOfStatement)) {
3350     OperandMatchResultTy Res = parseOperand(Operands, Name);
3351 
3352     // Eat the comma or space if there is one.
3353     if (getLexer().is(AsmToken::Comma))
3354       Parser.Lex();
3355 
3356     switch (Res) {
3357       case MatchOperand_Success: break;
3358       case MatchOperand_ParseFail:
3359         Error(getLexer().getLoc(), "failed parsing operand.");
3360         while (!getLexer().is(AsmToken::EndOfStatement)) {
3361           Parser.Lex();
3362         }
3363         return true;
3364       case MatchOperand_NoMatch:
3365         Error(getLexer().getLoc(), "not a valid operand.");
3366         while (!getLexer().is(AsmToken::EndOfStatement)) {
3367           Parser.Lex();
3368         }
3369         return true;
3370     }
3371   }
3372 
3373   return false;
3374 }
3375 
3376 //===----------------------------------------------------------------------===//
3377 // Utility functions
3378 //===----------------------------------------------------------------------===//
3379 
3380 OperandMatchResultTy
3381 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) {
3382   switch(getLexer().getKind()) {
3383     default: return MatchOperand_NoMatch;
3384     case AsmToken::Identifier: {
3385       StringRef Name = Parser.getTok().getString();
3386       if (!Name.equals(Prefix)) {
3387         return MatchOperand_NoMatch;
3388       }
3389 
3390       Parser.Lex();
3391       if (getLexer().isNot(AsmToken::Colon))
3392         return MatchOperand_ParseFail;
3393 
3394       Parser.Lex();
3395 
3396       bool IsMinus = false;
3397       if (getLexer().getKind() == AsmToken::Minus) {
3398         Parser.Lex();
3399         IsMinus = true;
3400       }
3401 
3402       if (getLexer().isNot(AsmToken::Integer))
3403         return MatchOperand_ParseFail;
3404 
3405       if (getParser().parseAbsoluteExpression(Int))
3406         return MatchOperand_ParseFail;
3407 
3408       if (IsMinus)
3409         Int = -Int;
3410       break;
3411     }
3412   }
3413   return MatchOperand_Success;
3414 }
3415 
3416 OperandMatchResultTy
3417 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
3418                                     AMDGPUOperand::ImmTy ImmTy,
3419                                     bool (*ConvertResult)(int64_t&)) {
3420   SMLoc S = Parser.getTok().getLoc();
3421   int64_t Value = 0;
3422 
3423   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
3424   if (Res != MatchOperand_Success)
3425     return Res;
3426 
3427   if (ConvertResult && !ConvertResult(Value)) {
3428     return MatchOperand_ParseFail;
3429   }
3430 
3431   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
3432   return MatchOperand_Success;
3433 }
3434 
3435 OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix(
3436   const char *Prefix,
3437   OperandVector &Operands,
3438   AMDGPUOperand::ImmTy ImmTy,
3439   bool (*ConvertResult)(int64_t&)) {
3440   StringRef Name = Parser.getTok().getString();
3441   if (!Name.equals(Prefix))
3442     return MatchOperand_NoMatch;
3443 
3444   Parser.Lex();
3445   if (getLexer().isNot(AsmToken::Colon))
3446     return MatchOperand_ParseFail;
3447 
3448   Parser.Lex();
3449   if (getLexer().isNot(AsmToken::LBrac))
3450     return MatchOperand_ParseFail;
3451   Parser.Lex();
3452 
3453   unsigned Val = 0;
3454   SMLoc S = Parser.getTok().getLoc();
3455 
3456   // FIXME: How to verify the number of elements matches the number of src
3457   // operands?
3458   for (int I = 0; I < 4; ++I) {
3459     if (I != 0) {
3460       if (getLexer().is(AsmToken::RBrac))
3461         break;
3462 
3463       if (getLexer().isNot(AsmToken::Comma))
3464         return MatchOperand_ParseFail;
3465       Parser.Lex();
3466     }
3467 
3468     if (getLexer().isNot(AsmToken::Integer))
3469       return MatchOperand_ParseFail;
3470 
3471     int64_t Op;
3472     if (getParser().parseAbsoluteExpression(Op))
3473       return MatchOperand_ParseFail;
3474 
3475     if (Op != 0 && Op != 1)
3476       return MatchOperand_ParseFail;
3477     Val |= (Op << I);
3478   }
3479 
3480   Parser.Lex();
3481   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
3482   return MatchOperand_Success;
3483 }
3484 
3485 OperandMatchResultTy
3486 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
3487                                AMDGPUOperand::ImmTy ImmTy) {
3488   int64_t Bit = 0;
3489   SMLoc S = Parser.getTok().getLoc();
3490 
3491   // We are at the end of the statement, and this is a default argument, so
3492   // use a default value.
3493   if (getLexer().isNot(AsmToken::EndOfStatement)) {
3494     switch(getLexer().getKind()) {
3495       case AsmToken::Identifier: {
3496         StringRef Tok = Parser.getTok().getString();
3497         if (Tok == Name) {
3498           if (Tok == "r128" && isGFX9())
3499             Error(S, "r128 modifier is not supported on this GPU");
3500           if (Tok == "a16" && !isGFX9())
3501             Error(S, "a16 modifier is not supported on this GPU");
3502           Bit = 1;
3503           Parser.Lex();
3504         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
3505           Bit = 0;
3506           Parser.Lex();
3507         } else {
3508           return MatchOperand_NoMatch;
3509         }
3510         break;
3511       }
3512       default:
3513         return MatchOperand_NoMatch;
3514     }
3515   }
3516 
3517   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
3518   return MatchOperand_Success;
3519 }
3520 
3521 static void addOptionalImmOperand(
3522   MCInst& Inst, const OperandVector& Operands,
3523   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
3524   AMDGPUOperand::ImmTy ImmT,
3525   int64_t Default = 0) {
3526   auto i = OptionalIdx.find(ImmT);
3527   if (i != OptionalIdx.end()) {
3528     unsigned Idx = i->second;
3529     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
3530   } else {
3531     Inst.addOperand(MCOperand::createImm(Default));
3532   }
3533 }
3534 
3535 OperandMatchResultTy
3536 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
3537   if (getLexer().isNot(AsmToken::Identifier)) {
3538     return MatchOperand_NoMatch;
3539   }
3540   StringRef Tok = Parser.getTok().getString();
3541   if (Tok != Prefix) {
3542     return MatchOperand_NoMatch;
3543   }
3544 
3545   Parser.Lex();
3546   if (getLexer().isNot(AsmToken::Colon)) {
3547     return MatchOperand_ParseFail;
3548   }
3549 
3550   Parser.Lex();
3551   if (getLexer().isNot(AsmToken::Identifier)) {
3552     return MatchOperand_ParseFail;
3553   }
3554 
3555   Value = Parser.getTok().getString();
3556   return MatchOperand_Success;
3557 }
3558 
3559 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
3560 // values to live in a joint format operand in the MCInst encoding.
3561 OperandMatchResultTy
3562 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
3563   SMLoc S = Parser.getTok().getLoc();
3564   int64_t Dfmt = 0, Nfmt = 0;
3565   // dfmt and nfmt can appear in either order, and each is optional.
3566   bool GotDfmt = false, GotNfmt = false;
3567   while (!GotDfmt || !GotNfmt) {
3568     if (!GotDfmt) {
3569       auto Res = parseIntWithPrefix("dfmt", Dfmt);
3570       if (Res != MatchOperand_NoMatch) {
3571         if (Res != MatchOperand_Success)
3572           return Res;
3573         if (Dfmt >= 16) {
3574           Error(Parser.getTok().getLoc(), "out of range dfmt");
3575           return MatchOperand_ParseFail;
3576         }
3577         GotDfmt = true;
3578         Parser.Lex();
3579         continue;
3580       }
3581     }
3582     if (!GotNfmt) {
3583       auto Res = parseIntWithPrefix("nfmt", Nfmt);
3584       if (Res != MatchOperand_NoMatch) {
3585         if (Res != MatchOperand_Success)
3586           return Res;
3587         if (Nfmt >= 8) {
3588           Error(Parser.getTok().getLoc(), "out of range nfmt");
3589           return MatchOperand_ParseFail;
3590         }
3591         GotNfmt = true;
3592         Parser.Lex();
3593         continue;
3594       }
3595     }
3596     break;
3597   }
3598   if (!GotDfmt && !GotNfmt)
3599     return MatchOperand_NoMatch;
3600   auto Format = Dfmt | Nfmt << 4;
3601   Operands.push_back(
3602       AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
3603   return MatchOperand_Success;
3604 }
3605 
3606 //===----------------------------------------------------------------------===//
3607 // ds
3608 //===----------------------------------------------------------------------===//
3609 
3610 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
3611                                     const OperandVector &Operands) {
3612   OptionalImmIndexMap OptionalIdx;
3613 
3614   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3615     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3616 
3617     // Add the register arguments
3618     if (Op.isReg()) {
3619       Op.addRegOperands(Inst, 1);
3620       continue;
3621     }
3622 
3623     // Handle optional arguments
3624     OptionalIdx[Op.getImmTy()] = i;
3625   }
3626 
3627   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
3628   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
3629   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
3630 
3631   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
3632 }
3633 
3634 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
3635                                 bool IsGdsHardcoded) {
3636   OptionalImmIndexMap OptionalIdx;
3637 
3638   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3639     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3640 
3641     // Add the register arguments
3642     if (Op.isReg()) {
3643       Op.addRegOperands(Inst, 1);
3644       continue;
3645     }
3646 
3647     if (Op.isToken() && Op.getToken() == "gds") {
3648       IsGdsHardcoded = true;
3649       continue;
3650     }
3651 
3652     // Handle optional arguments
3653     OptionalIdx[Op.getImmTy()] = i;
3654   }
3655 
3656   AMDGPUOperand::ImmTy OffsetType =
3657     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si ||
3658      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
3659                                                       AMDGPUOperand::ImmTyOffset;
3660 
3661   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
3662 
3663   if (!IsGdsHardcoded) {
3664     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
3665   }
3666   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
3667 }
3668 
3669 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
3670   OptionalImmIndexMap OptionalIdx;
3671 
3672   unsigned OperandIdx[4];
3673   unsigned EnMask = 0;
3674   int SrcIdx = 0;
3675 
3676   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3677     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3678 
3679     // Add the register arguments
3680     if (Op.isReg()) {
3681       assert(SrcIdx < 4);
3682       OperandIdx[SrcIdx] = Inst.size();
3683       Op.addRegOperands(Inst, 1);
3684       ++SrcIdx;
3685       continue;
3686     }
3687 
3688     if (Op.isOff()) {
3689       assert(SrcIdx < 4);
3690       OperandIdx[SrcIdx] = Inst.size();
3691       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
3692       ++SrcIdx;
3693       continue;
3694     }
3695 
3696     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
3697       Op.addImmOperands(Inst, 1);
3698       continue;
3699     }
3700 
3701     if (Op.isToken() && Op.getToken() == "done")
3702       continue;
3703 
3704     // Handle optional arguments
3705     OptionalIdx[Op.getImmTy()] = i;
3706   }
3707 
3708   assert(SrcIdx == 4);
3709 
3710   bool Compr = false;
3711   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
3712     Compr = true;
3713     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
3714     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
3715     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
3716   }
3717 
3718   for (auto i = 0; i < SrcIdx; ++i) {
3719     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
3720       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
3721     }
3722   }
3723 
3724   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
3725   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
3726 
3727   Inst.addOperand(MCOperand::createImm(EnMask));
3728 }
3729 
3730 //===----------------------------------------------------------------------===//
3731 // s_waitcnt
3732 //===----------------------------------------------------------------------===//
3733 
3734 static bool
3735 encodeCnt(
3736   const AMDGPU::IsaVersion ISA,
3737   int64_t &IntVal,
3738   int64_t CntVal,
3739   bool Saturate,
3740   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
3741   unsigned (*decode)(const IsaVersion &Version, unsigned))
3742 {
3743   bool Failed = false;
3744 
3745   IntVal = encode(ISA, IntVal, CntVal);
3746   if (CntVal != decode(ISA, IntVal)) {
3747     if (Saturate) {
3748       IntVal = encode(ISA, IntVal, -1);
3749     } else {
3750       Failed = true;
3751     }
3752   }
3753   return Failed;
3754 }
3755 
3756 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
3757   StringRef CntName = Parser.getTok().getString();
3758   int64_t CntVal;
3759 
3760   Parser.Lex();
3761   if (getLexer().isNot(AsmToken::LParen))
3762     return true;
3763 
3764   Parser.Lex();
3765   if (getLexer().isNot(AsmToken::Integer))
3766     return true;
3767 
3768   SMLoc ValLoc = Parser.getTok().getLoc();
3769   if (getParser().parseAbsoluteExpression(CntVal))
3770     return true;
3771 
3772   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3773 
3774   bool Failed = true;
3775   bool Sat = CntName.endswith("_sat");
3776 
3777   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
3778     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
3779   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
3780     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
3781   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
3782     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
3783   }
3784 
3785   if (Failed) {
3786     Error(ValLoc, "too large value for " + CntName);
3787     return true;
3788   }
3789 
3790   if (getLexer().isNot(AsmToken::RParen)) {
3791     return true;
3792   }
3793 
3794   Parser.Lex();
3795   if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
3796     const AsmToken NextToken = getLexer().peekTok();
3797     if (NextToken.is(AsmToken::Identifier)) {
3798       Parser.Lex();
3799     }
3800   }
3801 
3802   return false;
3803 }
3804 
3805 OperandMatchResultTy
3806 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
3807   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3808   int64_t Waitcnt = getWaitcntBitMask(ISA);
3809   SMLoc S = Parser.getTok().getLoc();
3810 
3811   switch(getLexer().getKind()) {
3812     default: return MatchOperand_ParseFail;
3813     case AsmToken::Integer:
3814       // The operand can be an integer value.
3815       if (getParser().parseAbsoluteExpression(Waitcnt))
3816         return MatchOperand_ParseFail;
3817       break;
3818 
3819     case AsmToken::Identifier:
3820       do {
3821         if (parseCnt(Waitcnt))
3822           return MatchOperand_ParseFail;
3823       } while(getLexer().isNot(AsmToken::EndOfStatement));
3824       break;
3825   }
3826   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
3827   return MatchOperand_Success;
3828 }
3829 
3830 bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
3831                                           int64_t &Width) {
3832   using namespace llvm::AMDGPU::Hwreg;
3833 
3834   if (Parser.getTok().getString() != "hwreg")
3835     return true;
3836   Parser.Lex();
3837 
3838   if (getLexer().isNot(AsmToken::LParen))
3839     return true;
3840   Parser.Lex();
3841 
3842   if (getLexer().is(AsmToken::Identifier)) {
3843     HwReg.IsSymbolic = true;
3844     HwReg.Id = ID_UNKNOWN_;
3845     const StringRef tok = Parser.getTok().getString();
3846     int Last = ID_SYMBOLIC_LAST_;
3847     if (isSI() || isCI() || isVI())
3848       Last = ID_SYMBOLIC_FIRST_GFX9_;
3849     for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) {
3850       if (tok == IdSymbolic[i]) {
3851         HwReg.Id = i;
3852         break;
3853       }
3854     }
3855     Parser.Lex();
3856   } else {
3857     HwReg.IsSymbolic = false;
3858     if (getLexer().isNot(AsmToken::Integer))
3859       return true;
3860     if (getParser().parseAbsoluteExpression(HwReg.Id))
3861       return true;
3862   }
3863 
3864   if (getLexer().is(AsmToken::RParen)) {
3865     Parser.Lex();
3866     return false;
3867   }
3868 
3869   // optional params
3870   if (getLexer().isNot(AsmToken::Comma))
3871     return true;
3872   Parser.Lex();
3873 
3874   if (getLexer().isNot(AsmToken::Integer))
3875     return true;
3876   if (getParser().parseAbsoluteExpression(Offset))
3877     return true;
3878 
3879   if (getLexer().isNot(AsmToken::Comma))
3880     return true;
3881   Parser.Lex();
3882 
3883   if (getLexer().isNot(AsmToken::Integer))
3884     return true;
3885   if (getParser().parseAbsoluteExpression(Width))
3886     return true;
3887 
3888   if (getLexer().isNot(AsmToken::RParen))
3889     return true;
3890   Parser.Lex();
3891 
3892   return false;
3893 }
3894 
3895 OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
3896   using namespace llvm::AMDGPU::Hwreg;
3897 
3898   int64_t Imm16Val = 0;
3899   SMLoc S = Parser.getTok().getLoc();
3900 
3901   switch(getLexer().getKind()) {
3902     default: return MatchOperand_NoMatch;
3903     case AsmToken::Integer:
3904       // The operand can be an integer value.
3905       if (getParser().parseAbsoluteExpression(Imm16Val))
3906         return MatchOperand_NoMatch;
3907       if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
3908         Error(S, "invalid immediate: only 16-bit values are legal");
3909         // Do not return error code, but create an imm operand anyway and proceed
3910         // to the next operand, if any. That avoids unneccessary error messages.
3911       }
3912       break;
3913 
3914     case AsmToken::Identifier: {
3915         OperandInfoTy HwReg(ID_UNKNOWN_);
3916         int64_t Offset = OFFSET_DEFAULT_;
3917         int64_t Width = WIDTH_M1_DEFAULT_ + 1;
3918         if (parseHwregConstruct(HwReg, Offset, Width))
3919           return MatchOperand_ParseFail;
3920         if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) {
3921           if (HwReg.IsSymbolic)
3922             Error(S, "invalid symbolic name of hardware register");
3923           else
3924             Error(S, "invalid code of hardware register: only 6-bit values are legal");
3925         }
3926         if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset))
3927           Error(S, "invalid bit offset: only 5-bit values are legal");
3928         if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1))
3929           Error(S, "invalid bitfield width: only values from 1 to 32 are legal");
3930         Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_);
3931       }
3932       break;
3933   }
3934   Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
3935   return MatchOperand_Success;
3936 }
3937 
3938 bool AMDGPUOperand::isSWaitCnt() const {
3939   return isImm();
3940 }
3941 
3942 bool AMDGPUOperand::isHwreg() const {
3943   return isImmTy(ImmTyHwreg);
3944 }
3945 
3946 bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) {
3947   using namespace llvm::AMDGPU::SendMsg;
3948 
3949   if (Parser.getTok().getString() != "sendmsg")
3950     return true;
3951   Parser.Lex();
3952 
3953   if (getLexer().isNot(AsmToken::LParen))
3954     return true;
3955   Parser.Lex();
3956 
3957   if (getLexer().is(AsmToken::Identifier)) {
3958     Msg.IsSymbolic = true;
3959     Msg.Id = ID_UNKNOWN_;
3960     const std::string tok = Parser.getTok().getString();
3961     for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
3962       switch(i) {
3963         default: continue; // Omit gaps.
3964         case ID_INTERRUPT: case ID_GS: case ID_GS_DONE:  case ID_SYSMSG: break;
3965       }
3966       if (tok == IdSymbolic[i]) {
3967         Msg.Id = i;
3968         break;
3969       }
3970     }
3971     Parser.Lex();
3972   } else {
3973     Msg.IsSymbolic = false;
3974     if (getLexer().isNot(AsmToken::Integer))
3975       return true;
3976     if (getParser().parseAbsoluteExpression(Msg.Id))
3977       return true;
3978     if (getLexer().is(AsmToken::Integer))
3979       if (getParser().parseAbsoluteExpression(Msg.Id))
3980         Msg.Id = ID_UNKNOWN_;
3981   }
3982   if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest.
3983     return false;
3984 
3985   if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) {
3986     if (getLexer().isNot(AsmToken::RParen))
3987       return true;
3988     Parser.Lex();
3989     return false;
3990   }
3991 
3992   if (getLexer().isNot(AsmToken::Comma))
3993     return true;
3994   Parser.Lex();
3995 
3996   assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG);
3997   Operation.Id = ID_UNKNOWN_;
3998   if (getLexer().is(AsmToken::Identifier)) {
3999     Operation.IsSymbolic = true;
4000     const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
4001     const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
4002     const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
4003     const StringRef Tok = Parser.getTok().getString();
4004     for (int i = F; i < L; ++i) {
4005       if (Tok == S[i]) {
4006         Operation.Id = i;
4007         break;
4008       }
4009     }
4010     Parser.Lex();
4011   } else {
4012     Operation.IsSymbolic = false;
4013     if (getLexer().isNot(AsmToken::Integer))
4014       return true;
4015     if (getParser().parseAbsoluteExpression(Operation.Id))
4016       return true;
4017   }
4018 
4019   if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
4020     // Stream id is optional.
4021     if (getLexer().is(AsmToken::RParen)) {
4022       Parser.Lex();
4023       return false;
4024     }
4025 
4026     if (getLexer().isNot(AsmToken::Comma))
4027       return true;
4028     Parser.Lex();
4029 
4030     if (getLexer().isNot(AsmToken::Integer))
4031       return true;
4032     if (getParser().parseAbsoluteExpression(StreamId))
4033       return true;
4034   }
4035 
4036   if (getLexer().isNot(AsmToken::RParen))
4037     return true;
4038   Parser.Lex();
4039   return false;
4040 }
4041 
4042 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
4043   if (getLexer().getKind() != AsmToken::Identifier)
4044     return MatchOperand_NoMatch;
4045 
4046   StringRef Str = Parser.getTok().getString();
4047   int Slot = StringSwitch<int>(Str)
4048     .Case("p10", 0)
4049     .Case("p20", 1)
4050     .Case("p0", 2)
4051     .Default(-1);
4052 
4053   SMLoc S = Parser.getTok().getLoc();
4054   if (Slot == -1)
4055     return MatchOperand_ParseFail;
4056 
4057   Parser.Lex();
4058   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
4059                                               AMDGPUOperand::ImmTyInterpSlot));
4060   return MatchOperand_Success;
4061 }
4062 
4063 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
4064   if (getLexer().getKind() != AsmToken::Identifier)
4065     return MatchOperand_NoMatch;
4066 
4067   StringRef Str = Parser.getTok().getString();
4068   if (!Str.startswith("attr"))
4069     return MatchOperand_NoMatch;
4070 
4071   StringRef Chan = Str.take_back(2);
4072   int AttrChan = StringSwitch<int>(Chan)
4073     .Case(".x", 0)
4074     .Case(".y", 1)
4075     .Case(".z", 2)
4076     .Case(".w", 3)
4077     .Default(-1);
4078   if (AttrChan == -1)
4079     return MatchOperand_ParseFail;
4080 
4081   Str = Str.drop_back(2).drop_front(4);
4082 
4083   uint8_t Attr;
4084   if (Str.getAsInteger(10, Attr))
4085     return MatchOperand_ParseFail;
4086 
4087   SMLoc S = Parser.getTok().getLoc();
4088   Parser.Lex();
4089   if (Attr > 63) {
4090     Error(S, "out of bounds attr");
4091     return MatchOperand_Success;
4092   }
4093 
4094   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
4095 
4096   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
4097                                               AMDGPUOperand::ImmTyInterpAttr));
4098   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
4099                                               AMDGPUOperand::ImmTyAttrChan));
4100   return MatchOperand_Success;
4101 }
4102 
4103 void AMDGPUAsmParser::errorExpTgt() {
4104   Error(Parser.getTok().getLoc(), "invalid exp target");
4105 }
4106 
4107 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
4108                                                       uint8_t &Val) {
4109   if (Str == "null") {
4110     Val = 9;
4111     return MatchOperand_Success;
4112   }
4113 
4114   if (Str.startswith("mrt")) {
4115     Str = Str.drop_front(3);
4116     if (Str == "z") { // == mrtz
4117       Val = 8;
4118       return MatchOperand_Success;
4119     }
4120 
4121     if (Str.getAsInteger(10, Val))
4122       return MatchOperand_ParseFail;
4123 
4124     if (Val > 7)
4125       errorExpTgt();
4126 
4127     return MatchOperand_Success;
4128   }
4129 
4130   if (Str.startswith("pos")) {
4131     Str = Str.drop_front(3);
4132     if (Str.getAsInteger(10, Val))
4133       return MatchOperand_ParseFail;
4134 
4135     if (Val > 3)
4136       errorExpTgt();
4137 
4138     Val += 12;
4139     return MatchOperand_Success;
4140   }
4141 
4142   if (Str.startswith("param")) {
4143     Str = Str.drop_front(5);
4144     if (Str.getAsInteger(10, Val))
4145       return MatchOperand_ParseFail;
4146 
4147     if (Val >= 32)
4148       errorExpTgt();
4149 
4150     Val += 32;
4151     return MatchOperand_Success;
4152   }
4153 
4154   if (Str.startswith("invalid_target_")) {
4155     Str = Str.drop_front(15);
4156     if (Str.getAsInteger(10, Val))
4157       return MatchOperand_ParseFail;
4158 
4159     errorExpTgt();
4160     return MatchOperand_Success;
4161   }
4162 
4163   return MatchOperand_NoMatch;
4164 }
4165 
4166 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
4167   uint8_t Val;
4168   StringRef Str = Parser.getTok().getString();
4169 
4170   auto Res = parseExpTgtImpl(Str, Val);
4171   if (Res != MatchOperand_Success)
4172     return Res;
4173 
4174   SMLoc S = Parser.getTok().getLoc();
4175   Parser.Lex();
4176 
4177   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
4178                                               AMDGPUOperand::ImmTyExpTgt));
4179   return MatchOperand_Success;
4180 }
4181 
4182 OperandMatchResultTy
4183 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
4184   using namespace llvm::AMDGPU::SendMsg;
4185 
4186   int64_t Imm16Val = 0;
4187   SMLoc S = Parser.getTok().getLoc();
4188 
4189   switch(getLexer().getKind()) {
4190   default:
4191     return MatchOperand_NoMatch;
4192   case AsmToken::Integer:
4193     // The operand can be an integer value.
4194     if (getParser().parseAbsoluteExpression(Imm16Val))
4195       return MatchOperand_NoMatch;
4196     if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
4197       Error(S, "invalid immediate: only 16-bit values are legal");
4198       // Do not return error code, but create an imm operand anyway and proceed
4199       // to the next operand, if any. That avoids unneccessary error messages.
4200     }
4201     break;
4202   case AsmToken::Identifier: {
4203       OperandInfoTy Msg(ID_UNKNOWN_);
4204       OperandInfoTy Operation(OP_UNKNOWN_);
4205       int64_t StreamId = STREAM_ID_DEFAULT_;
4206       if (parseSendMsgConstruct(Msg, Operation, StreamId))
4207         return MatchOperand_ParseFail;
4208       do {
4209         // Validate and encode message ID.
4210         if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE)
4211                 || Msg.Id == ID_SYSMSG)) {
4212           if (Msg.IsSymbolic)
4213             Error(S, "invalid/unsupported symbolic name of message");
4214           else
4215             Error(S, "invalid/unsupported code of message");
4216           break;
4217         }
4218         Imm16Val = (Msg.Id << ID_SHIFT_);
4219         // Validate and encode operation ID.
4220         if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) {
4221           if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) {
4222             if (Operation.IsSymbolic)
4223               Error(S, "invalid symbolic name of GS_OP");
4224             else
4225               Error(S, "invalid code of GS_OP: only 2-bit values are legal");
4226             break;
4227           }
4228           if (Operation.Id == OP_GS_NOP
4229               && Msg.Id != ID_GS_DONE) {
4230             Error(S, "invalid GS_OP: NOP is for GS_DONE only");
4231             break;
4232           }
4233           Imm16Val |= (Operation.Id << OP_SHIFT_);
4234         }
4235         if (Msg.Id == ID_SYSMSG) {
4236           if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) {
4237             if (Operation.IsSymbolic)
4238               Error(S, "invalid/unsupported symbolic name of SYSMSG_OP");
4239             else
4240               Error(S, "invalid/unsupported code of SYSMSG_OP");
4241             break;
4242           }
4243           Imm16Val |= (Operation.Id << OP_SHIFT_);
4244         }
4245         // Validate and encode stream ID.
4246         if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
4247           if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) {
4248             Error(S, "invalid stream id: only 2-bit values are legal");
4249             break;
4250           }
4251           Imm16Val |= (StreamId << STREAM_ID_SHIFT_);
4252         }
4253       } while (false);
4254     }
4255     break;
4256   }
4257   Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
4258   return MatchOperand_Success;
4259 }
4260 
4261 bool AMDGPUOperand::isSendMsg() const {
4262   return isImmTy(ImmTySendMsg);
4263 }
4264 
4265 //===----------------------------------------------------------------------===//
4266 // parser helpers
4267 //===----------------------------------------------------------------------===//
4268 
4269 bool
4270 AMDGPUAsmParser::trySkipId(const StringRef Id) {
4271   if (getLexer().getKind() == AsmToken::Identifier &&
4272       Parser.getTok().getString() == Id) {
4273     Parser.Lex();
4274     return true;
4275   }
4276   return false;
4277 }
4278 
4279 bool
4280 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
4281   if (getLexer().getKind() == Kind) {
4282     Parser.Lex();
4283     return true;
4284   }
4285   return false;
4286 }
4287 
4288 bool
4289 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
4290                            const StringRef ErrMsg) {
4291   if (!trySkipToken(Kind)) {
4292     Error(Parser.getTok().getLoc(), ErrMsg);
4293     return false;
4294   }
4295   return true;
4296 }
4297 
4298 bool
4299 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
4300   return !getParser().parseAbsoluteExpression(Imm);
4301 }
4302 
4303 bool
4304 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
4305   SMLoc S = Parser.getTok().getLoc();
4306   if (getLexer().getKind() == AsmToken::String) {
4307     Val = Parser.getTok().getStringContents();
4308     Parser.Lex();
4309     return true;
4310   } else {
4311     Error(S, ErrMsg);
4312     return false;
4313   }
4314 }
4315 
4316 //===----------------------------------------------------------------------===//
4317 // swizzle
4318 //===----------------------------------------------------------------------===//
4319 
4320 LLVM_READNONE
4321 static unsigned
4322 encodeBitmaskPerm(const unsigned AndMask,
4323                   const unsigned OrMask,
4324                   const unsigned XorMask) {
4325   using namespace llvm::AMDGPU::Swizzle;
4326 
4327   return BITMASK_PERM_ENC |
4328          (AndMask << BITMASK_AND_SHIFT) |
4329          (OrMask  << BITMASK_OR_SHIFT)  |
4330          (XorMask << BITMASK_XOR_SHIFT);
4331 }
4332 
4333 bool
4334 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
4335                                       const unsigned MinVal,
4336                                       const unsigned MaxVal,
4337                                       const StringRef ErrMsg) {
4338   for (unsigned i = 0; i < OpNum; ++i) {
4339     if (!skipToken(AsmToken::Comma, "expected a comma")){
4340       return false;
4341     }
4342     SMLoc ExprLoc = Parser.getTok().getLoc();
4343     if (!parseExpr(Op[i])) {
4344       return false;
4345     }
4346     if (Op[i] < MinVal || Op[i] > MaxVal) {
4347       Error(ExprLoc, ErrMsg);
4348       return false;
4349     }
4350   }
4351 
4352   return true;
4353 }
4354 
4355 bool
4356 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
4357   using namespace llvm::AMDGPU::Swizzle;
4358 
4359   int64_t Lane[LANE_NUM];
4360   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
4361                            "expected a 2-bit lane id")) {
4362     Imm = QUAD_PERM_ENC;
4363     for (auto i = 0; i < LANE_NUM; ++i) {
4364       Imm |= Lane[i] << (LANE_SHIFT * i);
4365     }
4366     return true;
4367   }
4368   return false;
4369 }
4370 
4371 bool
4372 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
4373   using namespace llvm::AMDGPU::Swizzle;
4374 
4375   SMLoc S = Parser.getTok().getLoc();
4376   int64_t GroupSize;
4377   int64_t LaneIdx;
4378 
4379   if (!parseSwizzleOperands(1, &GroupSize,
4380                             2, 32,
4381                             "group size must be in the interval [2,32]")) {
4382     return false;
4383   }
4384   if (!isPowerOf2_64(GroupSize)) {
4385     Error(S, "group size must be a power of two");
4386     return false;
4387   }
4388   if (parseSwizzleOperands(1, &LaneIdx,
4389                            0, GroupSize - 1,
4390                            "lane id must be in the interval [0,group size - 1]")) {
4391     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
4392     return true;
4393   }
4394   return false;
4395 }
4396 
4397 bool
4398 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
4399   using namespace llvm::AMDGPU::Swizzle;
4400 
4401   SMLoc S = Parser.getTok().getLoc();
4402   int64_t GroupSize;
4403 
4404   if (!parseSwizzleOperands(1, &GroupSize,
4405       2, 32, "group size must be in the interval [2,32]")) {
4406     return false;
4407   }
4408   if (!isPowerOf2_64(GroupSize)) {
4409     Error(S, "group size must be a power of two");
4410     return false;
4411   }
4412 
4413   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
4414   return true;
4415 }
4416 
4417 bool
4418 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
4419   using namespace llvm::AMDGPU::Swizzle;
4420 
4421   SMLoc S = Parser.getTok().getLoc();
4422   int64_t GroupSize;
4423 
4424   if (!parseSwizzleOperands(1, &GroupSize,
4425       1, 16, "group size must be in the interval [1,16]")) {
4426     return false;
4427   }
4428   if (!isPowerOf2_64(GroupSize)) {
4429     Error(S, "group size must be a power of two");
4430     return false;
4431   }
4432 
4433   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
4434   return true;
4435 }
4436 
4437 bool
4438 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
4439   using namespace llvm::AMDGPU::Swizzle;
4440 
4441   if (!skipToken(AsmToken::Comma, "expected a comma")) {
4442     return false;
4443   }
4444 
4445   StringRef Ctl;
4446   SMLoc StrLoc = Parser.getTok().getLoc();
4447   if (!parseString(Ctl)) {
4448     return false;
4449   }
4450   if (Ctl.size() != BITMASK_WIDTH) {
4451     Error(StrLoc, "expected a 5-character mask");
4452     return false;
4453   }
4454 
4455   unsigned AndMask = 0;
4456   unsigned OrMask = 0;
4457   unsigned XorMask = 0;
4458 
4459   for (size_t i = 0; i < Ctl.size(); ++i) {
4460     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
4461     switch(Ctl[i]) {
4462     default:
4463       Error(StrLoc, "invalid mask");
4464       return false;
4465     case '0':
4466       break;
4467     case '1':
4468       OrMask |= Mask;
4469       break;
4470     case 'p':
4471       AndMask |= Mask;
4472       break;
4473     case 'i':
4474       AndMask |= Mask;
4475       XorMask |= Mask;
4476       break;
4477     }
4478   }
4479 
4480   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
4481   return true;
4482 }
4483 
4484 bool
4485 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
4486 
4487   SMLoc OffsetLoc = Parser.getTok().getLoc();
4488 
4489   if (!parseExpr(Imm)) {
4490     return false;
4491   }
4492   if (!isUInt<16>(Imm)) {
4493     Error(OffsetLoc, "expected a 16-bit offset");
4494     return false;
4495   }
4496   return true;
4497 }
4498 
4499 bool
4500 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
4501   using namespace llvm::AMDGPU::Swizzle;
4502 
4503   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
4504 
4505     SMLoc ModeLoc = Parser.getTok().getLoc();
4506     bool Ok = false;
4507 
4508     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
4509       Ok = parseSwizzleQuadPerm(Imm);
4510     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
4511       Ok = parseSwizzleBitmaskPerm(Imm);
4512     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
4513       Ok = parseSwizzleBroadcast(Imm);
4514     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
4515       Ok = parseSwizzleSwap(Imm);
4516     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
4517       Ok = parseSwizzleReverse(Imm);
4518     } else {
4519       Error(ModeLoc, "expected a swizzle mode");
4520     }
4521 
4522     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
4523   }
4524 
4525   return false;
4526 }
4527 
4528 OperandMatchResultTy
4529 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
4530   SMLoc S = Parser.getTok().getLoc();
4531   int64_t Imm = 0;
4532 
4533   if (trySkipId("offset")) {
4534 
4535     bool Ok = false;
4536     if (skipToken(AsmToken::Colon, "expected a colon")) {
4537       if (trySkipId("swizzle")) {
4538         Ok = parseSwizzleMacro(Imm);
4539       } else {
4540         Ok = parseSwizzleOffset(Imm);
4541       }
4542     }
4543 
4544     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
4545 
4546     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
4547   } else {
4548     // Swizzle "offset" operand is optional.
4549     // If it is omitted, try parsing other optional operands.
4550     return parseOptionalOpr(Operands);
4551   }
4552 }
4553 
4554 bool
4555 AMDGPUOperand::isSwizzle() const {
4556   return isImmTy(ImmTySwizzle);
4557 }
4558 
4559 //===----------------------------------------------------------------------===//
4560 // sopp branch targets
4561 //===----------------------------------------------------------------------===//
4562 
4563 OperandMatchResultTy
4564 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
4565   SMLoc S = Parser.getTok().getLoc();
4566 
4567   switch (getLexer().getKind()) {
4568     default: return MatchOperand_ParseFail;
4569     case AsmToken::Integer: {
4570       int64_t Imm;
4571       if (getParser().parseAbsoluteExpression(Imm))
4572         return MatchOperand_ParseFail;
4573       Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
4574       return MatchOperand_Success;
4575     }
4576 
4577     case AsmToken::Identifier:
4578       Operands.push_back(AMDGPUOperand::CreateExpr(this,
4579           MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
4580                                   Parser.getTok().getString()), getContext()), S));
4581       Parser.Lex();
4582       return MatchOperand_Success;
4583   }
4584 }
4585 
4586 //===----------------------------------------------------------------------===//
4587 // mubuf
4588 //===----------------------------------------------------------------------===//
4589 
4590 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
4591   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
4592 }
4593 
4594 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
4595   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
4596 }
4597 
4598 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
4599                                const OperandVector &Operands,
4600                                bool IsAtomic,
4601                                bool IsAtomicReturn,
4602                                bool IsLds) {
4603   bool IsLdsOpcode = IsLds;
4604   bool HasLdsModifier = false;
4605   OptionalImmIndexMap OptionalIdx;
4606   assert(IsAtomicReturn ? IsAtomic : true);
4607 
4608   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4609     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4610 
4611     // Add the register arguments
4612     if (Op.isReg()) {
4613       Op.addRegOperands(Inst, 1);
4614       continue;
4615     }
4616 
4617     // Handle the case where soffset is an immediate
4618     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
4619       Op.addImmOperands(Inst, 1);
4620       continue;
4621     }
4622 
4623     HasLdsModifier = Op.isLDS();
4624 
4625     // Handle tokens like 'offen' which are sometimes hard-coded into the
4626     // asm string.  There are no MCInst operands for these.
4627     if (Op.isToken()) {
4628       continue;
4629     }
4630     assert(Op.isImm());
4631 
4632     // Handle optional arguments
4633     OptionalIdx[Op.getImmTy()] = i;
4634   }
4635 
4636   // This is a workaround for an llvm quirk which may result in an
4637   // incorrect instruction selection. Lds and non-lds versions of
4638   // MUBUF instructions are identical except that lds versions
4639   // have mandatory 'lds' modifier. However this modifier follows
4640   // optional modifiers and llvm asm matcher regards this 'lds'
4641   // modifier as an optional one. As a result, an lds version
4642   // of opcode may be selected even if it has no 'lds' modifier.
4643   if (IsLdsOpcode && !HasLdsModifier) {
4644     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
4645     if (NoLdsOpcode != -1) { // Got lds version - correct it.
4646       Inst.setOpcode(NoLdsOpcode);
4647       IsLdsOpcode = false;
4648     }
4649   }
4650 
4651   // Copy $vdata_in operand and insert as $vdata for MUBUF_Atomic RTN insns.
4652   if (IsAtomicReturn) {
4653     MCInst::iterator I = Inst.begin(); // $vdata_in is always at the beginning.
4654     Inst.insert(I, *I);
4655   }
4656 
4657   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
4658   if (!IsAtomic) { // glc is hard-coded.
4659     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
4660   }
4661   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
4662 
4663   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
4664     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
4665   }
4666 }
4667 
4668 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
4669   OptionalImmIndexMap OptionalIdx;
4670 
4671   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4672     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4673 
4674     // Add the register arguments
4675     if (Op.isReg()) {
4676       Op.addRegOperands(Inst, 1);
4677       continue;
4678     }
4679 
4680     // Handle the case where soffset is an immediate
4681     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
4682       Op.addImmOperands(Inst, 1);
4683       continue;
4684     }
4685 
4686     // Handle tokens like 'offen' which are sometimes hard-coded into the
4687     // asm string.  There are no MCInst operands for these.
4688     if (Op.isToken()) {
4689       continue;
4690     }
4691     assert(Op.isImm());
4692 
4693     // Handle optional arguments
4694     OptionalIdx[Op.getImmTy()] = i;
4695   }
4696 
4697   addOptionalImmOperand(Inst, Operands, OptionalIdx,
4698                         AMDGPUOperand::ImmTyOffset);
4699   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
4700   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
4701   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
4702   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
4703 }
4704 
4705 //===----------------------------------------------------------------------===//
4706 // mimg
4707 //===----------------------------------------------------------------------===//
4708 
4709 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
4710                               bool IsAtomic) {
4711   unsigned I = 1;
4712   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4713   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4714     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4715   }
4716 
4717   if (IsAtomic) {
4718     // Add src, same as dst
4719     assert(Desc.getNumDefs() == 1);
4720     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
4721   }
4722 
4723   OptionalImmIndexMap OptionalIdx;
4724 
4725   for (unsigned E = Operands.size(); I != E; ++I) {
4726     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4727 
4728     // Add the register arguments
4729     if (Op.isReg()) {
4730       Op.addRegOperands(Inst, 1);
4731     } else if (Op.isImmModifier()) {
4732       OptionalIdx[Op.getImmTy()] = I;
4733     } else {
4734       llvm_unreachable("unexpected operand type");
4735     }
4736   }
4737 
4738   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
4739   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
4740   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
4741   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
4742   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
4743   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
4744   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
4745   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
4746   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
4747 }
4748 
4749 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
4750   cvtMIMG(Inst, Operands, true);
4751 }
4752 
4753 //===----------------------------------------------------------------------===//
4754 // smrd
4755 //===----------------------------------------------------------------------===//
4756 
4757 bool AMDGPUOperand::isSMRDOffset8() const {
4758   return isImm() && isUInt<8>(getImm());
4759 }
4760 
4761 bool AMDGPUOperand::isSMRDOffset20() const {
4762   return isImm() && isUInt<20>(getImm());
4763 }
4764 
4765 bool AMDGPUOperand::isSMRDLiteralOffset() const {
4766   // 32-bit literals are only supported on CI and we only want to use them
4767   // when the offset is > 8-bits.
4768   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
4769 }
4770 
4771 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
4772   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4773 }
4774 
4775 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
4776   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4777 }
4778 
4779 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
4780   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4781 }
4782 
4783 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
4784   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4785 }
4786 
4787 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
4788   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4789 }
4790 
4791 //===----------------------------------------------------------------------===//
4792 // vop3
4793 //===----------------------------------------------------------------------===//
4794 
4795 static bool ConvertOmodMul(int64_t &Mul) {
4796   if (Mul != 1 && Mul != 2 && Mul != 4)
4797     return false;
4798 
4799   Mul >>= 1;
4800   return true;
4801 }
4802 
4803 static bool ConvertOmodDiv(int64_t &Div) {
4804   if (Div == 1) {
4805     Div = 0;
4806     return true;
4807   }
4808 
4809   if (Div == 2) {
4810     Div = 3;
4811     return true;
4812   }
4813 
4814   return false;
4815 }
4816 
4817 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
4818   if (BoundCtrl == 0) {
4819     BoundCtrl = 1;
4820     return true;
4821   }
4822 
4823   if (BoundCtrl == -1) {
4824     BoundCtrl = 0;
4825     return true;
4826   }
4827 
4828   return false;
4829 }
4830 
4831 // Note: the order in this table matches the order of operands in AsmString.
4832 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
4833   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
4834   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
4835   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
4836   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
4837   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
4838   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
4839   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
4840   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
4841   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
4842   {"dfmt",    AMDGPUOperand::ImmTyFORMAT, false, nullptr},
4843   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
4844   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
4845   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
4846   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
4847   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
4848   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
4849   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
4850   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
4851   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
4852   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
4853   {"a16",     AMDGPUOperand::ImmTyR128A16,  true, nullptr},
4854   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
4855   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
4856   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
4857   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
4858   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
4859   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
4860   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
4861   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
4862   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
4863   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
4864   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
4865   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
4866   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
4867   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
4868   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
4869   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
4870 };
4871 
4872 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
4873   unsigned size = Operands.size();
4874   assert(size > 0);
4875 
4876   OperandMatchResultTy res = parseOptionalOpr(Operands);
4877 
4878   // This is a hack to enable hardcoded mandatory operands which follow
4879   // optional operands.
4880   //
4881   // Current design assumes that all operands after the first optional operand
4882   // are also optional. However implementation of some instructions violates
4883   // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
4884   //
4885   // To alleviate this problem, we have to (implicitly) parse extra operands
4886   // to make sure autogenerated parser of custom operands never hit hardcoded
4887   // mandatory operands.
4888 
4889   if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
4890 
4891     // We have parsed the first optional operand.
4892     // Parse as many operands as necessary to skip all mandatory operands.
4893 
4894     for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
4895       if (res != MatchOperand_Success ||
4896           getLexer().is(AsmToken::EndOfStatement)) break;
4897       if (getLexer().is(AsmToken::Comma)) Parser.Lex();
4898       res = parseOptionalOpr(Operands);
4899     }
4900   }
4901 
4902   return res;
4903 }
4904 
4905 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
4906   OperandMatchResultTy res;
4907   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
4908     // try to parse any optional operand here
4909     if (Op.IsBit) {
4910       res = parseNamedBit(Op.Name, Operands, Op.Type);
4911     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
4912       res = parseOModOperand(Operands);
4913     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
4914                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
4915                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
4916       res = parseSDWASel(Operands, Op.Name, Op.Type);
4917     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
4918       res = parseSDWADstUnused(Operands);
4919     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
4920                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
4921                Op.Type == AMDGPUOperand::ImmTyNegLo ||
4922                Op.Type == AMDGPUOperand::ImmTyNegHi) {
4923       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
4924                                         Op.ConvertResult);
4925     } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT) {
4926       res = parseDfmtNfmt(Operands);
4927     } else {
4928       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
4929     }
4930     if (res != MatchOperand_NoMatch) {
4931       return res;
4932     }
4933   }
4934   return MatchOperand_NoMatch;
4935 }
4936 
4937 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
4938   StringRef Name = Parser.getTok().getString();
4939   if (Name == "mul") {
4940     return parseIntWithPrefix("mul", Operands,
4941                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
4942   }
4943 
4944   if (Name == "div") {
4945     return parseIntWithPrefix("div", Operands,
4946                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
4947   }
4948 
4949   return MatchOperand_NoMatch;
4950 }
4951 
4952 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
4953   cvtVOP3P(Inst, Operands);
4954 
4955   int Opc = Inst.getOpcode();
4956 
4957   int SrcNum;
4958   const int Ops[] = { AMDGPU::OpName::src0,
4959                       AMDGPU::OpName::src1,
4960                       AMDGPU::OpName::src2 };
4961   for (SrcNum = 0;
4962        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
4963        ++SrcNum);
4964   assert(SrcNum > 0);
4965 
4966   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4967   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4968 
4969   if ((OpSel & (1 << SrcNum)) != 0) {
4970     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
4971     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
4972     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
4973   }
4974 }
4975 
4976 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
4977       // 1. This operand is input modifiers
4978   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
4979       // 2. This is not last operand
4980       && Desc.NumOperands > (OpNum + 1)
4981       // 3. Next operand is register class
4982       && Desc.OpInfo[OpNum + 1].RegClass != -1
4983       // 4. Next register is not tied to any other operand
4984       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
4985 }
4986 
4987 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
4988 {
4989   OptionalImmIndexMap OptionalIdx;
4990   unsigned Opc = Inst.getOpcode();
4991 
4992   unsigned I = 1;
4993   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4994   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4995     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4996   }
4997 
4998   for (unsigned E = Operands.size(); I != E; ++I) {
4999     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5000     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5001       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
5002     } else if (Op.isInterpSlot() ||
5003                Op.isInterpAttr() ||
5004                Op.isAttrChan()) {
5005       Inst.addOperand(MCOperand::createImm(Op.Imm.Val));
5006     } else if (Op.isImmModifier()) {
5007       OptionalIdx[Op.getImmTy()] = I;
5008     } else {
5009       llvm_unreachable("unhandled operand type");
5010     }
5011   }
5012 
5013   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
5014     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
5015   }
5016 
5017   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
5018     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
5019   }
5020 
5021   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
5022     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
5023   }
5024 }
5025 
5026 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
5027                               OptionalImmIndexMap &OptionalIdx) {
5028   unsigned Opc = Inst.getOpcode();
5029 
5030   unsigned I = 1;
5031   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5032   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5033     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5034   }
5035 
5036   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
5037     // This instruction has src modifiers
5038     for (unsigned E = Operands.size(); I != E; ++I) {
5039       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5040       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5041         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
5042       } else if (Op.isImmModifier()) {
5043         OptionalIdx[Op.getImmTy()] = I;
5044       } else if (Op.isRegOrImm()) {
5045         Op.addRegOrImmOperands(Inst, 1);
5046       } else {
5047         llvm_unreachable("unhandled operand type");
5048       }
5049     }
5050   } else {
5051     // No src modifiers
5052     for (unsigned E = Operands.size(); I != E; ++I) {
5053       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5054       if (Op.isMod()) {
5055         OptionalIdx[Op.getImmTy()] = I;
5056       } else {
5057         Op.addRegOrImmOperands(Inst, 1);
5058       }
5059     }
5060   }
5061 
5062   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
5063     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
5064   }
5065 
5066   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
5067     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
5068   }
5069 
5070   // Special case v_mac_{f16, f32} and v_fmac_f32 (gfx906):
5071   // it has src2 register operand that is tied to dst operand
5072   // we don't allow modifiers for this operand in assembler so src2_modifiers
5073   // should be 0.
5074   if (Opc == AMDGPU::V_MAC_F32_e64_si ||
5075       Opc == AMDGPU::V_MAC_F32_e64_vi ||
5076       Opc == AMDGPU::V_MAC_F16_e64_vi ||
5077       Opc == AMDGPU::V_FMAC_F32_e64_vi) {
5078     auto it = Inst.begin();
5079     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
5080     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
5081     ++it;
5082     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
5083   }
5084 }
5085 
5086 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
5087   OptionalImmIndexMap OptionalIdx;
5088   cvtVOP3(Inst, Operands, OptionalIdx);
5089 }
5090 
5091 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
5092                                const OperandVector &Operands) {
5093   OptionalImmIndexMap OptIdx;
5094   const int Opc = Inst.getOpcode();
5095   const MCInstrDesc &Desc = MII.get(Opc);
5096 
5097   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
5098 
5099   cvtVOP3(Inst, Operands, OptIdx);
5100 
5101   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
5102     assert(!IsPacked);
5103     Inst.addOperand(Inst.getOperand(0));
5104   }
5105 
5106   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
5107   // instruction, and then figure out where to actually put the modifiers
5108 
5109   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
5110 
5111   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
5112   if (OpSelHiIdx != -1) {
5113     int DefaultVal = IsPacked ? -1 : 0;
5114     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
5115                           DefaultVal);
5116   }
5117 
5118   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
5119   if (NegLoIdx != -1) {
5120     assert(IsPacked);
5121     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
5122     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
5123   }
5124 
5125   const int Ops[] = { AMDGPU::OpName::src0,
5126                       AMDGPU::OpName::src1,
5127                       AMDGPU::OpName::src2 };
5128   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
5129                          AMDGPU::OpName::src1_modifiers,
5130                          AMDGPU::OpName::src2_modifiers };
5131 
5132   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
5133 
5134   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
5135   unsigned OpSelHi = 0;
5136   unsigned NegLo = 0;
5137   unsigned NegHi = 0;
5138 
5139   if (OpSelHiIdx != -1) {
5140     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
5141   }
5142 
5143   if (NegLoIdx != -1) {
5144     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
5145     NegLo = Inst.getOperand(NegLoIdx).getImm();
5146     NegHi = Inst.getOperand(NegHiIdx).getImm();
5147   }
5148 
5149   for (int J = 0; J < 3; ++J) {
5150     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
5151     if (OpIdx == -1)
5152       break;
5153 
5154     uint32_t ModVal = 0;
5155 
5156     if ((OpSel & (1 << J)) != 0)
5157       ModVal |= SISrcMods::OP_SEL_0;
5158 
5159     if ((OpSelHi & (1 << J)) != 0)
5160       ModVal |= SISrcMods::OP_SEL_1;
5161 
5162     if ((NegLo & (1 << J)) != 0)
5163       ModVal |= SISrcMods::NEG;
5164 
5165     if ((NegHi & (1 << J)) != 0)
5166       ModVal |= SISrcMods::NEG_HI;
5167 
5168     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
5169 
5170     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
5171   }
5172 }
5173 
5174 //===----------------------------------------------------------------------===//
5175 // dpp
5176 //===----------------------------------------------------------------------===//
5177 
5178 bool AMDGPUOperand::isDPPCtrl() const {
5179   using namespace AMDGPU::DPP;
5180 
5181   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
5182   if (result) {
5183     int64_t Imm = getImm();
5184     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
5185            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
5186            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
5187            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
5188            (Imm == DppCtrl::WAVE_SHL1) ||
5189            (Imm == DppCtrl::WAVE_ROL1) ||
5190            (Imm == DppCtrl::WAVE_SHR1) ||
5191            (Imm == DppCtrl::WAVE_ROR1) ||
5192            (Imm == DppCtrl::ROW_MIRROR) ||
5193            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
5194            (Imm == DppCtrl::BCAST15) ||
5195            (Imm == DppCtrl::BCAST31);
5196   }
5197   return false;
5198 }
5199 
5200 bool AMDGPUOperand::isGPRIdxMode() const {
5201   return isImm() && isUInt<4>(getImm());
5202 }
5203 
5204 bool AMDGPUOperand::isS16Imm() const {
5205   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
5206 }
5207 
5208 bool AMDGPUOperand::isU16Imm() const {
5209   return isImm() && isUInt<16>(getImm());
5210 }
5211 
5212 OperandMatchResultTy
5213 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
5214   using namespace AMDGPU::DPP;
5215 
5216   SMLoc S = Parser.getTok().getLoc();
5217   StringRef Prefix;
5218   int64_t Int;
5219 
5220   if (getLexer().getKind() == AsmToken::Identifier) {
5221     Prefix = Parser.getTok().getString();
5222   } else {
5223     return MatchOperand_NoMatch;
5224   }
5225 
5226   if (Prefix == "row_mirror") {
5227     Int = DppCtrl::ROW_MIRROR;
5228     Parser.Lex();
5229   } else if (Prefix == "row_half_mirror") {
5230     Int = DppCtrl::ROW_HALF_MIRROR;
5231     Parser.Lex();
5232   } else {
5233     // Check to prevent parseDPPCtrlOps from eating invalid tokens
5234     if (Prefix != "quad_perm"
5235         && Prefix != "row_shl"
5236         && Prefix != "row_shr"
5237         && Prefix != "row_ror"
5238         && Prefix != "wave_shl"
5239         && Prefix != "wave_rol"
5240         && Prefix != "wave_shr"
5241         && Prefix != "wave_ror"
5242         && Prefix != "row_bcast") {
5243       return MatchOperand_NoMatch;
5244     }
5245 
5246     Parser.Lex();
5247     if (getLexer().isNot(AsmToken::Colon))
5248       return MatchOperand_ParseFail;
5249 
5250     if (Prefix == "quad_perm") {
5251       // quad_perm:[%d,%d,%d,%d]
5252       Parser.Lex();
5253       if (getLexer().isNot(AsmToken::LBrac))
5254         return MatchOperand_ParseFail;
5255       Parser.Lex();
5256 
5257       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
5258         return MatchOperand_ParseFail;
5259 
5260       for (int i = 0; i < 3; ++i) {
5261         if (getLexer().isNot(AsmToken::Comma))
5262           return MatchOperand_ParseFail;
5263         Parser.Lex();
5264 
5265         int64_t Temp;
5266         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
5267           return MatchOperand_ParseFail;
5268         const int shift = i*2 + 2;
5269         Int += (Temp << shift);
5270       }
5271 
5272       if (getLexer().isNot(AsmToken::RBrac))
5273         return MatchOperand_ParseFail;
5274       Parser.Lex();
5275     } else {
5276       // sel:%d
5277       Parser.Lex();
5278       if (getParser().parseAbsoluteExpression(Int))
5279         return MatchOperand_ParseFail;
5280 
5281       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
5282         Int |= DppCtrl::ROW_SHL0;
5283       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
5284         Int |= DppCtrl::ROW_SHR0;
5285       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
5286         Int |= DppCtrl::ROW_ROR0;
5287       } else if (Prefix == "wave_shl" && 1 == Int) {
5288         Int = DppCtrl::WAVE_SHL1;
5289       } else if (Prefix == "wave_rol" && 1 == Int) {
5290         Int = DppCtrl::WAVE_ROL1;
5291       } else if (Prefix == "wave_shr" && 1 == Int) {
5292         Int = DppCtrl::WAVE_SHR1;
5293       } else if (Prefix == "wave_ror" && 1 == Int) {
5294         Int = DppCtrl::WAVE_ROR1;
5295       } else if (Prefix == "row_bcast") {
5296         if (Int == 15) {
5297           Int = DppCtrl::BCAST15;
5298         } else if (Int == 31) {
5299           Int = DppCtrl::BCAST31;
5300         } else {
5301           return MatchOperand_ParseFail;
5302         }
5303       } else {
5304         return MatchOperand_ParseFail;
5305       }
5306     }
5307   }
5308 
5309   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
5310   return MatchOperand_Success;
5311 }
5312 
5313 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
5314   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
5315 }
5316 
5317 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
5318   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
5319 }
5320 
5321 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
5322   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
5323 }
5324 
5325 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
5326   OptionalImmIndexMap OptionalIdx;
5327 
5328   unsigned I = 1;
5329   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5330   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5331     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5332   }
5333 
5334   for (unsigned E = Operands.size(); I != E; ++I) {
5335     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
5336                                             MCOI::TIED_TO);
5337     if (TiedTo != -1) {
5338       assert((unsigned)TiedTo < Inst.getNumOperands());
5339       // handle tied old or src2 for MAC instructions
5340       Inst.addOperand(Inst.getOperand(TiedTo));
5341     }
5342     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5343     // Add the register arguments
5344     if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
5345       // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
5346       // Skip it.
5347       continue;
5348     } if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5349       Op.addRegWithFPInputModsOperands(Inst, 2);
5350     } else if (Op.isDPPCtrl()) {
5351       Op.addImmOperands(Inst, 1);
5352     } else if (Op.isImm()) {
5353       // Handle optional arguments
5354       OptionalIdx[Op.getImmTy()] = I;
5355     } else {
5356       llvm_unreachable("Invalid operand type");
5357     }
5358   }
5359 
5360   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
5361   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
5362   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
5363 }
5364 
5365 //===----------------------------------------------------------------------===//
5366 // sdwa
5367 //===----------------------------------------------------------------------===//
5368 
5369 OperandMatchResultTy
5370 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
5371                               AMDGPUOperand::ImmTy Type) {
5372   using namespace llvm::AMDGPU::SDWA;
5373 
5374   SMLoc S = Parser.getTok().getLoc();
5375   StringRef Value;
5376   OperandMatchResultTy res;
5377 
5378   res = parseStringWithPrefix(Prefix, Value);
5379   if (res != MatchOperand_Success) {
5380     return res;
5381   }
5382 
5383   int64_t Int;
5384   Int = StringSwitch<int64_t>(Value)
5385         .Case("BYTE_0", SdwaSel::BYTE_0)
5386         .Case("BYTE_1", SdwaSel::BYTE_1)
5387         .Case("BYTE_2", SdwaSel::BYTE_2)
5388         .Case("BYTE_3", SdwaSel::BYTE_3)
5389         .Case("WORD_0", SdwaSel::WORD_0)
5390         .Case("WORD_1", SdwaSel::WORD_1)
5391         .Case("DWORD", SdwaSel::DWORD)
5392         .Default(0xffffffff);
5393   Parser.Lex(); // eat last token
5394 
5395   if (Int == 0xffffffff) {
5396     return MatchOperand_ParseFail;
5397   }
5398 
5399   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
5400   return MatchOperand_Success;
5401 }
5402 
5403 OperandMatchResultTy
5404 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
5405   using namespace llvm::AMDGPU::SDWA;
5406 
5407   SMLoc S = Parser.getTok().getLoc();
5408   StringRef Value;
5409   OperandMatchResultTy res;
5410 
5411   res = parseStringWithPrefix("dst_unused", Value);
5412   if (res != MatchOperand_Success) {
5413     return res;
5414   }
5415 
5416   int64_t Int;
5417   Int = StringSwitch<int64_t>(Value)
5418         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
5419         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
5420         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
5421         .Default(0xffffffff);
5422   Parser.Lex(); // eat last token
5423 
5424   if (Int == 0xffffffff) {
5425     return MatchOperand_ParseFail;
5426   }
5427 
5428   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
5429   return MatchOperand_Success;
5430 }
5431 
5432 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
5433   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
5434 }
5435 
5436 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
5437   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
5438 }
5439 
5440 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
5441   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
5442 }
5443 
5444 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
5445   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
5446 }
5447 
5448 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
5449                               uint64_t BasicInstType, bool skipVcc) {
5450   using namespace llvm::AMDGPU::SDWA;
5451 
5452   OptionalImmIndexMap OptionalIdx;
5453   bool skippedVcc = false;
5454 
5455   unsigned I = 1;
5456   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5457   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5458     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5459   }
5460 
5461   for (unsigned E = Operands.size(); I != E; ++I) {
5462     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5463     if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
5464       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
5465       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
5466       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
5467       // Skip VCC only if we didn't skip it on previous iteration.
5468       if (BasicInstType == SIInstrFlags::VOP2 &&
5469           (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
5470         skippedVcc = true;
5471         continue;
5472       } else if (BasicInstType == SIInstrFlags::VOPC &&
5473                  Inst.getNumOperands() == 0) {
5474         skippedVcc = true;
5475         continue;
5476       }
5477     }
5478     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5479       Op.addRegOrImmWithInputModsOperands(Inst, 2);
5480     } else if (Op.isImm()) {
5481       // Handle optional arguments
5482       OptionalIdx[Op.getImmTy()] = I;
5483     } else {
5484       llvm_unreachable("Invalid operand type");
5485     }
5486     skippedVcc = false;
5487   }
5488 
5489   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
5490       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
5491     // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
5492     switch (BasicInstType) {
5493     case SIInstrFlags::VOP1:
5494       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
5495       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
5496         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
5497       }
5498       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
5499       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
5500       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
5501       break;
5502 
5503     case SIInstrFlags::VOP2:
5504       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
5505       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
5506         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
5507       }
5508       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
5509       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
5510       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
5511       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
5512       break;
5513 
5514     case SIInstrFlags::VOPC:
5515       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
5516       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
5517       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
5518       break;
5519 
5520     default:
5521       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
5522     }
5523   }
5524 
5525   // special case v_mac_{f16, f32}:
5526   // it has src2 register operand that is tied to dst operand
5527   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
5528       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
5529     auto it = Inst.begin();
5530     std::advance(
5531       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
5532     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
5533   }
5534 }
5535 
5536 /// Force static initialization.
5537 extern "C" void LLVMInitializeAMDGPUAsmParser() {
5538   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
5539   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
5540 }
5541 
5542 #define GET_REGISTER_MATCHER
5543 #define GET_MATCHER_IMPLEMENTATION
5544 #define GET_MNEMONIC_SPELL_CHECKER
5545 #include "AMDGPUGenAsmMatcher.inc"
5546 
5547 // This fuction should be defined after auto-generated include so that we have
5548 // MatchClassKind enum defined
5549 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
5550                                                      unsigned Kind) {
5551   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
5552   // But MatchInstructionImpl() expects to meet token and fails to validate
5553   // operand. This method checks if we are given immediate operand but expect to
5554   // get corresponding token.
5555   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
5556   switch (Kind) {
5557   case MCK_addr64:
5558     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
5559   case MCK_gds:
5560     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
5561   case MCK_lds:
5562     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
5563   case MCK_glc:
5564     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
5565   case MCK_idxen:
5566     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
5567   case MCK_offen:
5568     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
5569   case MCK_SSrcB32:
5570     // When operands have expression values, they will return true for isToken,
5571     // because it is not possible to distinguish between a token and an
5572     // expression at parse time. MatchInstructionImpl() will always try to
5573     // match an operand as a token, when isToken returns true, and when the
5574     // name of the expression is not a valid token, the match will fail,
5575     // so we need to handle it here.
5576     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
5577   case MCK_SSrcF32:
5578     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
5579   case MCK_SoppBrTarget:
5580     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
5581   case MCK_VReg32OrOff:
5582     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
5583   case MCK_InterpSlot:
5584     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
5585   case MCK_Attr:
5586     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
5587   case MCK_AttrChan:
5588     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
5589   default:
5590     return Match_InvalidOperand;
5591   }
5592 }
5593