1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "Utils/AMDGPUAsmUtils.h"
16 #include "Utils/AMDGPUBaseInfo.h"
17 #include "Utils/AMDKernelCodeTUtils.h"
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SmallBitVector.h"
23 #include "llvm/ADT/SmallString.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/ADT/StringSwitch.h"
26 #include "llvm/ADT/Twine.h"
27 #include "llvm/BinaryFormat/ELF.h"
28 #include "llvm/MC/MCAsmInfo.h"
29 #include "llvm/MC/MCContext.h"
30 #include "llvm/MC/MCExpr.h"
31 #include "llvm/MC/MCInst.h"
32 #include "llvm/MC/MCInstrDesc.h"
33 #include "llvm/MC/MCInstrInfo.h"
34 #include "llvm/MC/MCParser/MCAsmLexer.h"
35 #include "llvm/MC/MCParser/MCAsmParser.h"
36 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
37 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
38 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
39 #include "llvm/MC/MCRegisterInfo.h"
40 #include "llvm/MC/MCStreamer.h"
41 #include "llvm/MC/MCSubtargetInfo.h"
42 #include "llvm/MC/MCSymbol.h"
43 #include "llvm/Support/AMDGPUMetadata.h"
44 #include "llvm/Support/AMDHSAKernelDescriptor.h"
45 #include "llvm/Support/Casting.h"
46 #include "llvm/Support/Compiler.h"
47 #include "llvm/Support/ErrorHandling.h"
48 #include "llvm/Support/MachineValueType.h"
49 #include "llvm/Support/MathExtras.h"
50 #include "llvm/Support/SMLoc.h"
51 #include "llvm/Support/TargetParser.h"
52 #include "llvm/Support/TargetRegistry.h"
53 #include "llvm/Support/raw_ostream.h"
54 #include <algorithm>
55 #include <cassert>
56 #include <cstdint>
57 #include <cstring>
58 #include <iterator>
59 #include <map>
60 #include <memory>
61 #include <string>
62 
63 using namespace llvm;
64 using namespace llvm::AMDGPU;
65 using namespace llvm::amdhsa;
66 
67 namespace {
68 
69 class AMDGPUAsmParser;
70 
71 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };
72 
73 //===----------------------------------------------------------------------===//
74 // Operand
75 //===----------------------------------------------------------------------===//
76 
77 class AMDGPUOperand : public MCParsedAsmOperand {
78   enum KindTy {
79     Token,
80     Immediate,
81     Register,
82     Expression
83   } Kind;
84 
85   SMLoc StartLoc, EndLoc;
86   const AMDGPUAsmParser *AsmParser;
87 
88 public:
89   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
90     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
91 
92   using Ptr = std::unique_ptr<AMDGPUOperand>;
93 
94   struct Modifiers {
95     bool Abs = false;
96     bool Neg = false;
97     bool Sext = false;
98 
99     bool hasFPModifiers() const { return Abs || Neg; }
100     bool hasIntModifiers() const { return Sext; }
101     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
102 
103     int64_t getFPModifiersOperand() const {
104       int64_t Operand = 0;
105       Operand |= Abs ? SISrcMods::ABS : 0;
106       Operand |= Neg ? SISrcMods::NEG : 0;
107       return Operand;
108     }
109 
110     int64_t getIntModifiersOperand() const {
111       int64_t Operand = 0;
112       Operand |= Sext ? SISrcMods::SEXT : 0;
113       return Operand;
114     }
115 
116     int64_t getModifiersOperand() const {
117       assert(!(hasFPModifiers() && hasIntModifiers())
118            && "fp and int modifiers should not be used simultaneously");
119       if (hasFPModifiers()) {
120         return getFPModifiersOperand();
121       } else if (hasIntModifiers()) {
122         return getIntModifiersOperand();
123       } else {
124         return 0;
125       }
126     }
127 
128     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
129   };
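
  // Illustrative example: for a source operand written as "-|v0|" the parser
  // sets both Neg and Abs, so
  //   Modifiers M; M.Abs = true; M.Neg = true;
  //   M.getModifiersOperand() == (SISrcMods::NEG | SISrcMods::ABS)
  // and that value is emitted as the corresponding src*_modifiers immediate.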
130 
131   enum ImmTy {
132     ImmTyNone,
133     ImmTyGDS,
134     ImmTyLDS,
135     ImmTyOffen,
136     ImmTyIdxen,
137     ImmTyAddr64,
138     ImmTyOffset,
139     ImmTyInstOffset,
140     ImmTyOffset0,
141     ImmTyOffset1,
142     ImmTyGLC,
143     ImmTySLC,
144     ImmTyTFE,
145     ImmTyD16,
146     ImmTyClampSI,
147     ImmTyOModSI,
148     ImmTyDppCtrl,
149     ImmTyDppRowMask,
150     ImmTyDppBankMask,
151     ImmTyDppBoundCtrl,
152     ImmTySdwaDstSel,
153     ImmTySdwaSrc0Sel,
154     ImmTySdwaSrc1Sel,
155     ImmTySdwaDstUnused,
156     ImmTyDMask,
157     ImmTyUNorm,
158     ImmTyDA,
159     ImmTyR128A16,
160     ImmTyLWE,
161     ImmTyExpTgt,
162     ImmTyExpCompr,
163     ImmTyExpVM,
164     ImmTyFORMAT,
165     ImmTyHwreg,
166     ImmTyOff,
167     ImmTySendMsg,
168     ImmTyInterpSlot,
169     ImmTyInterpAttr,
170     ImmTyAttrChan,
171     ImmTyOpSel,
172     ImmTyOpSelHi,
173     ImmTyNegLo,
174     ImmTyNegHi,
175     ImmTySwizzle,
176     ImmTyHigh
177   };
178 
179   struct TokOp {
180     const char *Data;
181     unsigned Length;
182   };
183 
184   struct ImmOp {
185     int64_t Val;
186     ImmTy Type;
187     bool IsFPImm;
188     Modifiers Mods;
189   };
190 
191   struct RegOp {
192     unsigned RegNo;
193     bool IsForcedVOP3;
194     Modifiers Mods;
195   };
196 
197   union {
198     TokOp Tok;
199     ImmOp Imm;
200     RegOp Reg;
201     const MCExpr *Expr;
202   };
203 
204   bool isToken() const override {
205     if (Kind == Token)
206       return true;
207 
208     if (Kind != Expression || !Expr)
209       return false;
210 
211     // When parsing operands, we can't always tell if something was meant to be
212     // a token, like 'gds', or an expression that references a global variable.
213     // In this case, we assume the string is an expression, and if we need to
214     // interpret it as a token, then we treat the symbol name as the token.
215     return isa<MCSymbolRefExpr>(Expr);
216   }
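
  // For example (illustrative): in "ds_add_u32 v0, v1 gds" the trailing "gds"
  // may first be parsed as an expression referencing a symbol named "gds";
  // isToken() returns true for that case so the matcher can treat the symbol
  // name, exposed via getToken(), as the token text.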
217 
218   bool isImm() const override {
219     return Kind == Immediate;
220   }
221 
222   bool isInlinableImm(MVT type) const;
223   bool isLiteralImm(MVT type) const;
224 
225   bool isRegKind() const {
226     return Kind == Register;
227   }
228 
229   bool isReg() const override {
230     return isRegKind() && !hasModifiers();
231   }
232 
233   bool isRegOrImmWithInputMods(MVT type) const {
234     return isRegKind() || isInlinableImm(type);
235   }
236 
237   bool isRegOrImmWithInt16InputMods() const {
238     return isRegOrImmWithInputMods(MVT::i16);
239   }
240 
241   bool isRegOrImmWithInt32InputMods() const {
242     return isRegOrImmWithInputMods(MVT::i32);
243   }
244 
245   bool isRegOrImmWithInt64InputMods() const {
246     return isRegOrImmWithInputMods(MVT::i64);
247   }
248 
249   bool isRegOrImmWithFP16InputMods() const {
250     return isRegOrImmWithInputMods(MVT::f16);
251   }
252 
253   bool isRegOrImmWithFP32InputMods() const {
254     return isRegOrImmWithInputMods(MVT::f32);
255   }
256 
257   bool isRegOrImmWithFP64InputMods() const {
258     return isRegOrImmWithInputMods(MVT::f64);
259   }
260 
261   bool isVReg() const {
262     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
263            isRegClass(AMDGPU::VReg_64RegClassID) ||
264            isRegClass(AMDGPU::VReg_96RegClassID) ||
265            isRegClass(AMDGPU::VReg_128RegClassID) ||
266            isRegClass(AMDGPU::VReg_256RegClassID) ||
267            isRegClass(AMDGPU::VReg_512RegClassID);
268   }
269 
270   bool isVReg32OrOff() const {
271     return isOff() || isRegClass(AMDGPU::VGPR_32RegClassID);
272   }
273 
274   bool isSDWAOperand(MVT type) const;
275   bool isSDWAFP16Operand() const;
276   bool isSDWAFP32Operand() const;
277   bool isSDWAInt16Operand() const;
278   bool isSDWAInt32Operand() const;
279 
280   bool isImmTy(ImmTy ImmT) const {
281     return isImm() && Imm.Type == ImmT;
282   }
283 
284   bool isImmModifier() const {
285     return isImm() && Imm.Type != ImmTyNone;
286   }
287 
288   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
289   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
290   bool isDMask() const { return isImmTy(ImmTyDMask); }
291   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
292   bool isDA() const { return isImmTy(ImmTyDA); }
293   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
294   bool isLWE() const { return isImmTy(ImmTyLWE); }
295   bool isOff() const { return isImmTy(ImmTyOff); }
296   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
297   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
298   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
299   bool isOffen() const { return isImmTy(ImmTyOffen); }
300   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
301   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
302   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
303   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<16>(getImm()); }
304   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
305 
306   bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
307   bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
308   bool isGDS() const { return isImmTy(ImmTyGDS); }
309   bool isLDS() const { return isImmTy(ImmTyLDS); }
310   bool isGLC() const { return isImmTy(ImmTyGLC); }
311   bool isSLC() const { return isImmTy(ImmTySLC); }
312   bool isTFE() const { return isImmTy(ImmTyTFE); }
313   bool isD16() const { return isImmTy(ImmTyD16); }
314   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
315   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
316   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
317   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
318   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
319   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
320   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
321   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
322   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
323   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
324   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
325   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
326   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
327   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
328   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
329   bool isHigh() const { return isImmTy(ImmTyHigh); }
330 
331   bool isMod() const {
332     return isClampSI() || isOModSI();
333   }
334 
335   bool isRegOrImm() const {
336     return isReg() || isImm();
337   }
338 
339   bool isRegClass(unsigned RCID) const;
340 
341   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
342     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
343   }
344 
345   bool isSCSrcB16() const {
346     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
347   }
348 
349   bool isSCSrcV2B16() const {
350     return isSCSrcB16();
351   }
352 
353   bool isSCSrcB32() const {
354     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
355   }
356 
357   bool isSCSrcB64() const {
358     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
359   }
360 
361   bool isSCSrcF16() const {
362     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
363   }
364 
365   bool isSCSrcV2F16() const {
366     return isSCSrcF16();
367   }
368 
369   bool isSCSrcF32() const {
370     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
371   }
372 
373   bool isSCSrcF64() const {
374     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
375   }
376 
377   bool isSSrcB32() const {
378     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
379   }
380 
381   bool isSSrcB16() const {
382     return isSCSrcB16() || isLiteralImm(MVT::i16);
383   }
384 
385   bool isSSrcV2B16() const {
386     llvm_unreachable("cannot happen");
387     return isSSrcB16();
388   }
389 
390   bool isSSrcB64() const {
391     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
392     // See isVSrc64().
393     return isSCSrcB64() || isLiteralImm(MVT::i64);
394   }
395 
396   bool isSSrcF32() const {
397     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
398   }
399 
400   bool isSSrcF64() const {
401     return isSCSrcB64() || isLiteralImm(MVT::f64);
402   }
403 
404   bool isSSrcF16() const {
405     return isSCSrcB16() || isLiteralImm(MVT::f16);
406   }
407 
408   bool isSSrcV2F16() const {
409     llvm_unreachable("cannot happen");
410     return isSSrcF16();
411   }
412 
413   bool isVCSrcB32() const {
414     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
415   }
416 
417   bool isVCSrcB64() const {
418     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
419   }
420 
421   bool isVCSrcB16() const {
422     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
423   }
424 
425   bool isVCSrcV2B16() const {
426     return isVCSrcB16();
427   }
428 
429   bool isVCSrcF32() const {
430     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
431   }
432 
433   bool isVCSrcF64() const {
434     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
435   }
436 
437   bool isVCSrcF16() const {
438     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
439   }
440 
441   bool isVCSrcV2F16() const {
442     return isVCSrcF16();
443   }
444 
445   bool isVSrcB32() const {
446     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
447   }
448 
449   bool isVSrcB64() const {
450     return isVCSrcF64() || isLiteralImm(MVT::i64);
451   }
452 
453   bool isVSrcB16() const {
454     return isVCSrcF16() || isLiteralImm(MVT::i16);
455   }
456 
457   bool isVSrcV2B16() const {
458     llvm_unreachable("cannot happen");
459     return isVSrcB16();
460   }
461 
462   bool isVSrcF32() const {
463     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
464   }
465 
466   bool isVSrcF64() const {
467     return isVCSrcF64() || isLiteralImm(MVT::f64);
468   }
469 
470   bool isVSrcF16() const {
471     return isVCSrcF16() || isLiteralImm(MVT::f16);
472   }
473 
474   bool isVSrcV2F16() const {
475     llvm_unreachable("cannot happen");
476     return isVSrcF16();
477   }
478 
479   bool isKImmFP32() const {
480     return isLiteralImm(MVT::f32);
481   }
482 
483   bool isKImmFP16() const {
484     return isLiteralImm(MVT::f16);
485   }
486 
487   bool isMem() const override {
488     return false;
489   }
490 
491   bool isExpr() const {
492     return Kind == Expression;
493   }
494 
495   bool isSoppBrTarget() const {
496     return isExpr() || isImm();
497   }
498 
499   bool isSWaitCnt() const;
500   bool isHwreg() const;
501   bool isSendMsg() const;
502   bool isSwizzle() const;
503   bool isSMRDOffset8() const;
504   bool isSMRDOffset20() const;
505   bool isSMRDLiteralOffset() const;
506   bool isDPPCtrl() const;
507   bool isGPRIdxMode() const;
508   bool isS16Imm() const;
509   bool isU16Imm() const;
510 
511   StringRef getExpressionAsToken() const {
512     assert(isExpr());
513     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
514     return S->getSymbol().getName();
515   }
516 
517   StringRef getToken() const {
518     assert(isToken());
519 
520     if (Kind == Expression)
521       return getExpressionAsToken();
522 
523     return StringRef(Tok.Data, Tok.Length);
524   }
525 
526   int64_t getImm() const {
527     assert(isImm());
528     return Imm.Val;
529   }
530 
531   ImmTy getImmTy() const {
532     assert(isImm());
533     return Imm.Type;
534   }
535 
536   unsigned getReg() const override {
537     return Reg.RegNo;
538   }
539 
540   SMLoc getStartLoc() const override {
541     return StartLoc;
542   }
543 
544   SMLoc getEndLoc() const override {
545     return EndLoc;
546   }
547 
548   SMRange getLocRange() const {
549     return SMRange(StartLoc, EndLoc);
550   }
551 
552   Modifiers getModifiers() const {
553     assert(isRegKind() || isImmTy(ImmTyNone));
554     return isRegKind() ? Reg.Mods : Imm.Mods;
555   }
556 
557   void setModifiers(Modifiers Mods) {
558     assert(isRegKind() || isImmTy(ImmTyNone));
559     if (isRegKind())
560       Reg.Mods = Mods;
561     else
562       Imm.Mods = Mods;
563   }
564 
565   bool hasModifiers() const {
566     return getModifiers().hasModifiers();
567   }
568 
569   bool hasFPModifiers() const {
570     return getModifiers().hasFPModifiers();
571   }
572 
573   bool hasIntModifiers() const {
574     return getModifiers().hasIntModifiers();
575   }
576 
577   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
578 
579   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
580 
581   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
582 
583   template <unsigned Bitwidth>
584   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
585 
586   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
587     addKImmFPOperands<16>(Inst, N);
588   }
589 
590   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
591     addKImmFPOperands<32>(Inst, N);
592   }
593 
594   void addRegOperands(MCInst &Inst, unsigned N) const;
595 
596   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
597     if (isRegKind())
598       addRegOperands(Inst, N);
599     else if (isExpr())
600       Inst.addOperand(MCOperand::createExpr(Expr));
601     else
602       addImmOperands(Inst, N);
603   }
604 
605   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
606     Modifiers Mods = getModifiers();
607     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
608     if (isRegKind()) {
609       addRegOperands(Inst, N);
610     } else {
611       addImmOperands(Inst, N, false);
612     }
613   }
614 
615   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
616     assert(!hasIntModifiers());
617     addRegOrImmWithInputModsOperands(Inst, N);
618   }
619 
620   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
621     assert(!hasFPModifiers());
622     addRegOrImmWithInputModsOperands(Inst, N);
623   }
624 
625   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
626     Modifiers Mods = getModifiers();
627     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
628     assert(isRegKind());
629     addRegOperands(Inst, N);
630   }
631 
632   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
633     assert(!hasIntModifiers());
634     addRegWithInputModsOperands(Inst, N);
635   }
636 
637   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
638     assert(!hasFPModifiers());
639     addRegWithInputModsOperands(Inst, N);
640   }
641 
642   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
643     if (isImm())
644       addImmOperands(Inst, N);
645     else {
646       assert(isExpr());
647       Inst.addOperand(MCOperand::createExpr(Expr));
648     }
649   }
650 
651   static void printImmTy(raw_ostream& OS, ImmTy Type) {
652     switch (Type) {
653     case ImmTyNone: OS << "None"; break;
654     case ImmTyGDS: OS << "GDS"; break;
655     case ImmTyLDS: OS << "LDS"; break;
656     case ImmTyOffen: OS << "Offen"; break;
657     case ImmTyIdxen: OS << "Idxen"; break;
658     case ImmTyAddr64: OS << "Addr64"; break;
659     case ImmTyOffset: OS << "Offset"; break;
660     case ImmTyInstOffset: OS << "InstOffset"; break;
661     case ImmTyOffset0: OS << "Offset0"; break;
662     case ImmTyOffset1: OS << "Offset1"; break;
663     case ImmTyGLC: OS << "GLC"; break;
664     case ImmTySLC: OS << "SLC"; break;
665     case ImmTyTFE: OS << "TFE"; break;
666     case ImmTyD16: OS << "D16"; break;
667     case ImmTyFORMAT: OS << "FORMAT"; break;
668     case ImmTyClampSI: OS << "ClampSI"; break;
669     case ImmTyOModSI: OS << "OModSI"; break;
670     case ImmTyDppCtrl: OS << "DppCtrl"; break;
671     case ImmTyDppRowMask: OS << "DppRowMask"; break;
672     case ImmTyDppBankMask: OS << "DppBankMask"; break;
673     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
674     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
675     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
676     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
677     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
678     case ImmTyDMask: OS << "DMask"; break;
679     case ImmTyUNorm: OS << "UNorm"; break;
680     case ImmTyDA: OS << "DA"; break;
681     case ImmTyR128A16: OS << "R128A16"; break;
682     case ImmTyLWE: OS << "LWE"; break;
683     case ImmTyOff: OS << "Off"; break;
684     case ImmTyExpTgt: OS << "ExpTgt"; break;
685     case ImmTyExpCompr: OS << "ExpCompr"; break;
686     case ImmTyExpVM: OS << "ExpVM"; break;
687     case ImmTyHwreg: OS << "Hwreg"; break;
688     case ImmTySendMsg: OS << "SendMsg"; break;
689     case ImmTyInterpSlot: OS << "InterpSlot"; break;
690     case ImmTyInterpAttr: OS << "InterpAttr"; break;
691     case ImmTyAttrChan: OS << "AttrChan"; break;
692     case ImmTyOpSel: OS << "OpSel"; break;
693     case ImmTyOpSelHi: OS << "OpSelHi"; break;
694     case ImmTyNegLo: OS << "NegLo"; break;
695     case ImmTyNegHi: OS << "NegHi"; break;
696     case ImmTySwizzle: OS << "Swizzle"; break;
697     case ImmTyHigh: OS << "High"; break;
698     }
699   }
700 
701   void print(raw_ostream &OS) const override {
702     switch (Kind) {
703     case Register:
704       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
705       break;
706     case Immediate:
707       OS << '<' << getImm();
708       if (getImmTy() != ImmTyNone) {
709         OS << " type: "; printImmTy(OS, getImmTy());
710       }
711       OS << " mods: " << Imm.Mods << '>';
712       break;
713     case Token:
714       OS << '\'' << getToken() << '\'';
715       break;
716     case Expression:
717       OS << "<expr " << *Expr << '>';
718       break;
719     }
720   }
721 
722   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
723                                       int64_t Val, SMLoc Loc,
724                                       ImmTy Type = ImmTyNone,
725                                       bool IsFPImm = false) {
726     auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
727     Op->Imm.Val = Val;
728     Op->Imm.IsFPImm = IsFPImm;
729     Op->Imm.Type = Type;
730     Op->Imm.Mods = Modifiers();
731     Op->StartLoc = Loc;
732     Op->EndLoc = Loc;
733     return Op;
734   }
735 
736   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
737                                         StringRef Str, SMLoc Loc,
738                                         bool HasExplicitEncodingSize = true) {
739     auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
740     Res->Tok.Data = Str.data();
741     Res->Tok.Length = Str.size();
742     Res->StartLoc = Loc;
743     Res->EndLoc = Loc;
744     return Res;
745   }
746 
747   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
748                                       unsigned RegNo, SMLoc S,
749                                       SMLoc E,
750                                       bool ForceVOP3) {
751     auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
752     Op->Reg.RegNo = RegNo;
753     Op->Reg.Mods = Modifiers();
754     Op->Reg.IsForcedVOP3 = ForceVOP3;
755     Op->StartLoc = S;
756     Op->EndLoc = E;
757     return Op;
758   }
759 
760   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
761                                        const class MCExpr *Expr, SMLoc S) {
762     auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
763     Op->Expr = Expr;
764     Op->StartLoc = S;
765     Op->EndLoc = S;
766     return Op;
767   }
768 };
769 
770 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
771   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
772   return OS;
773 }
774 
775 //===----------------------------------------------------------------------===//
776 // AsmParser
777 //===----------------------------------------------------------------------===//
778 
779 // Holds info related to the current kernel, e.g. count of SGPRs used.
780 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
781 // .amdgpu_hsa_kernel or at EOF.
782 class KernelScopeInfo {
783   int SgprIndexUnusedMin = -1;
784   int VgprIndexUnusedMin = -1;
785   MCContext *Ctx = nullptr;
786 
787   void usesSgprAt(int i) {
788     if (i >= SgprIndexUnusedMin) {
789       SgprIndexUnusedMin = ++i;
790       if (Ctx) {
791         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
792         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
793       }
794     }
795   }
796 
797   void usesVgprAt(int i) {
798     if (i >= VgprIndexUnusedMin) {
799       VgprIndexUnusedMin = ++i;
800       if (Ctx) {
801         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
802         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
803       }
804     }
805   }
806 
807 public:
808   KernelScopeInfo() = default;
809 
810   void initialize(MCContext &Context) {
811     Ctx = &Context;
812     usesSgprAt(SgprIndexUnusedMin = -1);
813     usesVgprAt(VgprIndexUnusedMin = -1);
814   }
815 
816   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
817     switch (RegKind) {
818       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
819       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
820       default: break;
821     }
822   }
823 };
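
// Illustrative example: after parsing an instruction that reads s[4:5],
// usesRegister(IS_SGPR, /*DwordRegIndex=*/4, /*RegWidth=*/2) records 6 as the
// first unused SGPR index and updates the ".kernel.sgpr_count" symbol to 6.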
824 
825 class AMDGPUAsmParser : public MCTargetAsmParser {
826   MCAsmParser &Parser;
827 
828   // Number of extra operands parsed after the first optional operand.
829   // This may be necessary to skip hardcoded mandatory operands.
830   static const unsigned MAX_OPR_LOOKAHEAD = 8;
831 
832   unsigned ForcedEncodingSize = 0;
833   bool ForcedDPP = false;
834   bool ForcedSDWA = false;
835   KernelScopeInfo KernelScope;
836 
837   /// @name Auto-generated Match Functions
838   /// {
839 
840 #define GET_ASSEMBLER_HEADER
841 #include "AMDGPUGenAsmMatcher.inc"
842 
843   /// }
844 
845 private:
846   bool ParseAsAbsoluteExpression(uint32_t &Ret);
847   bool OutOfRangeError(SMRange Range);
848   /// Calculate VGPR/SGPR blocks required for given target, reserved
849   /// registers, and user-specified NextFreeXGPR values.
850   ///
851   /// \param Features [in] Target features, used for bug corrections.
852   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
853   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
854   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
855   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
856   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
857   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
858   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
859   /// \param VGPRBlocks [out] Result VGPR block count.
860   /// \param SGPRBlocks [out] Result SGPR block count.
861   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
862                           bool FlatScrUsed, bool XNACKUsed,
863                           unsigned NextFreeVGPR, SMRange VGPRRange,
864                           unsigned NextFreeSGPR, SMRange SGPRRange,
865                           unsigned &VGPRBlocks, unsigned &SGPRBlocks);
866   bool ParseDirectiveAMDGCNTarget();
867   bool ParseDirectiveAMDHSAKernel();
868   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
869   bool ParseDirectiveHSACodeObjectVersion();
870   bool ParseDirectiveHSACodeObjectISA();
871   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
872   bool ParseDirectiveAMDKernelCodeT();
873   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
874   bool ParseDirectiveAMDGPUHsaKernel();
875 
876   bool ParseDirectiveISAVersion();
877   bool ParseDirectiveHSAMetadata();
878   bool ParseDirectivePALMetadata();
879 
880   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
881                              RegisterKind RegKind, unsigned Reg1,
882                              unsigned RegNum);
883   bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
884                            unsigned& RegNum, unsigned& RegWidth,
885                            unsigned *DwordRegIndex);
886   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
887   void initializeGprCountSymbol(RegisterKind RegKind);
888   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
889                              unsigned RegWidth);
890   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
891                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
892   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
893                  bool IsGdsHardcoded);
894 
895 public:
896   enum AMDGPUMatchResultTy {
897     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
898   };
899 
900   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
901 
902   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
903                const MCInstrInfo &MII,
904                const MCTargetOptions &Options)
905       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
906     MCAsmParserExtension::Initialize(Parser);
907 
908     if (getFeatureBits().none()) {
909       // Set default features.
910       copySTI().ToggleFeature("SOUTHERN_ISLANDS");
911     }
912 
913     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
914 
915     {
916       // TODO: make these pre-defined variables read-only.
917       // Currently there is no suitable machinery in core llvm-mc for this.
918       // MCSymbol::isRedefinable is intended for another purpose, and
919       // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
920       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
921       MCContext &Ctx = getContext();
922       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
923         MCSymbol *Sym =
924             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
925         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
926         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
927         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
928         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
929         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
930       } else {
931         MCSymbol *Sym =
932             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
933         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
934         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
935         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
936         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
937         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
938       }
939       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
940         initializeGprCountSymbol(IS_VGPR);
941         initializeGprCountSymbol(IS_SGPR);
942       } else
943         KernelScope.initialize(getContext());
944     }
945   }
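
  // Illustrative use of the pre-defined symbols above from assembly source
  // (assuming a pre-code-object-v3 target):
  //   .if .option.machine_version_major >= 8
  //     flat_load_dword v0, v[0:1]
  //   .endif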
946 
947   bool hasXNACK() const {
948     return AMDGPU::hasXNACK(getSTI());
949   }
950 
951   bool hasMIMG_R128() const {
952     return AMDGPU::hasMIMG_R128(getSTI());
953   }
954 
955   bool hasPackedD16() const {
956     return AMDGPU::hasPackedD16(getSTI());
957   }
958 
959   bool isSI() const {
960     return AMDGPU::isSI(getSTI());
961   }
962 
963   bool isCI() const {
964     return AMDGPU::isCI(getSTI());
965   }
966 
967   bool isVI() const {
968     return AMDGPU::isVI(getSTI());
969   }
970 
971   bool isGFX9() const {
972     return AMDGPU::isGFX9(getSTI());
973   }
974 
975   bool hasInv2PiInlineImm() const {
976     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
977   }
978 
979   bool hasFlatOffsets() const {
980     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
981   }
982 
983   bool hasSGPR102_SGPR103() const {
984     return !isVI();
985   }
986 
987   bool hasIntClamp() const {
988     return getFeatureBits()[AMDGPU::FeatureIntClamp];
989   }
990 
991   AMDGPUTargetStreamer &getTargetStreamer() {
992     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
993     return static_cast<AMDGPUTargetStreamer &>(TS);
994   }
995 
996   const MCRegisterInfo *getMRI() const {
997     // We need this const_cast because for some reason getContext() is not const
998     // in MCAsmParser.
999     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1000   }
1001 
1002   const MCInstrInfo *getMII() const {
1003     return &MII;
1004   }
1005 
1006   const FeatureBitset &getFeatureBits() const {
1007     return getSTI().getFeatureBits();
1008   }
1009 
1010   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1011   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1012   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1013 
1014   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1015   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1016   bool isForcedDPP() const { return ForcedDPP; }
1017   bool isForcedSDWA() const { return ForcedSDWA; }
1018   ArrayRef<unsigned> getMatchedVariants() const;
1019 
1020   std::unique_ptr<AMDGPUOperand> parseRegister();
1021   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1022   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1023   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1024                                       unsigned Kind) override;
1025   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1026                                OperandVector &Operands, MCStreamer &Out,
1027                                uint64_t &ErrorInfo,
1028                                bool MatchingInlineAsm) override;
1029   bool ParseDirective(AsmToken DirectiveID) override;
1030   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
1031   StringRef parseMnemonicSuffix(StringRef Name);
1032   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1033                         SMLoc NameLoc, OperandVector &Operands) override;
1034   //bool ProcessInstruction(MCInst &Inst);
1035 
1036   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1037 
1038   OperandMatchResultTy
1039   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1040                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1041                      bool (*ConvertResult)(int64_t &) = nullptr);
1042 
1043   OperandMatchResultTy parseOperandArrayWithPrefix(
1044     const char *Prefix,
1045     OperandVector &Operands,
1046     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1047     bool (*ConvertResult)(int64_t&) = nullptr);
1048 
1049   OperandMatchResultTy
1050   parseNamedBit(const char *Name, OperandVector &Operands,
1051                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1052   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1053                                              StringRef &Value);
1054 
1055   bool parseAbsoluteExpr(int64_t &Val, bool AbsMod = false);
1056   OperandMatchResultTy parseImm(OperandVector &Operands, bool AbsMod = false);
1057   OperandMatchResultTy parseReg(OperandVector &Operands);
1058   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool AbsMod = false);
1059   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1060   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1061   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1062   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1063   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1064   OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1065 
1066   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1067   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1068   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1069   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1070 
1071   bool parseCnt(int64_t &IntVal);
1072   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1073   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1074 
1075 private:
1076   struct OperandInfoTy {
1077     int64_t Id;
1078     bool IsSymbolic = false;
1079 
1080     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1081   };
1082 
1083   bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
1084   bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1085 
1086   void errorExpTgt();
1087   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1088 
1089   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
1090   bool validateSOPLiteral(const MCInst &Inst) const;
1091   bool validateConstantBusLimitations(const MCInst &Inst);
1092   bool validateEarlyClobberLimitations(const MCInst &Inst);
1093   bool validateIntClampSupported(const MCInst &Inst);
1094   bool validateMIMGAtomicDMask(const MCInst &Inst);
1095   bool validateMIMGGatherDMask(const MCInst &Inst);
1096   bool validateMIMGDataSize(const MCInst &Inst);
1097   bool validateMIMGD16(const MCInst &Inst);
1098   bool validateLdsDirect(const MCInst &Inst);
1099   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1100   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1101   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1102 
1103   bool trySkipId(const StringRef Id);
1104   bool trySkipToken(const AsmToken::TokenKind Kind);
1105   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1106   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1107   bool parseExpr(int64_t &Imm);
1108 
1109 public:
1110   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1111   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1112 
1113   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1114   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1115   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1116   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1117   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1118 
1119   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1120                             const unsigned MinVal,
1121                             const unsigned MaxVal,
1122                             const StringRef ErrMsg);
1123   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1124   bool parseSwizzleOffset(int64_t &Imm);
1125   bool parseSwizzleMacro(int64_t &Imm);
1126   bool parseSwizzleQuadPerm(int64_t &Imm);
1127   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1128   bool parseSwizzleBroadcast(int64_t &Imm);
1129   bool parseSwizzleSwap(int64_t &Imm);
1130   bool parseSwizzleReverse(int64_t &Imm);
1131 
1132   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1133   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1134   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1135   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1136   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1137 
1138   AMDGPUOperand::Ptr defaultGLC() const;
1139   AMDGPUOperand::Ptr defaultSLC() const;
1140 
1141   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1142   AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1143   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1144   AMDGPUOperand::Ptr defaultOffsetU12() const;
1145   AMDGPUOperand::Ptr defaultOffsetS13() const;
1146 
1147   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1148 
1149   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1150                OptionalImmIndexMap &OptionalIdx);
1151   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1152   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1153   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1154 
1155   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1156 
1157   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1158                bool IsAtomic = false);
1159   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1160 
1161   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1162   AMDGPUOperand::Ptr defaultRowMask() const;
1163   AMDGPUOperand::Ptr defaultBankMask() const;
1164   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1165   void cvtDPP(MCInst &Inst, const OperandVector &Operands);
1166 
1167   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1168                                     AMDGPUOperand::ImmTy Type);
1169   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1170   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1171   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1172   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1173   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1174   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1175                 uint64_t BasicInstType, bool skipVcc = false);
1176 };
1177 
1178 struct OptionalOperand {
1179   const char *Name;
1180   AMDGPUOperand::ImmTy Type;
1181   bool IsBit;
1182   bool (*ConvertResult)(int64_t&);
1183 };
1184 
1185 } // end anonymous namespace
1186 
1187 // May be called with an integer type of equivalent bitwidth.
1188 static const fltSemantics *getFltSemantics(unsigned Size) {
1189   switch (Size) {
1190   case 4:
1191     return &APFloat::IEEEsingle();
1192   case 8:
1193     return &APFloat::IEEEdouble();
1194   case 2:
1195     return &APFloat::IEEEhalf();
1196   default:
1197     llvm_unreachable("unsupported fp type");
1198   }
1199 }
1200 
1201 static const fltSemantics *getFltSemantics(MVT VT) {
1202   return getFltSemantics(VT.getSizeInBits() / 8);
1203 }
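
// Illustrative mapping: MVT::f16 and MVT::i16 (2 bytes) yield IEEEhalf(),
// MVT::f32/MVT::i32 yield IEEEsingle(), and MVT::f64/MVT::i64 yield
// IEEEdouble(); any other size hits the llvm_unreachable above.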
1204 
1205 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1206   switch (OperandType) {
1207   case AMDGPU::OPERAND_REG_IMM_INT32:
1208   case AMDGPU::OPERAND_REG_IMM_FP32:
1209   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1210   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1211     return &APFloat::IEEEsingle();
1212   case AMDGPU::OPERAND_REG_IMM_INT64:
1213   case AMDGPU::OPERAND_REG_IMM_FP64:
1214   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1215   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1216     return &APFloat::IEEEdouble();
1217   case AMDGPU::OPERAND_REG_IMM_INT16:
1218   case AMDGPU::OPERAND_REG_IMM_FP16:
1219   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1220   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1221   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1222   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1223     return &APFloat::IEEEhalf();
1224   default:
1225     llvm_unreachable("unsupported fp type");
1226   }
1227 }
1228 
1229 //===----------------------------------------------------------------------===//
1230 // Operand
1231 //===----------------------------------------------------------------------===//
1232 
1233 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1234   bool Lost;
1235 
1236   // Convert the literal to the floating-point semantics of the given type.
1237   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1238                                                APFloat::rmNearestTiesToEven,
1239                                                &Lost);
1240   // We allow precision loss but not overflow or underflow
1241   if (Status != APFloat::opOK &&
1242       Lost &&
1243       ((Status & APFloat::opOverflow)  != 0 ||
1244        (Status & APFloat::opUnderflow) != 0)) {
1245     return false;
1246   }
1247 
1248   return true;
1249 }
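
// Illustrative behaviour: converting the double literal 65536.0 to f16
// overflows and is rejected, whereas 0.1 merely loses precision (opInexact)
// and is still accepted as convertible for the purposes of this check.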
1250 
1251 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1252   if (!isImmTy(ImmTyNone)) {
1253     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1254     return false;
1255   }
1256   // TODO: We should avoid using host float here. It would be better to
1257   // check the float bit values which is what a few other places do.
1258   // We've had bot failures before due to weird NaN support on mips hosts.
1259 
1260   APInt Literal(64, Imm.Val);
1261 
1262   if (Imm.IsFPImm) { // We got fp literal token
1263     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1264       return AMDGPU::isInlinableLiteral64(Imm.Val,
1265                                           AsmParser->hasInv2PiInlineImm());
1266     }
1267 
1268     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1269     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1270       return false;
1271 
1272     if (type.getScalarSizeInBits() == 16) {
1273       return AMDGPU::isInlinableLiteral16(
1274         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1275         AsmParser->hasInv2PiInlineImm());
1276     }
1277 
1278     // Check if single precision literal is inlinable
1279     return AMDGPU::isInlinableLiteral32(
1280       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1281       AsmParser->hasInv2PiInlineImm());
1282   }
1283 
1284   // We got int literal token.
1285   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1286     return AMDGPU::isInlinableLiteral64(Imm.Val,
1287                                         AsmParser->hasInv2PiInlineImm());
1288   }
1289 
1290   if (type.getScalarSizeInBits() == 16) {
1291     return AMDGPU::isInlinableLiteral16(
1292       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1293       AsmParser->hasInv2PiInlineImm());
1294   }
1295 
1296   return AMDGPU::isInlinableLiteral32(
1297     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1298     AsmParser->hasInv2PiInlineImm());
1299 }
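
// Illustrative example: for a 32-bit FP operand, immediates such as 0.5, -4.0
// or the integers -16..64 are inline constants, whereas 0.1 or 100 are not
// and must be encoded as a 32-bit literal instead.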
1300 
1301 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1302   // Check that this immediate can be added as a literal.
1303   if (!isImmTy(ImmTyNone)) {
1304     return false;
1305   }
1306 
1307   if (!Imm.IsFPImm) {
1308     // We got int literal token.
1309 
1310     if (type == MVT::f64 && hasFPModifiers()) {
1311       // Cannot apply fp modifiers to int literals preserving the same semantics
1312       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1313       // disable these cases.
1314       return false;
1315     }
1316 
1317     unsigned Size = type.getSizeInBits();
1318     if (Size == 64)
1319       Size = 32;
1320 
1321     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1322     // types.
1323     return isUIntN(Size, Imm.Val) || isIntN(Size, Imm.Val);
1324   }
1325 
1326   // We got fp literal token
1327   if (type == MVT::f64) { // Expected 64-bit fp operand
1328     // The low 32 bits of the literal would be set to zeroes, but we accept such literals.
1329     return true;
1330   }
1331 
1332   if (type == MVT::i64) { // Expected 64-bit int operand
1333     // We don't allow fp literals in 64-bit integer instructions. It is
1334     // unclear how we should encode them.
1335     return false;
1336   }
1337 
1338   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1339   return canLosslesslyConvertToFPType(FPLiteral, type);
1340 }
1341 
1342 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1343   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1344 }
1345 
1346 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1347   if (AsmParser->isVI())
1348     return isVReg();
1349   else if (AsmParser->isGFX9())
1350     return isRegKind() || isInlinableImm(type);
1351   else
1352     return false;
1353 }
1354 
1355 bool AMDGPUOperand::isSDWAFP16Operand() const {
1356   return isSDWAOperand(MVT::f16);
1357 }
1358 
1359 bool AMDGPUOperand::isSDWAFP32Operand() const {
1360   return isSDWAOperand(MVT::f32);
1361 }
1362 
1363 bool AMDGPUOperand::isSDWAInt16Operand() const {
1364   return isSDWAOperand(MVT::i16);
1365 }
1366 
1367 bool AMDGPUOperand::isSDWAInt32Operand() const {
1368   return isSDWAOperand(MVT::i32);
1369 }
1370 
1371 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1372 {
1373   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1374   assert(Size == 2 || Size == 4 || Size == 8);
1375 
1376   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1377 
1378   if (Imm.Mods.Abs) {
1379     Val &= ~FpSignMask;
1380   }
1381   if (Imm.Mods.Neg) {
1382     Val ^= FpSignMask;
1383   }
1384 
1385   return Val;
1386 }
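
// Illustrative example: with Size == 4 the sign mask is 0x80000000, so applying
// 'abs' to the bit pattern of -1.0f (0xBF800000) yields 0x3F800000 (1.0f), and
// applying 'neg' to that flips the sign bit back.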
1387 
1388 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1389   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1390                              Inst.getNumOperands())) {
1391     addLiteralImmOperand(Inst, Imm.Val,
1392                          ApplyModifiers &&
1393                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1394   } else {
1395     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1396     Inst.addOperand(MCOperand::createImm(Imm.Val));
1397   }
1398 }
1399 
1400 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1401   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1402   auto OpNum = Inst.getNumOperands();
1403   // Check that this operand accepts literals
1404   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1405 
1406   if (ApplyModifiers) {
1407     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1408     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1409     Val = applyInputFPModifiers(Val, Size);
1410   }
1411 
1412   APInt Literal(64, Val);
1413   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1414 
1415   if (Imm.IsFPImm) { // We got fp literal token
1416     switch (OpTy) {
1417     case AMDGPU::OPERAND_REG_IMM_INT64:
1418     case AMDGPU::OPERAND_REG_IMM_FP64:
1419     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1420     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1421       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1422                                        AsmParser->hasInv2PiInlineImm())) {
1423         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1424         return;
1425       }
1426 
1427       // Non-inlineable
1428       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1429         // For fp operands we check if low 32 bits are zeros
1430         if (Literal.getLoBits(32) != 0) {
1431           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1432           "Can't encode literal as exact 64-bit floating-point operand. "
1433           "Low 32-bits will be set to zero");
1434         }
1435 
1436         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1437         return;
1438       }
1439 
1440       // We don't allow fp literals in 64-bit integer instructions. It is
1441       // unclear how we should encode them. This case should be checked earlier
1442       // in predicate methods (isLiteralImm())
1443       llvm_unreachable("fp literal in 64-bit integer instruction.");
1444 
1445     case AMDGPU::OPERAND_REG_IMM_INT32:
1446     case AMDGPU::OPERAND_REG_IMM_FP32:
1447     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1448     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1449     case AMDGPU::OPERAND_REG_IMM_INT16:
1450     case AMDGPU::OPERAND_REG_IMM_FP16:
1451     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1452     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1453     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1454     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1455       bool lost;
1456       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1457       // Convert the literal to the operand's floating-point semantics.
1458       FPLiteral.convert(*getOpFltSemantics(OpTy),
1459                         APFloat::rmNearestTiesToEven, &lost);
1460       // We allow precision loss but not overflow or underflow. This should be
1461       // checked earlier in isLiteralImm()
1462 
1463       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1464       if (OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
1465           OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
1466         ImmVal |= (ImmVal << 16);
1467       }
1468 
1469       Inst.addOperand(MCOperand::createImm(ImmVal));
1470       return;
1471     }
1472     default:
1473       llvm_unreachable("invalid operand size");
1474     }
1475 
1476     return;
1477   }
1478 
1479    // We got int literal token.
1480   // Only sign extend inline immediates.
1481   // FIXME: No errors on truncation
1482   switch (OpTy) {
1483   case AMDGPU::OPERAND_REG_IMM_INT32:
1484   case AMDGPU::OPERAND_REG_IMM_FP32:
1485   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1486   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1487     if (isInt<32>(Val) &&
1488         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1489                                      AsmParser->hasInv2PiInlineImm())) {
1490       Inst.addOperand(MCOperand::createImm(Val));
1491       return;
1492     }
1493 
1494     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1495     return;
1496 
1497   case AMDGPU::OPERAND_REG_IMM_INT64:
1498   case AMDGPU::OPERAND_REG_IMM_FP64:
1499   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1500   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1501     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1502       Inst.addOperand(MCOperand::createImm(Val));
1503       return;
1504     }
1505 
1506     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1507     return;
1508 
1509   case AMDGPU::OPERAND_REG_IMM_INT16:
1510   case AMDGPU::OPERAND_REG_IMM_FP16:
1511   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1512   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1513     if (isInt<16>(Val) &&
1514         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1515                                      AsmParser->hasInv2PiInlineImm())) {
1516       Inst.addOperand(MCOperand::createImm(Val));
1517       return;
1518     }
1519 
1520     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1521     return;
1522 
1523   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1524   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1525     auto LiteralVal = static_cast<uint16_t>(Literal.getLoBits(16).getZExtValue());
1526     assert(AMDGPU::isInlinableLiteral16(LiteralVal,
1527                                         AsmParser->hasInv2PiInlineImm()));
1528 
1529     uint32_t ImmVal = static_cast<uint32_t>(LiteralVal) << 16 |
1530                       static_cast<uint32_t>(LiteralVal);
1531     Inst.addOperand(MCOperand::createImm(ImmVal));
1532     return;
1533   }
1534   default:
1535     llvm_unreachable("invalid operand size");
1536   }
1537 }
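
// Illustrative example: the FP literal 1.5 (0x3FF8000000000000) is not an
// inline constant, so for a 64-bit FP operand only its high 32 bits
// (0x3FF80000) are encoded; a warning is emitted if the dropped low 32 bits
// are non-zero.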
1538 
1539 template <unsigned Bitwidth>
1540 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1541   APInt Literal(64, Imm.Val);
1542 
1543   if (!Imm.IsFPImm) {
1544     // We got an integer literal token.
1545     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1546     return;
1547   }
1548 
1549   bool Lost;
1550   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1551   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1552                     APFloat::rmNearestTiesToEven, &Lost);
1553   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1554 }
1555 
1556 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1557   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1558 }
1559 
1560 //===----------------------------------------------------------------------===//
1561 // AsmParser
1562 //===----------------------------------------------------------------------===//
1563 
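// Map a register kind and a width in 32-bit dwords to the corresponding
// register class ID, or -1 if no such class exists.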
1564 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1565   if (Is == IS_VGPR) {
1566     switch (RegWidth) {
1567       default: return -1;
1568       case 1: return AMDGPU::VGPR_32RegClassID;
1569       case 2: return AMDGPU::VReg_64RegClassID;
1570       case 3: return AMDGPU::VReg_96RegClassID;
1571       case 4: return AMDGPU::VReg_128RegClassID;
1572       case 8: return AMDGPU::VReg_256RegClassID;
1573       case 16: return AMDGPU::VReg_512RegClassID;
1574     }
1575   } else if (Is == IS_TTMP) {
1576     switch (RegWidth) {
1577       default: return -1;
1578       case 1: return AMDGPU::TTMP_32RegClassID;
1579       case 2: return AMDGPU::TTMP_64RegClassID;
1580       case 4: return AMDGPU::TTMP_128RegClassID;
1581       case 8: return AMDGPU::TTMP_256RegClassID;
1582       case 16: return AMDGPU::TTMP_512RegClassID;
1583     }
1584   } else if (Is == IS_SGPR) {
1585     switch (RegWidth) {
1586       default: return -1;
1587       case 1: return AMDGPU::SGPR_32RegClassID;
1588       case 2: return AMDGPU::SGPR_64RegClassID;
1589       case 4: return AMDGPU::SGPR_128RegClassID;
1590       case 8: return AMDGPU::SGPR_256RegClassID;
1591       case 16: return AMDGPU::SGPR_512RegClassID;
1592     }
1593   }
1594   return -1;
1595 }
1596 
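// Translate a special register name (e.g. "vcc", "exec_lo", "m0") to its
// register number, or 0 if the name is not a recognized special register.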
1597 static unsigned getSpecialRegForName(StringRef RegName) {
1598   return StringSwitch<unsigned>(RegName)
1599     .Case("exec", AMDGPU::EXEC)
1600     .Case("vcc", AMDGPU::VCC)
1601     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1602     .Case("xnack_mask", AMDGPU::XNACK_MASK)
1603     .Case("lds_direct", AMDGPU::LDS_DIRECT)
1604     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1605     .Case("m0", AMDGPU::M0)
1606     .Case("scc", AMDGPU::SCC)
1607     .Case("tba", AMDGPU::TBA)
1608     .Case("tma", AMDGPU::TMA)
1609     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1610     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1611     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1612     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1613     .Case("vcc_lo", AMDGPU::VCC_LO)
1614     .Case("vcc_hi", AMDGPU::VCC_HI)
1615     .Case("exec_lo", AMDGPU::EXEC_LO)
1616     .Case("exec_hi", AMDGPU::EXEC_HI)
1617     .Case("tma_lo", AMDGPU::TMA_LO)
1618     .Case("tma_hi", AMDGPU::TMA_HI)
1619     .Case("tba_lo", AMDGPU::TBA_LO)
1620     .Case("tba_hi", AMDGPU::TBA_HI)
1621     .Default(0);
1622 }
1623 
1624 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1625                                     SMLoc &EndLoc) {
1626   auto R = parseRegister();
1627   if (!R) return true;
1628   assert(R->isReg());
1629   RegNo = R->getReg();
1630   StartLoc = R->getStartLoc();
1631   EndLoc = R->getEndLoc();
1632   return false;
1633 }
1634 
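// Fold the next register of a bracketed register list into the range
// accumulated so far. For special registers a lo/hi pair is fused into the
// full register (e.g. exec_lo followed by exec_hi becomes exec); for
// VGPRs/SGPRs/TTMPs the next register must be consecutive and simply grows
// RegWidth. Returns false if the register cannot be appended.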
1635 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1636                                             RegisterKind RegKind, unsigned Reg1,
1637                                             unsigned RegNum) {
1638   switch (RegKind) {
1639   case IS_SPECIAL:
1640     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1641       Reg = AMDGPU::EXEC;
1642       RegWidth = 2;
1643       return true;
1644     }
1645     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1646       Reg = AMDGPU::FLAT_SCR;
1647       RegWidth = 2;
1648       return true;
1649     }
1650     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
1651       Reg = AMDGPU::XNACK_MASK;
1652       RegWidth = 2;
1653       return true;
1654     }
1655     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1656       Reg = AMDGPU::VCC;
1657       RegWidth = 2;
1658       return true;
1659     }
1660     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1661       Reg = AMDGPU::TBA;
1662       RegWidth = 2;
1663       return true;
1664     }
1665     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
1666       Reg = AMDGPU::TMA;
1667       RegWidth = 2;
1668       return true;
1669     }
1670     return false;
1671   case IS_VGPR:
1672   case IS_SGPR:
1673   case IS_TTMP:
1674     if (Reg1 != Reg + RegWidth) {
1675       return false;
1676     }
1677     RegWidth++;
1678     return true;
1679   default:
1680     llvm_unreachable("unexpected register kind");
1681   }
1682 }
1683 
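// Parse a register reference in one of the supported forms:
//   * a special register name:           vcc, exec, flat_scratch, ...
//   * a single register:                 v0, s5, ttmp3
//   * a register range (":Y" optional):  v[8:11], s[2], ttmp[4:7]
//   * a list of consecutive registers:   [s0, s1, s2, s3]
// On success, returns true and fills RegKind/Reg/RegNum/RegWidth;
// DwordRegIndex (if non-null) receives the dword index of the first
// register in the range (0 for special registers).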
1684 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1685                                           unsigned &RegNum, unsigned &RegWidth,
1686                                           unsigned *DwordRegIndex) {
1687   if (DwordRegIndex) { *DwordRegIndex = 0; }
1688   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
1689   if (getLexer().is(AsmToken::Identifier)) {
1690     StringRef RegName = Parser.getTok().getString();
1691     if ((Reg = getSpecialRegForName(RegName))) {
1692       Parser.Lex();
1693       RegKind = IS_SPECIAL;
1694     } else {
1695       unsigned RegNumIndex = 0;
1696       if (RegName[0] == 'v') {
1697         RegNumIndex = 1;
1698         RegKind = IS_VGPR;
1699       } else if (RegName[0] == 's') {
1700         RegNumIndex = 1;
1701         RegKind = IS_SGPR;
1702       } else if (RegName.startswith("ttmp")) {
1703         RegNumIndex = strlen("ttmp");
1704         RegKind = IS_TTMP;
1705       } else {
1706         return false;
1707       }
1708       if (RegName.size() > RegNumIndex) {
1709         // Single 32-bit register: vXX.
1710         if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
1711           return false;
1712         Parser.Lex();
1713         RegWidth = 1;
1714       } else {
1715         // Range of registers: v[XX:YY]. ":YY" is optional.
1716         Parser.Lex();
1717         int64_t RegLo, RegHi;
1718         if (getLexer().isNot(AsmToken::LBrac))
1719           return false;
1720         Parser.Lex();
1721 
1722         if (getParser().parseAbsoluteExpression(RegLo))
1723           return false;
1724 
1725         const bool isRBrace = getLexer().is(AsmToken::RBrac);
1726         if (!isRBrace && getLexer().isNot(AsmToken::Colon))
1727           return false;
1728         Parser.Lex();
1729 
1730         if (isRBrace) {
1731           RegHi = RegLo;
1732         } else {
1733           if (getParser().parseAbsoluteExpression(RegHi))
1734             return false;
1735 
1736           if (getLexer().isNot(AsmToken::RBrac))
1737             return false;
1738           Parser.Lex();
1739         }
1740         RegNum = (unsigned) RegLo;
1741         RegWidth = (RegHi - RegLo) + 1;
1742       }
1743     }
1744   } else if (getLexer().is(AsmToken::LBrac)) {
1745     // List of consecutive registers: [s0,s1,s2,s3]
1746     Parser.Lex();
1747     if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
1748       return false;
1749     if (RegWidth != 1)
1750       return false;
1751     RegisterKind RegKind1;
1752     unsigned Reg1, RegNum1, RegWidth1;
1753     do {
1754       if (getLexer().is(AsmToken::Comma)) {
1755         Parser.Lex();
1756       } else if (getLexer().is(AsmToken::RBrac)) {
1757         Parser.Lex();
1758         break;
1759       } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
1760         if (RegWidth1 != 1) {
1761           return false;
1762         }
1763         if (RegKind1 != RegKind) {
1764           return false;
1765         }
1766         if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
1767           return false;
1768         }
1769       } else {
1770         return false;
1771       }
1772     } while (true);
1773   } else {
1774     return false;
1775   }
1776   switch (RegKind) {
1777   case IS_SPECIAL:
1778     RegNum = 0;
1779     RegWidth = 1;
1780     break;
1781   case IS_VGPR:
1782   case IS_SGPR:
1783   case IS_TTMP:
1784   {
1785     unsigned Size = 1;
1786     if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
1787       // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
1788       Size = std::min(RegWidth, 4u);
1789     }
1790     if (RegNum % Size != 0)
1791       return false;
1792     if (DwordRegIndex) { *DwordRegIndex = RegNum; }
1793     RegNum = RegNum / Size;
1794     int RCID = getRegClass(RegKind, RegWidth);
1795     if (RCID == -1)
1796       return false;
1797     const MCRegisterClass RC = TRI->getRegClass(RCID);
1798     if (RegNum >= RC.getNumRegs())
1799       return false;
1800     Reg = RC.getRegister(RegNum);
1801     break;
1802   }
1803 
1804   default:
1805     llvm_unreachable("unexpected register kind");
1806   }
1807 
1808   if (!subtargetHasRegister(*TRI, Reg))
1809     return false;
1810   return true;
1811 }
1812 
1813 Optional<StringRef>
1814 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
1815   switch (RegKind) {
1816   case IS_VGPR:
1817     return StringRef(".amdgcn.next_free_vgpr");
1818   case IS_SGPR:
1819     return StringRef(".amdgcn.next_free_sgpr");
1820   default:
1821     return None;
1822   }
1823 }
1824 
1825 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
1826   auto SymbolName = getGprCountSymbolName(RegKind);
1827   assert(SymbolName && "initializing invalid register kind");
1828   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
1829   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
1830 }
1831 
1832 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
1833                                             unsigned DwordRegIndex,
1834                                             unsigned RegWidth) {
1835   // Symbols are only defined for GCN targets
1836   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
1837     return true;
1838 
1839   auto SymbolName = getGprCountSymbolName(RegKind);
1840   if (!SymbolName)
1841     return true;
1842   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
1843 
1844   int64_t NewMax = DwordRegIndex + RegWidth - 1;
1845   int64_t OldCount;
1846 
1847   if (!Sym->isVariable())
1848     return !Error(getParser().getTok().getLoc(),
1849                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
1850   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
1851     return !Error(
1852         getParser().getTok().getLoc(),
1853         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
1854 
1855   if (OldCount <= NewMax)
1856     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
1857 
1858   return true;
1859 }
1860 
1861 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
1862   const auto &Tok = Parser.getTok();
1863   SMLoc StartLoc = Tok.getLoc();
1864   SMLoc EndLoc = Tok.getEndLoc();
1865   RegisterKind RegKind;
1866   unsigned Reg, RegNum, RegWidth, DwordRegIndex;
1867 
1868   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
1869     return nullptr;
1870   }
1871   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1872     if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
1873       return nullptr;
1874   } else
1875     KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
1876   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false);
1877 }
1878 
1879 bool
1880 AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool AbsMod) {
1881   if (AbsMod && getLexer().peekTok().is(AsmToken::Pipe) &&
1882       (getLexer().getKind() == AsmToken::Integer ||
1883        getLexer().getKind() == AsmToken::Real)) {
1884     // This is a workaround for handling operands like these:
1885     //     |1.0|
1886     //     |-1|
1887     // This syntax is not compatible with the syntax of standard
1888     // MC expressions (due to the trailing '|').
1889 
1890     SMLoc EndLoc;
1891     const MCExpr *Expr;
1892 
1893     if (getParser().parsePrimaryExpr(Expr, EndLoc)) {
1894       return true;
1895     }
1896 
1897     return !Expr->evaluateAsAbsolute(Val);
1898   }
1899 
1900   return getParser().parseAbsoluteExpression(Val);
1901 }
1902 
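// Parse an optionally negated integer or floating-point literal operand.
// FP literals are stored as the bit pattern of their double value and are
// marked as floating-point so they can later be encoded in the operand's
// expected format.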
1903 OperandMatchResultTy
1904 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) {
1905   // TODO: add syntactic sugar for 1/(2*PI)
1906   bool Minus = false;
1907   if (getLexer().getKind() == AsmToken::Minus) {
1908     const AsmToken NextToken = getLexer().peekTok();
1909     if (!NextToken.is(AsmToken::Integer) &&
1910         !NextToken.is(AsmToken::Real)) {
1911         return MatchOperand_NoMatch;
1912     }
1913     Minus = true;
1914     Parser.Lex();
1915   }
1916 
1917   SMLoc S = Parser.getTok().getLoc();
1918   switch (getLexer().getKind()) {
1919   case AsmToken::Integer: {
1920     int64_t IntVal;
1921     if (parseAbsoluteExpr(IntVal, AbsMod))
1922       return MatchOperand_ParseFail;
1923     if (Minus)
1924       IntVal *= -1;
1925     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
1926     return MatchOperand_Success;
1927   }
1928   case AsmToken::Real: {
1929     int64_t IntVal;
1930     if (parseAbsoluteExpr(IntVal, AbsMod))
1931       return MatchOperand_ParseFail;
1932 
1933     APFloat F(BitsToDouble(IntVal));
1934     if (Minus)
1935       F.changeSign();
1936     Operands.push_back(
1937         AMDGPUOperand::CreateImm(this, F.bitcastToAPInt().getZExtValue(), S,
1938                                  AMDGPUOperand::ImmTyNone, true));
1939     return MatchOperand_Success;
1940   }
1941   default:
1942     return MatchOperand_NoMatch;
1943   }
1944 }
1945 
1946 OperandMatchResultTy
1947 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
1948   if (auto R = parseRegister()) {
1949     assert(R->isReg());
1950     R->Reg.IsForcedVOP3 = isForcedVOP3();
1951     Operands.push_back(std::move(R));
1952     return MatchOperand_Success;
1953   }
1954   return MatchOperand_NoMatch;
1955 }
1956 
1957 OperandMatchResultTy
1958 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool AbsMod) {
1959   auto res = parseImm(Operands, AbsMod);
1960   if (res != MatchOperand_NoMatch) {
1961     return res;
1962   }
1963 
1964   return parseReg(Operands);
1965 }
1966 
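// Parse a register or immediate together with optional floating-point
// source modifiers. Supported spellings include a leading '-', the
// 'neg(...)' and 'abs(...)' wrappers, and '|...|' for absolute value,
// e.g. "-v1", "abs(v2)", "|v3|", "neg(v4)".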
1967 OperandMatchResultTy
1968 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
1969                                               bool AllowImm) {
1970   bool Negate = false, Negate2 = false, Abs = false, Abs2 = false;
1971 
1972   if (getLexer().getKind() == AsmToken::Minus) {
1973     const AsmToken NextToken = getLexer().peekTok();
1974 
1975     // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
1976     if (NextToken.is(AsmToken::Minus)) {
1977       Error(Parser.getTok().getLoc(), "invalid syntax, expected 'neg' modifier");
1978       return MatchOperand_ParseFail;
1979     }
1980 
1981     // '-' followed by an integer literal N should be interpreted as integer
1982     // negation rather than a floating-point NEG modifier applied to N.
1983     // Besides being counter-intuitive, such use of the floating-point NEG
1984     // modifier results in different meanings of integer literals used with
1985     // VOP1/2/C and VOP3, for example:
1986     //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
1987     //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
1988     // Negative fp literals should be handled likewise for uniformity.
1989     if (!NextToken.is(AsmToken::Integer) && !NextToken.is(AsmToken::Real)) {
1990       Parser.Lex();
1991       Negate = true;
1992     }
1993   }
1994 
1995   if (getLexer().getKind() == AsmToken::Identifier &&
1996       Parser.getTok().getString() == "neg") {
1997     if (Negate) {
1998       Error(Parser.getTok().getLoc(), "expected register or immediate");
1999       return MatchOperand_ParseFail;
2000     }
2001     Parser.Lex();
2002     Negate2 = true;
2003     if (getLexer().isNot(AsmToken::LParen)) {
2004       Error(Parser.getTok().getLoc(), "expected left paren after neg");
2005       return MatchOperand_ParseFail;
2006     }
2007     Parser.Lex();
2008   }
2009 
2010   if (getLexer().getKind() == AsmToken::Identifier &&
2011       Parser.getTok().getString() == "abs") {
2012     Parser.Lex();
2013     Abs2 = true;
2014     if (getLexer().isNot(AsmToken::LParen)) {
2015       Error(Parser.getTok().getLoc(), "expected left paren after abs");
2016       return MatchOperand_ParseFail;
2017     }
2018     Parser.Lex();
2019   }
2020 
2021   if (getLexer().getKind() == AsmToken::Pipe) {
2022     if (Abs2) {
2023       Error(Parser.getTok().getLoc(), "expected register or immediate");
2024       return MatchOperand_ParseFail;
2025     }
2026     Parser.Lex();
2027     Abs = true;
2028   }
2029 
2030   OperandMatchResultTy Res;
2031   if (AllowImm) {
2032     Res = parseRegOrImm(Operands, Abs);
2033   } else {
2034     Res = parseReg(Operands);
2035   }
2036   if (Res != MatchOperand_Success) {
2037     return Res;
2038   }
2039 
2040   AMDGPUOperand::Modifiers Mods;
2041   if (Abs) {
2042     if (getLexer().getKind() != AsmToken::Pipe) {
2043       Error(Parser.getTok().getLoc(), "expected vertical bar");
2044       return MatchOperand_ParseFail;
2045     }
2046     Parser.Lex();
2047     Mods.Abs = true;
2048   }
2049   if (Abs2) {
2050     if (getLexer().isNot(AsmToken::RParen)) {
2051       Error(Parser.getTok().getLoc(), "expected closing parentheses");
2052       return MatchOperand_ParseFail;
2053     }
2054     Parser.Lex();
2055     Mods.Abs = true;
2056   }
2057 
2058   if (Negate) {
2059     Mods.Neg = true;
2060   } else if (Negate2) {
2061     if (getLexer().isNot(AsmToken::RParen)) {
2062       Error(Parser.getTok().getLoc(), "expected closing parentheses");
2063       return MatchOperand_ParseFail;
2064     }
2065     Parser.Lex();
2066     Mods.Neg = true;
2067   }
2068 
2069   if (Mods.hasFPModifiers()) {
2070     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2071     Op.setModifiers(Mods);
2072   }
2073   return MatchOperand_Success;
2074 }
2075 
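// Parse a register or immediate together with the optional integer
// 'sext(...)' source modifier, e.g. "sext(v1)".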
2076 OperandMatchResultTy
2077 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2078                                                bool AllowImm) {
2079   bool Sext = false;
2080 
2081   if (getLexer().getKind() == AsmToken::Identifier &&
2082       Parser.getTok().getString() == "sext") {
2083     Parser.Lex();
2084     Sext = true;
2085     if (getLexer().isNot(AsmToken::LParen)) {
2086       Error(Parser.getTok().getLoc(), "expected left paren after sext");
2087       return MatchOperand_ParseFail;
2088     }
2089     Parser.Lex();
2090   }
2091 
2092   OperandMatchResultTy Res;
2093   if (AllowImm) {
2094     Res = parseRegOrImm(Operands);
2095   } else {
2096     Res = parseReg(Operands);
2097   }
2098   if (Res != MatchOperand_Success) {
2099     return Res;
2100   }
2101 
2102   AMDGPUOperand::Modifiers Mods;
2103   if (Sext) {
2104     if (getLexer().isNot(AsmToken::RParen)) {
2105       Error(Parser.getTok().getLoc(), "expected closing parentheses");
2106       return MatchOperand_ParseFail;
2107     }
2108     Parser.Lex();
2109     Mods.Sext = true;
2110   }
2111 
2112   if (Mods.hasIntModifiers()) {
2113     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2114     Op.setModifiers(Mods);
2115   }
2116 
2117   return MatchOperand_Success;
2118 }
2119 
2120 OperandMatchResultTy
2121 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2122   return parseRegOrImmWithFPInputMods(Operands, false);
2123 }
2124 
2125 OperandMatchResultTy
2126 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2127   return parseRegOrImmWithIntInputMods(Operands, false);
2128 }
2129 
2130 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2131   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2132   if (Reg) {
2133     Operands.push_back(std::move(Reg));
2134     return MatchOperand_Success;
2135   }
2136 
2137   const AsmToken &Tok = Parser.getTok();
2138   if (Tok.getString() == "off") {
2139     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Tok.getLoc(),
2140                                                 AMDGPUOperand::ImmTyOff, false));
2141     Parser.Lex();
2142     return MatchOperand_Success;
2143   }
2144 
2145   return MatchOperand_NoMatch;
2146 }
2147 
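// Reject match candidates that conflict with a user-forced encoding (e.g. a
// VOP3 opcode when a 32-bit encoding was forced, or a non-DPP/SDWA opcode
// when those encodings were forced) and enforce a few extra operand
// constraints: dst_sel must be DWORD for SDWA v_mac, and the offset must be
// zero for FLAT instructions on targets without flat offsets.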
2148 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2149   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2150 
2151   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2152       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2153       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2154       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)))
2155     return Match_InvalidOperand;
2156 
2157   if ((TSFlags & SIInstrFlags::VOP3) &&
2158       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2159       getForcedEncodingSize() != 64)
2160     return Match_PreferE32;
2161 
2162   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2163       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2164     // v_mac_f32/16 allow only dst_sel == DWORD.
2165     auto OpNum =
2166         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2167     const auto &Op = Inst.getOperand(OpNum);
2168     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2169       return Match_InvalidOperand;
2170     }
2171   }
2172 
2173   if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) {
2174     // FIXME: Produces an error without the correct column reported.
2175     auto OpNum =
2176         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
2177     const auto &Op = Inst.getOperand(OpNum);
2178     if (Op.getImm() != 0)
2179       return Match_InvalidOperand;
2180   }
2181 
2182   return Match_Success;
2183 }
2184 
2185 // Determine which asm variants should be checked when matching.
2186 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2187   if (getForcedEncodingSize() == 32) {
2188     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2189     return makeArrayRef(Variants);
2190   }
2191 
2192   if (isForcedVOP3()) {
2193     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2194     return makeArrayRef(Variants);
2195   }
2196 
2197   if (isForcedSDWA()) {
2198     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2199                                         AMDGPUAsmVariants::SDWA9};
2200     return makeArrayRef(Variants);
2201   }
2202 
2203   if (isForcedDPP()) {
2204     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2205     return makeArrayRef(Variants);
2206   }
2207 
2208   static const unsigned Variants[] = {
2209     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2210     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2211   };
2212 
2213   return makeArrayRef(Variants);
2214 }
2215 
2216 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2217   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2218   const unsigned Num = Desc.getNumImplicitUses();
2219   for (unsigned i = 0; i < Num; ++i) {
2220     unsigned Reg = Desc.ImplicitUses[i];
2221     switch (Reg) {
2222     case AMDGPU::FLAT_SCR:
2223     case AMDGPU::VCC:
2224     case AMDGPU::M0:
2225       return Reg;
2226     default:
2227       break;
2228     }
2229   }
2230   return AMDGPU::NoRegister;
2231 }
2232 
2233 // NB: This code is correct only when used to check constant
2234 // bus limitations because GFX7 supports no f16 inline constants.
2235 // Note that there are no cases in which a GFX7 opcode violates
2236 // constant bus limitations due to the use of an f16 constant.
2237 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2238                                        unsigned OpIdx) const {
2239   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2240 
2241   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2242     return false;
2243   }
2244 
2245   const MCOperand &MO = Inst.getOperand(OpIdx);
2246 
2247   int64_t Val = MO.getImm();
2248   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2249 
2250   switch (OpSize) { // expected operand size
2251   case 8:
2252     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2253   case 4:
2254     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2255   case 2: {
2256     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2257     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2258         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
2259       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2260     } else {
2261       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2262     }
2263   }
2264   default:
2265     llvm_unreachable("invalid operand size");
2266   }
2267 }
2268 
2269 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2270   const MCOperand &MO = Inst.getOperand(OpIdx);
2271   if (MO.isImm()) {
2272     return !isInlineConstant(Inst, OpIdx);
2273   }
2274   return !MO.isReg() ||
2275          isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2276 }
2277 
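// VOP* instructions may read at most one SGPR or literal constant through
// the constant bus. Count such reads across the src operands (including
// implicitly read SGPRs such as VCC or M0) and reject the instruction if
// the limit is exceeded.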
2278 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2279   const unsigned Opcode = Inst.getOpcode();
2280   const MCInstrDesc &Desc = MII.get(Opcode);
2281   unsigned ConstantBusUseCount = 0;
2282 
2283   if (Desc.TSFlags &
2284       (SIInstrFlags::VOPC |
2285        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2286        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2287        SIInstrFlags::SDWA)) {
2288     // Check special imm operands (used by madmk, etc.)
2289     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2290       ++ConstantBusUseCount;
2291     }
2292 
2293     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2294     if (SGPRUsed != AMDGPU::NoRegister) {
2295       ++ConstantBusUseCount;
2296     }
2297 
2298     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2299     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2300     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2301 
2302     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2303 
2304     for (int OpIdx : OpIndices) {
2305       if (OpIdx == -1) break;
2306 
2307       const MCOperand &MO = Inst.getOperand(OpIdx);
2308       if (usesConstantBus(Inst, OpIdx)) {
2309         if (MO.isReg()) {
2310           const unsigned Reg = mc2PseudoReg(MO.getReg());
2311           // Pairs of registers with a partial intersection like these:
2312           //   s0, s[0:1]
2313           //   flat_scratch_lo, flat_scratch
2314           //   flat_scratch_lo, flat_scratch_hi
2315           // are theoretically valid but are disabled anyway.
2316           // Note that this code mimics SIInstrInfo::verifyInstruction
2317           if (Reg != SGPRUsed) {
2318             ++ConstantBusUseCount;
2319           }
2320           SGPRUsed = Reg;
2321         } else { // Expression or a literal
2322           ++ConstantBusUseCount;
2323         }
2324       }
2325     }
2326   }
2327 
2328   return ConstantBusUseCount <= 1;
2329 }
2330 
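// For opcodes whose vdst operand carries an EARLY_CLOBBER constraint, the
// destination register must not overlap any source register.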
2331 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2332   const unsigned Opcode = Inst.getOpcode();
2333   const MCInstrDesc &Desc = MII.get(Opcode);
2334 
2335   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2336   if (DstIdx == -1 ||
2337       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2338     return true;
2339   }
2340 
2341   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2342 
2343   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2344   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2345   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2346 
2347   assert(DstIdx != -1);
2348   const MCOperand &Dst = Inst.getOperand(DstIdx);
2349   assert(Dst.isReg());
2350   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2351 
2352   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2353 
2354   for (int SrcIdx : SrcIndices) {
2355     if (SrcIdx == -1) break;
2356     const MCOperand &Src = Inst.getOperand(SrcIdx);
2357     if (Src.isReg()) {
2358       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2359       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2360         return false;
2361       }
2362     }
2363   }
2364 
2365   return true;
2366 }
2367 
2368 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2369 
2370   const unsigned Opc = Inst.getOpcode();
2371   const MCInstrDesc &Desc = MII.get(Opc);
2372 
2373   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2374     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2375     assert(ClampIdx != -1);
2376     return Inst.getOperand(ClampIdx).getImm() == 0;
2377   }
2378 
2379   return true;
2380 }
2381 
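// Check that the size of the vdata operand matches what dmask and tfe
// require: one dword per dmask bit set (always 4 for gather4), halved
// (rounding up) when packed d16 is in effect, plus one extra dword when
// tfe is enabled.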
2382 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2383 
2384   const unsigned Opc = Inst.getOpcode();
2385   const MCInstrDesc &Desc = MII.get(Opc);
2386 
2387   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2388     return true;
2389 
2390   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2391   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2392   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2393 
2394   assert(VDataIdx != -1);
2395   assert(DMaskIdx != -1);
2396   assert(TFEIdx != -1);
2397 
2398   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2399   unsigned TFESize = Inst.getOperand(TFEIdx).getImm() ? 1 : 0;
2400   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2401   if (DMask == 0)
2402     DMask = 1;
2403 
2404   unsigned DataSize =
2405     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2406   if (hasPackedD16()) {
2407     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2408     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2409       DataSize = (DataSize + 1) / 2;
2410   }
2411 
2412   return (VDataSize / 4) == DataSize + TFESize;
2413 }
2414 
2415 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
2416 
2417   const unsigned Opc = Inst.getOpcode();
2418   const MCInstrDesc &Desc = MII.get(Opc);
2419 
2420   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2421     return true;
2422   if (!Desc.mayLoad() || !Desc.mayStore())
2423     return true; // Not atomic
2424 
2425   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2426   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2427 
2428   // This is an incomplete check because image_atomic_cmpswap
2429   // may only use 0x3 and 0xf while other atomic operations
2430   // may use 0x1 and 0x3. However, these limitations are
2431   // verified when we check that dmask matches dst size.
2432   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
2433 }
2434 
2435 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
2436 
2437   const unsigned Opc = Inst.getOpcode();
2438   const MCInstrDesc &Desc = MII.get(Opc);
2439 
2440   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
2441     return true;
2442 
2443   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2444   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2445 
2446   // GATHER4 instructions use dmask in a different fashion compared to
2447   // other MIMG instructions. The only useful DMASK values are
2448   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
2449   // (red,red,red,red) etc.) The ISA document doesn't mention
2450   // this.
2451   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
2452 }
2453 
2454 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
2455 
2456   const unsigned Opc = Inst.getOpcode();
2457   const MCInstrDesc &Desc = MII.get(Opc);
2458 
2459   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2460     return true;
2461 
2462   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2463   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
2464     if (isCI() || isSI())
2465       return false;
2466   }
2467 
2468   return true;
2469 }
2470 
2471 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
2472 
2473   using namespace SIInstrFlags;
2474   const unsigned Opcode = Inst.getOpcode();
2475   const MCInstrDesc &Desc = MII.get(Opcode);
2476 
2477   // The lds_direct register is defined so that it can be used
2478   // with 9-bit operands only. Ignore encodings that do not accept these.
2479   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
2480     return true;
2481 
2482   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2483   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2484   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2485 
2486   const int SrcIndices[] = { Src1Idx, Src2Idx };
2487 
2488   // lds_direct cannot be specified as either src1 or src2.
2489   for (int SrcIdx : SrcIndices) {
2490     if (SrcIdx == -1) break;
2491     const MCOperand &Src = Inst.getOperand(SrcIdx);
2492     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
2493       return false;
2494     }
2495   }
2496 
2497   if (Src0Idx == -1)
2498     return true;
2499 
2500   const MCOperand &Src = Inst.getOperand(Src0Idx);
2501   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
2502     return true;
2503 
2504   // lds_direct is specified as src0. Check additional limitations.
2505 
2506   // FIXME: This is a workaround for bug 37943
2507   // which allows 64-bit VOP3 opcodes to use 32-bit operands.
2508   if (AMDGPU::getRegOperandSize(getMRI(), Desc, Src0Idx) != 4)
2509     return false;
2510 
2511   // The documentation does not disable lds_direct for SDWA, but SP3 does.
2512   // FIXME: This inconsistency needs to be investigated further.
2513   if (Desc.TSFlags & SIInstrFlags::SDWA)
2514     return false;
2515 
2516   // The following opcodes do not accept lds_direct, as is explicitly stated
2517   // in AMD documentation. However, SP3 disables lds_direct for most other
2518   // 'rev' opcodes as well (e.g. for v_subrev_u32 but not for v_subrev_f32).
2519   // FIXME: This inconsistency needs to be investigated further.
2520   switch (Opcode) {
2521   case AMDGPU::V_LSHLREV_B32_e32_si:
2522   case AMDGPU::V_LSHLREV_B32_e64_si:
2523   case AMDGPU::V_LSHLREV_B16_e32_vi:
2524   case AMDGPU::V_LSHLREV_B16_e64_vi:
2525   case AMDGPU::V_LSHLREV_B32_e32_vi:
2526   case AMDGPU::V_LSHLREV_B32_e64_vi:
2527   case AMDGPU::V_LSHLREV_B64_vi:
2528   case AMDGPU::V_LSHRREV_B32_e32_si:
2529   case AMDGPU::V_LSHRREV_B32_e64_si:
2530   case AMDGPU::V_LSHRREV_B16_e32_vi:
2531   case AMDGPU::V_LSHRREV_B16_e64_vi:
2532   case AMDGPU::V_LSHRREV_B32_e32_vi:
2533   case AMDGPU::V_LSHRREV_B32_e64_vi:
2534   case AMDGPU::V_LSHRREV_B64_vi:
2535   case AMDGPU::V_ASHRREV_I32_e64_si:
2536   case AMDGPU::V_ASHRREV_I32_e32_si:
2537   case AMDGPU::V_ASHRREV_I16_e32_vi:
2538   case AMDGPU::V_ASHRREV_I16_e64_vi:
2539   case AMDGPU::V_ASHRREV_I32_e32_vi:
2540   case AMDGPU::V_ASHRREV_I32_e64_vi:
2541   case AMDGPU::V_ASHRREV_I64_vi:
2542   case AMDGPU::V_PK_LSHLREV_B16_vi:
2543   case AMDGPU::V_PK_LSHRREV_B16_vi:
2544   case AMDGPU::V_PK_ASHRREV_I16_vi:
2545     return false;
2546   default:
2547     return true;
2548   }
2549 }
2550 
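// SOP2/SOPC instructions can encode at most one 32-bit literal, so src0 and
// src1 must not require two distinct literal values.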
2551 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
2552   unsigned Opcode = Inst.getOpcode();
2553   const MCInstrDesc &Desc = MII.get(Opcode);
2554   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
2555     return true;
2556 
2557   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2558   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2559 
2560   const int OpIndices[] = { Src0Idx, Src1Idx };
2561 
2562   unsigned NumLiterals = 0;
2563   uint32_t LiteralValue;
2564 
2565   for (int OpIdx : OpIndices) {
2566     if (OpIdx == -1) break;
2567 
2568     const MCOperand &MO = Inst.getOperand(OpIdx);
2569     if (MO.isImm() &&
2570         // Exclude special imm operands (like the one used by s_set_gpr_idx_on)
2571         AMDGPU::isSISrcOperand(Desc, OpIdx) &&
2572         !isInlineConstant(Inst, OpIdx)) {
2573       uint32_t Value = static_cast<uint32_t>(MO.getImm());
2574       if (NumLiterals == 0 || LiteralValue != Value) {
2575         LiteralValue = Value;
2576         ++NumLiterals;
2577       }
2578     }
2579   }
2580 
2581   return NumLiterals <= 1;
2582 }
2583 
2584 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
2585                                           const SMLoc &IDLoc) {
2586   if (!validateLdsDirect(Inst)) {
2587     Error(IDLoc,
2588       "invalid use of lds_direct");
2589     return false;
2590   }
2591   if (!validateSOPLiteral(Inst)) {
2592     Error(IDLoc,
2593       "only one literal operand is allowed");
2594     return false;
2595   }
2596   if (!validateConstantBusLimitations(Inst)) {
2597     Error(IDLoc,
2598       "invalid operand (violates constant bus restrictions)");
2599     return false;
2600   }
2601   if (!validateEarlyClobberLimitations(Inst)) {
2602     Error(IDLoc,
2603       "destination must be different than all sources");
2604     return false;
2605   }
2606   if (!validateIntClampSupported(Inst)) {
2607     Error(IDLoc,
2608       "integer clamping is not supported on this GPU");
2609     return false;
2610   }
2611   // For MUBUF/MTBUF, d16 is part of the opcode, so there is nothing to validate.
2612   if (!validateMIMGD16(Inst)) {
2613     Error(IDLoc,
2614       "d16 modifier is not supported on this GPU");
2615     return false;
2616   }
2617   if (!validateMIMGDataSize(Inst)) {
2618     Error(IDLoc,
2619       "image data size does not match dmask and tfe");
2620     return false;
2621   }
2622   if (!validateMIMGAtomicDMask(Inst)) {
2623     Error(IDLoc,
2624       "invalid atomic image dmask");
2625     return false;
2626   }
2627   if (!validateMIMGGatherDMask(Inst)) {
2628     Error(IDLoc,
2629       "invalid image_gather dmask: only one bit must be set");
2630     return false;
2631   }
2632 
2633   return true;
2634 }
2635 
2636 static std::string AMDGPUMnemonicSpellCheck(StringRef S, uint64_t FBS,
2637                                             unsigned VariantID = 0);
2638 
2639 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2640                                               OperandVector &Operands,
2641                                               MCStreamer &Out,
2642                                               uint64_t &ErrorInfo,
2643                                               bool MatchingInlineAsm) {
2644   MCInst Inst;
2645   unsigned Result = Match_Success;
2646   for (auto Variant : getMatchedVariants()) {
2647     uint64_t EI;
2648     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
2649                                   Variant);
2650     // We order match statuses from least to most specific. We use the most
2651     // specific status as the result:
2652     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
2653     if ((R == Match_Success) ||
2654         (R == Match_PreferE32) ||
2655         (R == Match_MissingFeature && Result != Match_PreferE32) ||
2656         (R == Match_InvalidOperand && Result != Match_MissingFeature
2657                                    && Result != Match_PreferE32) ||
2658         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
2659                                    && Result != Match_MissingFeature
2660                                    && Result != Match_PreferE32)) {
2661       Result = R;
2662       ErrorInfo = EI;
2663     }
2664     if (R == Match_Success)
2665       break;
2666   }
2667 
2668   switch (Result) {
2669   default: break;
2670   case Match_Success:
2671     if (!validateInstruction(Inst, IDLoc)) {
2672       return true;
2673     }
2674     Inst.setLoc(IDLoc);
2675     Out.EmitInstruction(Inst, getSTI());
2676     return false;
2677 
2678   case Match_MissingFeature:
2679     return Error(IDLoc, "instruction not supported on this GPU");
2680 
2681   case Match_MnemonicFail: {
2682     uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
2683     std::string Suggestion = AMDGPUMnemonicSpellCheck(
2684         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
2685     return Error(IDLoc, "invalid instruction" + Suggestion,
2686                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
2687   }
2688 
2689   case Match_InvalidOperand: {
2690     SMLoc ErrorLoc = IDLoc;
2691     if (ErrorInfo != ~0ULL) {
2692       if (ErrorInfo >= Operands.size()) {
2693         return Error(IDLoc, "too few operands for instruction");
2694       }
2695       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
2696       if (ErrorLoc == SMLoc())
2697         ErrorLoc = IDLoc;
2698     }
2699     return Error(ErrorLoc, "invalid operand for instruction");
2700   }
2701 
2702   case Match_PreferE32:
2703     return Error(IDLoc, "internal error: instruction without _e64 suffix "
2704                         "should be encoded as e32");
2705   }
2706   llvm_unreachable("Implement any new match types added!");
2707 }
2708 
2709 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
2710   int64_t Tmp = -1;
2711   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
2712     return true;
2713   }
2714   if (getParser().parseAbsoluteExpression(Tmp)) {
2715     return true;
2716   }
2717   Ret = static_cast<uint32_t>(Tmp);
2718   return false;
2719 }
2720 
2721 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
2722                                                uint32_t &Minor) {
2723   if (ParseAsAbsoluteExpression(Major))
2724     return TokError("invalid major version");
2725 
2726   if (getLexer().isNot(AsmToken::Comma))
2727     return TokError("minor version number required, comma expected");
2728   Lex();
2729 
2730   if (ParseAsAbsoluteExpression(Minor))
2731     return TokError("invalid minor version");
2732 
2733   return false;
2734 }
2735 
2736 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
2737   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
2738     return TokError("directive only supported for amdgcn architecture");
2739 
2740   std::string Target;
2741 
2742   SMLoc TargetStart = getTok().getLoc();
2743   if (getParser().parseEscapedString(Target))
2744     return true;
2745   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
2746 
2747   std::string ExpectedTarget;
2748   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
2749   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
2750 
2751   if (Target != ExpectedTargetOS.str())
2752     return getParser().Error(TargetRange.Start, "target must match options",
2753                              TargetRange);
2754 
2755   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
2756   return false;
2757 }
2758 
2759 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
2760   return getParser().Error(Range.Start, "value out of range", Range);
2761 }
2762 
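// Convert the next-free VGPR/SGPR numbers from the .amdhsa_ directives into
// the granulated register block counts stored in the kernel descriptor,
// accounting for the extra SGPRs reserved for VCC, flat_scratch and XNACK
// and for targets affected by the SGPR init bug. Returns true on error.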
2763 bool AMDGPUAsmParser::calculateGPRBlocks(
2764     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
2765     bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange,
2766     unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks,
2767     unsigned &SGPRBlocks) {
2768   // TODO(scott.linder): These calculations are duplicated from
2769   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
2770   IsaVersion Version = getIsaVersion(getSTI().getCPU());
2771 
2772   unsigned NumVGPRs = NextFreeVGPR;
2773   unsigned NumSGPRs = NextFreeSGPR;
2774   unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(&getSTI());
2775 
2776   if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
2777       NumSGPRs > MaxAddressableNumSGPRs)
2778     return OutOfRangeError(SGPRRange);
2779 
2780   NumSGPRs +=
2781       IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
2782 
2783   if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
2784       NumSGPRs > MaxAddressableNumSGPRs)
2785     return OutOfRangeError(SGPRRange);
2786 
2787   if (Features.test(FeatureSGPRInitBug))
2788     NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
2789 
2790   VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs);
2791   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
2792 
2793   return false;
2794 }
2795 
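// Parse the body of an .amdhsa_kernel directive. An illustrative minimal
// example (only the two next_free_* fields are mandatory; all other
// .amdhsa_ fields keep the defaults from the default kernel descriptor):
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel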
2796 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
2797   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
2798     return TokError("directive only supported for amdgcn architecture");
2799 
2800   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
2801     return TokError("directive only supported for amdhsa OS");
2802 
2803   StringRef KernelName;
2804   if (getParser().parseIdentifier(KernelName))
2805     return true;
2806 
2807   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor();
2808 
2809   StringSet<> Seen;
2810 
2811   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
2812 
2813   SMRange VGPRRange;
2814   uint64_t NextFreeVGPR = 0;
2815   SMRange SGPRRange;
2816   uint64_t NextFreeSGPR = 0;
2817   unsigned UserSGPRCount = 0;
2818   bool ReserveVCC = true;
2819   bool ReserveFlatScr = true;
2820   bool ReserveXNACK = hasXNACK();
2821 
2822   while (true) {
2823     while (getLexer().is(AsmToken::EndOfStatement))
2824       Lex();
2825 
2826     if (getLexer().isNot(AsmToken::Identifier))
2827       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
2828 
2829     StringRef ID = getTok().getIdentifier();
2830     SMRange IDRange = getTok().getLocRange();
2831     Lex();
2832 
2833     if (ID == ".end_amdhsa_kernel")
2834       break;
2835 
2836     if (Seen.find(ID) != Seen.end())
2837       return TokError(".amdhsa_ directives cannot be repeated");
2838     Seen.insert(ID);
2839 
2840     SMLoc ValStart = getTok().getLoc();
2841     int64_t IVal;
2842     if (getParser().parseAbsoluteExpression(IVal))
2843       return true;
2844     SMLoc ValEnd = getTok().getLoc();
2845     SMRange ValRange = SMRange(ValStart, ValEnd);
2846 
2847     if (IVal < 0)
2848       return OutOfRangeError(ValRange);
2849 
2850     uint64_t Val = IVal;
2851 
2852 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
2853   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
2854     return OutOfRangeError(RANGE);                                             \
2855   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
2856 
2857     if (ID == ".amdhsa_group_segment_fixed_size") {
2858       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
2859         return OutOfRangeError(ValRange);
2860       KD.group_segment_fixed_size = Val;
2861     } else if (ID == ".amdhsa_private_segment_fixed_size") {
2862       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
2863         return OutOfRangeError(ValRange);
2864       KD.private_segment_fixed_size = Val;
2865     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
2866       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2867                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
2868                        Val, ValRange);
2869       UserSGPRCount++;
2870     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
2871       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2872                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
2873                        ValRange);
2874       UserSGPRCount++;
2875     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
2876       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2877                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
2878                        ValRange);
2879       UserSGPRCount++;
2880     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
2881       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2882                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
2883                        Val, ValRange);
2884       UserSGPRCount++;
2885     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
2886       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2887                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
2888                        ValRange);
2889       UserSGPRCount++;
2890     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
2891       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2892                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
2893                        ValRange);
2894       UserSGPRCount++;
2895     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
2896       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2897                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
2898                        Val, ValRange);
2899       UserSGPRCount++;
2900     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
2901       PARSE_BITS_ENTRY(
2902           KD.compute_pgm_rsrc2,
2903           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
2904           ValRange);
2905     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
2906       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2907                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
2908                        ValRange);
2909     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
2910       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2911                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
2912                        ValRange);
2913     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
2914       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2915                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
2916                        ValRange);
2917     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
2918       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2919                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
2920                        ValRange);
2921     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
2922       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2923                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
2924                        ValRange);
2925     } else if (ID == ".amdhsa_next_free_vgpr") {
2926       VGPRRange = ValRange;
2927       NextFreeVGPR = Val;
2928     } else if (ID == ".amdhsa_next_free_sgpr") {
2929       SGPRRange = ValRange;
2930       NextFreeSGPR = Val;
2931     } else if (ID == ".amdhsa_reserve_vcc") {
2932       if (!isUInt<1>(Val))
2933         return OutOfRangeError(ValRange);
2934       ReserveVCC = Val;
2935     } else if (ID == ".amdhsa_reserve_flat_scratch") {
2936       if (IVersion.Major < 7)
2937         return getParser().Error(IDRange.Start, "directive requires gfx7+",
2938                                  IDRange);
2939       if (!isUInt<1>(Val))
2940         return OutOfRangeError(ValRange);
2941       ReserveFlatScr = Val;
2942     } else if (ID == ".amdhsa_reserve_xnack_mask") {
2943       if (IVersion.Major < 8)
2944         return getParser().Error(IDRange.Start, "directive requires gfx8+",
2945                                  IDRange);
2946       if (!isUInt<1>(Val))
2947         return OutOfRangeError(ValRange);
2948       ReserveXNACK = Val;
2949     } else if (ID == ".amdhsa_float_round_mode_32") {
2950       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
2951                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
2952     } else if (ID == ".amdhsa_float_round_mode_16_64") {
2953       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
2954                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
2955     } else if (ID == ".amdhsa_float_denorm_mode_32") {
2956       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
2957                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
2958     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
2959       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
2960                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
2961                        ValRange);
2962     } else if (ID == ".amdhsa_dx10_clamp") {
2963       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
2964                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
2965     } else if (ID == ".amdhsa_ieee_mode") {
2966       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
2967                        Val, ValRange);
2968     } else if (ID == ".amdhsa_fp16_overflow") {
2969       if (IVersion.Major < 9)
2970         return getParser().Error(IDRange.Start, "directive requires gfx9+",
2971                                  IDRange);
2972       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
2973                        ValRange);
2974     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
2975       PARSE_BITS_ENTRY(
2976           KD.compute_pgm_rsrc2,
2977           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
2978           ValRange);
2979     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
2980       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2981                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
2982                        Val, ValRange);
2983     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
2984       PARSE_BITS_ENTRY(
2985           KD.compute_pgm_rsrc2,
2986           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
2987           ValRange);
2988     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
2989       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2990                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
2991                        Val, ValRange);
2992     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
2993       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2994                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
2995                        Val, ValRange);
2996     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
2997       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2998                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
2999                        Val, ValRange);
3000     } else if (ID == ".amdhsa_exception_int_div_zero") {
3001       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3002                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
3003                        Val, ValRange);
3004     } else {
3005       return getParser().Error(IDRange.Start,
3006                                "unknown .amdhsa_kernel directive", IDRange);
3007     }
3008 
3009 #undef PARSE_BITS_ENTRY
3010   }
3011 
3012   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
3013     return TokError(".amdhsa_next_free_vgpr directive is required");
3014 
3015   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
3016     return TokError(".amdhsa_next_free_sgpr directive is required");
3017 
3018   unsigned VGPRBlocks;
3019   unsigned SGPRBlocks;
3020   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
3021                          ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR,
3022                          SGPRRange, VGPRBlocks, SGPRBlocks))
3023     return true;
3024 
3025   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
3026           VGPRBlocks))
3027     return OutOfRangeError(VGPRRange);
3028   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3029                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
3030 
3031   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
3032           SGPRBlocks))
3033     return OutOfRangeError(SGPRRange);
3034   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3035                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
3036                   SGPRBlocks);
3037 
3038   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
3039     return TokError("too many user SGPRs enabled");
3040   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
3041                   UserSGPRCount);
3042 
3043   getTargetStreamer().EmitAmdhsaKernelDescriptor(
3044       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
3045       ReserveFlatScr, ReserveXNACK);
3046   return false;
3047 }
3048 
3049 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
3050   uint32_t Major;
3051   uint32_t Minor;
3052 
3053   if (ParseDirectiveMajorMinor(Major, Minor))
3054     return true;
3055 
3056   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
3057   return false;
3058 }
3059 
3060 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
3061   uint32_t Major;
3062   uint32_t Minor;
3063   uint32_t Stepping;
3064   StringRef VendorName;
3065   StringRef ArchName;
3066 
3067   // If this directive has no arguments, then use the ISA version for the
3068   // targeted GPU.
3069   if (getLexer().is(AsmToken::EndOfStatement)) {
3070     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3071     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
3072                                                       ISA.Stepping,
3073                                                       "AMD", "AMDGPU");
3074     return false;
3075   }
3076 
3077   if (ParseDirectiveMajorMinor(Major, Minor))
3078     return true;
3079 
3080   if (getLexer().isNot(AsmToken::Comma))
3081     return TokError("stepping version number required, comma expected");
3082   Lex();
3083 
3084   if (ParseAsAbsoluteExpression(Stepping))
3085     return TokError("invalid stepping version");
3086 
3087   if (getLexer().isNot(AsmToken::Comma))
3088     return TokError("vendor name required, comma expected");
3089   Lex();
3090 
3091   if (getLexer().isNot(AsmToken::String))
3092     return TokError("invalid vendor name");
3093 
3094   VendorName = getLexer().getTok().getStringContents();
3095   Lex();
3096 
3097   if (getLexer().isNot(AsmToken::Comma))
3098     return TokError("arch name required, comma expected");
3099   Lex();
3100 
3101   if (getLexer().isNot(AsmToken::String))
3102     return TokError("invalid arch name");
3103 
3104   ArchName = getLexer().getTok().getStringContents();
3105   Lex();
3106 
3107   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
3108                                                     VendorName, ArchName);
3109   return false;
3110 }
3111 
3112 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
3113                                                amd_kernel_code_t &Header) {
3114   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
3115   // assembly for backwards compatibility.
3116   if (ID == "max_scratch_backing_memory_byte_size") {
3117     Parser.eatToEndOfStatement();
3118     return false;
3119   }
3120 
3121   SmallString<40> ErrStr;
3122   raw_svector_ostream Err(ErrStr);
3123   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
3124     return TokError(Err.str());
3125   }
3126   Lex();
3127   return false;
3128 }
3129 
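// Parses a .amd_kernel_code_t block of "key = value" records terminated by
// .end_amd_kernel_code_t. Illustrative input (field names and values are
// examples only, not taken from a specific test):
//   .amd_kernel_code_t
//     kernel_code_entry_byte_offset = 256
//     wavefront_size = 6
//   .end_amd_kernel_code_t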
3130 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
3131   amd_kernel_code_t Header;
3132   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
3133 
3134   while (true) {
3135     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
3136     // will set the current token to EndOfStatement.
3137     while(getLexer().is(AsmToken::EndOfStatement))
3138       Lex();
3139 
3140     if (getLexer().isNot(AsmToken::Identifier))
3141       return TokError("expected value identifier or .end_amd_kernel_code_t");
3142 
3143     StringRef ID = getLexer().getTok().getIdentifier();
3144     Lex();
3145 
3146     if (ID == ".end_amd_kernel_code_t")
3147       break;
3148 
3149     if (ParseAMDKernelCodeTValue(ID, Header))
3150       return true;
3151   }
3152 
3153   getTargetStreamer().EmitAMDKernelCodeT(Header);
3154 
3155   return false;
3156 }
3157 
3158 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
3159   if (getLexer().isNot(AsmToken::Identifier))
3160     return TokError("expected symbol name");
3161 
3162   StringRef KernelName = Parser.getTok().getString();
3163 
3164   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
3165                                            ELF::STT_AMDGPU_HSA_KERNEL);
3166   Lex();
3167   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
3168     KernelScope.initialize(getContext());
3169   return false;
3170 }
3171 
3172 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
3173   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
3174     return Error(getParser().getTok().getLoc(),
3175                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
3176                  "architectures");
3177   }
3178 
3179   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
3180 
3181   std::string ISAVersionStringFromSTI;
3182   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
3183   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
3184 
3185   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
3186     return Error(getParser().getTok().getLoc(),
3187                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
3188                  "arguments specified through the command line");
3189   }
3190 
3191   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
3192   Lex();
3193 
3194   return false;
3195 }
3196 
3197 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
3198   const char *AssemblerDirectiveBegin;
3199   const char *AssemblerDirectiveEnd;
3200   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
3201       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
3202           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
3203                             HSAMD::V3::AssemblerDirectiveEnd)
3204           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
3205                             HSAMD::AssemblerDirectiveEnd);
3206 
3207   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
3208     return Error(getParser().getTok().getLoc(),
3209                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
3210                  "not available on non-amdhsa OSes")).str());
3211   }
3212 
3213   std::string HSAMetadataString;
3214   raw_string_ostream YamlStream(HSAMetadataString);
3215 
3216   getLexer().setSkipSpace(false);
3217 
3218   bool FoundEnd = false;
3219   while (!getLexer().is(AsmToken::Eof)) {
3220     while (getLexer().is(AsmToken::Space)) {
3221       YamlStream << getLexer().getTok().getString();
3222       Lex();
3223     }
3224 
3225     if (getLexer().is(AsmToken::Identifier)) {
3226       StringRef ID = getLexer().getTok().getIdentifier();
3227       if (ID == AssemblerDirectiveEnd) {
3228         Lex();
3229         FoundEnd = true;
3230         break;
3231       }
3232     }
3233 
3234     YamlStream << Parser.parseStringToEndOfStatement()
3235                << getContext().getAsmInfo()->getSeparatorString();
3236 
3237     Parser.eatToEndOfStatement();
3238   }
3239 
3240   getLexer().setSkipSpace(true);
3241 
3242   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
3243     return TokError(Twine("expected directive ") +
3244                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
3245   }
3246 
3247   YamlStream.flush();
3248 
3249   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
3250     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
3251       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3252   } else {
3253     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
3254       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3255   }
3256 
3257   return false;
3258 }
3259 
3260 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
3261   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
3262     return Error(getParser().getTok().getLoc(),
3263                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
3264                  "not available on non-amdpal OSes")).str());
3265   }
3266 
3267   PALMD::Metadata PALMetadata;
3268   for (;;) {
3269     uint32_t Value;
3270     if (ParseAsAbsoluteExpression(Value)) {
3271       return TokError(Twine("invalid value in ") +
3272                       Twine(PALMD::AssemblerDirective));
3273     }
3274     PALMetadata.push_back(Value);
3275     if (getLexer().isNot(AsmToken::Comma))
3276       break;
3277     Lex();
3278   }
3279   getTargetStreamer().EmitPALMetadata(PALMetadata);
3280   return false;
3281 }
3282 
3283 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
3284   StringRef IDVal = DirectiveID.getString();
3285 
3286   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
3287     if (IDVal == ".amdgcn_target")
3288       return ParseDirectiveAMDGCNTarget();
3289 
3290     if (IDVal == ".amdhsa_kernel")
3291       return ParseDirectiveAMDHSAKernel();
3292 
3293     // TODO: Restructure/combine with PAL metadata directive.
3294     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
3295       return ParseDirectiveHSAMetadata();
3296   } else {
3297     if (IDVal == ".hsa_code_object_version")
3298       return ParseDirectiveHSACodeObjectVersion();
3299 
3300     if (IDVal == ".hsa_code_object_isa")
3301       return ParseDirectiveHSACodeObjectISA();
3302 
3303     if (IDVal == ".amd_kernel_code_t")
3304       return ParseDirectiveAMDKernelCodeT();
3305 
3306     if (IDVal == ".amdgpu_hsa_kernel")
3307       return ParseDirectiveAMDGPUHsaKernel();
3308 
3309     if (IDVal == ".amd_amdgpu_isa")
3310       return ParseDirectiveISAVersion();
3311 
3312     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
3313       return ParseDirectiveHSAMetadata();
3314   }
3315 
3316   if (IDVal == PALMD::AssemblerDirective)
3317     return ParseDirectivePALMetadata();
3318 
3319   return true;
3320 }
3321 
3322 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
3323                                            unsigned RegNo) const {
3324 
3325   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
3326        R.isValid(); ++R) {
3327     if (*R == RegNo)
3328       return isGFX9();
3329   }
3330 
3331   switch (RegNo) {
3332   case AMDGPU::TBA:
3333   case AMDGPU::TBA_LO:
3334   case AMDGPU::TBA_HI:
3335   case AMDGPU::TMA:
3336   case AMDGPU::TMA_LO:
3337   case AMDGPU::TMA_HI:
3338     return !isGFX9();
3339   case AMDGPU::XNACK_MASK:
3340   case AMDGPU::XNACK_MASK_LO:
3341   case AMDGPU::XNACK_MASK_HI:
3342     return !isCI() && !isSI() && hasXNACK();
3343   default:
3344     break;
3345   }
3346 
3347   if (isCI())
3348     return true;
3349 
3350   if (isSI()) {
3351     // No flat_scr
3352     switch (RegNo) {
3353     case AMDGPU::FLAT_SCR:
3354     case AMDGPU::FLAT_SCR_LO:
3355     case AMDGPU::FLAT_SCR_HI:
3356       return false;
3357     default:
3358       return true;
3359     }
3360   }
3361 
3362   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
3363   // SI/CI have.
3364   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
3365        R.isValid(); ++R) {
3366     if (*R == RegNo)
3367       return false;
3368   }
3369 
3370   return true;
3371 }
3372 
3373 OperandMatchResultTy
3374 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
3375   // Try to parse with a custom parser
3376   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
3377 
3378   // If we successfully parsed the operand or if there was an error parsing,
3379   // we are done.
3380   //
3381   // If we are parsing after we reach EndOfStatement then this means we
3382   // are appending default values to the Operands list.  This is only done
3383   // by a custom parser, so we shouldn't continue on to the generic parsing.
3384   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
3385       getLexer().is(AsmToken::EndOfStatement))
3386     return ResTy;
3387 
3388   ResTy = parseRegOrImm(Operands);
3389 
3390   if (ResTy == MatchOperand_Success)
3391     return ResTy;
3392 
3393   const auto &Tok = Parser.getTok();
3394   SMLoc S = Tok.getLoc();
3395 
3396   const MCExpr *Expr = nullptr;
3397   if (!Parser.parseExpression(Expr)) {
3398     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3399     return MatchOperand_Success;
3400   }
3401 
3402   // Possibly this is an instruction flag like 'gds'.
3403   if (Tok.getKind() == AsmToken::Identifier) {
3404     Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S));
3405     Parser.Lex();
3406     return MatchOperand_Success;
3407   }
3408 
3409   return MatchOperand_NoMatch;
3410 }
3411 
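// Strip an explicit encoding suffix from the mnemonic and remember it as a
// forced encoding, e.g. (illustrative) "v_add_f32_e64" forces the 64-bit
// encoding and "v_mov_b32_sdwa" forces SDWA; the bare mnemonic is returned
// for matching.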
3412 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
3413   // Clear any forced encodings from the previous instruction.
3414   setForcedEncodingSize(0);
3415   setForcedDPP(false);
3416   setForcedSDWA(false);
3417 
3418   if (Name.endswith("_e64")) {
3419     setForcedEncodingSize(64);
3420     return Name.substr(0, Name.size() - 4);
3421   } else if (Name.endswith("_e32")) {
3422     setForcedEncodingSize(32);
3423     return Name.substr(0, Name.size() - 4);
3424   } else if (Name.endswith("_dpp")) {
3425     setForcedDPP(true);
3426     return Name.substr(0, Name.size() - 4);
3427   } else if (Name.endswith("_sdwa")) {
3428     setForcedSDWA(true);
3429     return Name.substr(0, Name.size() - 5);
3430   }
3431   return Name;
3432 }
3433 
3434 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
3435                                        StringRef Name,
3436                                        SMLoc NameLoc, OperandVector &Operands) {
3437   // Add the instruction mnemonic
3438   Name = parseMnemonicSuffix(Name);
3439   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
3440 
3441   while (!getLexer().is(AsmToken::EndOfStatement)) {
3442     OperandMatchResultTy Res = parseOperand(Operands, Name);
3443 
3444     // Eat the comma or space if there is one.
3445     if (getLexer().is(AsmToken::Comma))
3446       Parser.Lex();
3447 
3448     switch (Res) {
3449       case MatchOperand_Success: break;
3450       case MatchOperand_ParseFail:
3451         Error(getLexer().getLoc(), "failed parsing operand.");
3452         while (!getLexer().is(AsmToken::EndOfStatement)) {
3453           Parser.Lex();
3454         }
3455         return true;
3456       case MatchOperand_NoMatch:
3457         Error(getLexer().getLoc(), "not a valid operand.");
3458         while (!getLexer().is(AsmToken::EndOfStatement)) {
3459           Parser.Lex();
3460         }
3461         return true;
3462     }
3463   }
3464 
3465   return false;
3466 }
3467 
3468 //===----------------------------------------------------------------------===//
3469 // Utility functions
3470 //===----------------------------------------------------------------------===//
3471 
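// Parse an integer attached to a named prefix and a colon, e.g.
// (illustrative) "offset:4095"; a minus sign after the colon negates the
// parsed value.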
3472 OperandMatchResultTy
3473 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) {
3474   switch(getLexer().getKind()) {
3475     default: return MatchOperand_NoMatch;
3476     case AsmToken::Identifier: {
3477       StringRef Name = Parser.getTok().getString();
3478       if (!Name.equals(Prefix)) {
3479         return MatchOperand_NoMatch;
3480       }
3481 
3482       Parser.Lex();
3483       if (getLexer().isNot(AsmToken::Colon))
3484         return MatchOperand_ParseFail;
3485 
3486       Parser.Lex();
3487 
3488       bool IsMinus = false;
3489       if (getLexer().getKind() == AsmToken::Minus) {
3490         Parser.Lex();
3491         IsMinus = true;
3492       }
3493 
3494       if (getLexer().isNot(AsmToken::Integer))
3495         return MatchOperand_ParseFail;
3496 
3497       if (getParser().parseAbsoluteExpression(Int))
3498         return MatchOperand_ParseFail;
3499 
3500       if (IsMinus)
3501         Int = -Int;
3502       break;
3503     }
3504   }
3505   return MatchOperand_Success;
3506 }
3507 
3508 OperandMatchResultTy
3509 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
3510                                     AMDGPUOperand::ImmTy ImmTy,
3511                                     bool (*ConvertResult)(int64_t&)) {
3512   SMLoc S = Parser.getTok().getLoc();
3513   int64_t Value = 0;
3514 
3515   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
3516   if (Res != MatchOperand_Success)
3517     return Res;
3518 
3519   if (ConvertResult && !ConvertResult(Value)) {
3520     return MatchOperand_ParseFail;
3521   }
3522 
3523   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
3524   return MatchOperand_Success;
3525 }
3526 
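// Parse a prefixed array of 0/1 flags, e.g. (illustrative) "op_sel:[0,1,0,1]";
// element I sets bit I of the resulting immediate operand.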
3527 OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix(
3528   const char *Prefix,
3529   OperandVector &Operands,
3530   AMDGPUOperand::ImmTy ImmTy,
3531   bool (*ConvertResult)(int64_t&)) {
3532   StringRef Name = Parser.getTok().getString();
3533   if (!Name.equals(Prefix))
3534     return MatchOperand_NoMatch;
3535 
3536   Parser.Lex();
3537   if (getLexer().isNot(AsmToken::Colon))
3538     return MatchOperand_ParseFail;
3539 
3540   Parser.Lex();
3541   if (getLexer().isNot(AsmToken::LBrac))
3542     return MatchOperand_ParseFail;
3543   Parser.Lex();
3544 
3545   unsigned Val = 0;
3546   SMLoc S = Parser.getTok().getLoc();
3547 
3548   // FIXME: How to verify the number of elements matches the number of src
3549   // operands?
3550   for (int I = 0; I < 4; ++I) {
3551     if (I != 0) {
3552       if (getLexer().is(AsmToken::RBrac))
3553         break;
3554 
3555       if (getLexer().isNot(AsmToken::Comma))
3556         return MatchOperand_ParseFail;
3557       Parser.Lex();
3558     }
3559 
3560     if (getLexer().isNot(AsmToken::Integer))
3561       return MatchOperand_ParseFail;
3562 
3563     int64_t Op;
3564     if (getParser().parseAbsoluteExpression(Op))
3565       return MatchOperand_ParseFail;
3566 
3567     if (Op != 0 && Op != 1)
3568       return MatchOperand_ParseFail;
3569     Val |= (Op << I);
3570   }
3571 
3572   Parser.Lex();
3573   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
3574   return MatchOperand_Success;
3575 }
3576 
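// Parse a named bit modifier such as "glc" or "gds" (illustrative names); a
// token of the form "no" + Name clears the bit, and a missing token at the
// end of the statement leaves the default value of 0.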
3577 OperandMatchResultTy
3578 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
3579                                AMDGPUOperand::ImmTy ImmTy) {
3580   int64_t Bit = 0;
3581   SMLoc S = Parser.getTok().getLoc();
3582 
3583   // If we are at the end of the statement, this is a default argument and
3584   // the bit keeps its default value of 0; otherwise parse the named bit.
3585   if (getLexer().isNot(AsmToken::EndOfStatement)) {
3586     switch(getLexer().getKind()) {
3587       case AsmToken::Identifier: {
3588         StringRef Tok = Parser.getTok().getString();
3589         if (Tok == Name) {
3590           if (Tok == "r128" && isGFX9())
3591             Error(S, "r128 modifier is not supported on this GPU");
3592           if (Tok == "a16" && !isGFX9())
3593             Error(S, "a16 modifier is not supported on this GPU");
3594           Bit = 1;
3595           Parser.Lex();
3596         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
3597           Bit = 0;
3598           Parser.Lex();
3599         } else {
3600           return MatchOperand_NoMatch;
3601         }
3602         break;
3603       }
3604       default:
3605         return MatchOperand_NoMatch;
3606     }
3607   }
3608 
3609   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
3610   return MatchOperand_Success;
3611 }
3612 
3613 static void addOptionalImmOperand(
3614   MCInst& Inst, const OperandVector& Operands,
3615   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
3616   AMDGPUOperand::ImmTy ImmT,
3617   int64_t Default = 0) {
3618   auto i = OptionalIdx.find(ImmT);
3619   if (i != OptionalIdx.end()) {
3620     unsigned Idx = i->second;
3621     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
3622   } else {
3623     Inst.addOperand(MCOperand::createImm(Default));
3624   }
3625 }
3626 
3627 OperandMatchResultTy
3628 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
3629   if (getLexer().isNot(AsmToken::Identifier)) {
3630     return MatchOperand_NoMatch;
3631   }
3632   StringRef Tok = Parser.getTok().getString();
3633   if (Tok != Prefix) {
3634     return MatchOperand_NoMatch;
3635   }
3636 
3637   Parser.Lex();
3638   if (getLexer().isNot(AsmToken::Colon)) {
3639     return MatchOperand_ParseFail;
3640   }
3641 
3642   Parser.Lex();
3643   if (getLexer().isNot(AsmToken::Identifier)) {
3644     return MatchOperand_ParseFail;
3645   }
3646 
3647   Value = Parser.getTok().getString();
3648   return MatchOperand_Success;
3649 }
3650 
3651 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
3652 // values to live in a joint format operand in the MCInst encoding.
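// Illustrative syntax (assumed typical usage, not taken from a specific test):
//   tbuffer_load_format_x v0, off, s[0:3], dfmt:15, nfmt:2, 0
// where the joint operand is encoded as (Dfmt | Nfmt << 4).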
3653 OperandMatchResultTy
3654 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
3655   SMLoc S = Parser.getTok().getLoc();
3656   int64_t Dfmt = 0, Nfmt = 0;
3657   // dfmt and nfmt can appear in either order, and each is optional.
3658   bool GotDfmt = false, GotNfmt = false;
3659   while (!GotDfmt || !GotNfmt) {
3660     if (!GotDfmt) {
3661       auto Res = parseIntWithPrefix("dfmt", Dfmt);
3662       if (Res != MatchOperand_NoMatch) {
3663         if (Res != MatchOperand_Success)
3664           return Res;
3665         if (Dfmt >= 16) {
3666           Error(Parser.getTok().getLoc(), "out of range dfmt");
3667           return MatchOperand_ParseFail;
3668         }
3669         GotDfmt = true;
3670         Parser.Lex();
3671         continue;
3672       }
3673     }
3674     if (!GotNfmt) {
3675       auto Res = parseIntWithPrefix("nfmt", Nfmt);
3676       if (Res != MatchOperand_NoMatch) {
3677         if (Res != MatchOperand_Success)
3678           return Res;
3679         if (Nfmt >= 8) {
3680           Error(Parser.getTok().getLoc(), "out of range nfmt");
3681           return MatchOperand_ParseFail;
3682         }
3683         GotNfmt = true;
3684         Parser.Lex();
3685         continue;
3686       }
3687     }
3688     break;
3689   }
3690   if (!GotDfmt && !GotNfmt)
3691     return MatchOperand_NoMatch;
3692   auto Format = Dfmt | Nfmt << 4;
3693   Operands.push_back(
3694       AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
3695   return MatchOperand_Success;
3696 }
3697 
3698 //===----------------------------------------------------------------------===//
3699 // ds
3700 //===----------------------------------------------------------------------===//
3701 
3702 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
3703                                     const OperandVector &Operands) {
3704   OptionalImmIndexMap OptionalIdx;
3705 
3706   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3707     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3708 
3709     // Add the register arguments
3710     if (Op.isReg()) {
3711       Op.addRegOperands(Inst, 1);
3712       continue;
3713     }
3714 
3715     // Handle optional arguments
3716     OptionalIdx[Op.getImmTy()] = i;
3717   }
3718 
3719   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
3720   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
3721   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
3722 
3723   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
3724 }
3725 
3726 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
3727                                 bool IsGdsHardcoded) {
3728   OptionalImmIndexMap OptionalIdx;
3729 
3730   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3731     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3732 
3733     // Add the register arguments
3734     if (Op.isReg()) {
3735       Op.addRegOperands(Inst, 1);
3736       continue;
3737     }
3738 
3739     if (Op.isToken() && Op.getToken() == "gds") {
3740       IsGdsHardcoded = true;
3741       continue;
3742     }
3743 
3744     // Handle optional arguments
3745     OptionalIdx[Op.getImmTy()] = i;
3746   }
3747 
3748   AMDGPUOperand::ImmTy OffsetType =
3749     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si ||
3750      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
3751                                                       AMDGPUOperand::ImmTyOffset;
3752 
3753   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
3754 
3755   if (!IsGdsHardcoded) {
3756     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
3757   }
3758   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
3759 }
3760 
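// Convert a parsed "exp" instruction. Register and "off" sources are added
// first; the trailing EN mask gets one bit per enabled source, or two bits
// per enabled source pair when the "compr" modifier is present.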
3761 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
3762   OptionalImmIndexMap OptionalIdx;
3763 
3764   unsigned OperandIdx[4];
3765   unsigned EnMask = 0;
3766   int SrcIdx = 0;
3767 
3768   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3769     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3770 
3771     // Add the register arguments
3772     if (Op.isReg()) {
3773       assert(SrcIdx < 4);
3774       OperandIdx[SrcIdx] = Inst.size();
3775       Op.addRegOperands(Inst, 1);
3776       ++SrcIdx;
3777       continue;
3778     }
3779 
3780     if (Op.isOff()) {
3781       assert(SrcIdx < 4);
3782       OperandIdx[SrcIdx] = Inst.size();
3783       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
3784       ++SrcIdx;
3785       continue;
3786     }
3787 
3788     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
3789       Op.addImmOperands(Inst, 1);
3790       continue;
3791     }
3792 
3793     if (Op.isToken() && Op.getToken() == "done")
3794       continue;
3795 
3796     // Handle optional arguments
3797     OptionalIdx[Op.getImmTy()] = i;
3798   }
3799 
3800   assert(SrcIdx == 4);
3801 
3802   bool Compr = false;
3803   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
3804     Compr = true;
3805     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
3806     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
3807     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
3808   }
3809 
3810   for (auto i = 0; i < SrcIdx; ++i) {
3811     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
3812       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
3813     }
3814   }
3815 
3816   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
3817   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
3818 
3819   Inst.addOperand(MCOperand::createImm(EnMask));
3820 }
3821 
3822 //===----------------------------------------------------------------------===//
3823 // s_waitcnt
3824 //===----------------------------------------------------------------------===//
3825 
3826 static bool
3827 encodeCnt(
3828   const AMDGPU::IsaVersion ISA,
3829   int64_t &IntVal,
3830   int64_t CntVal,
3831   bool Saturate,
3832   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
3833   unsigned (*decode)(const IsaVersion &Version, unsigned))
3834 {
3835   bool Failed = false;
3836 
3837   IntVal = encode(ISA, IntVal, CntVal);
3838   if (CntVal != decode(ISA, IntVal)) {
3839     if (Saturate) {
3840       IntVal = encode(ISA, IntVal, -1);
3841     } else {
3842       Failed = true;
3843     }
3844   }
3845   return Failed;
3846 }
3847 
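// Parse one named counter of an s_waitcnt operand. Illustrative syntax
// (assumed typical usage):
//   s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
// Each counter is folded into the shared 16-bit immediate; a "_sat" suffix
// clamps an out-of-range value instead of reporting an error.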
3848 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
3849   StringRef CntName = Parser.getTok().getString();
3850   int64_t CntVal;
3851 
3852   Parser.Lex();
3853   if (getLexer().isNot(AsmToken::LParen))
3854     return true;
3855 
3856   Parser.Lex();
3857   if (getLexer().isNot(AsmToken::Integer))
3858     return true;
3859 
3860   SMLoc ValLoc = Parser.getTok().getLoc();
3861   if (getParser().parseAbsoluteExpression(CntVal))
3862     return true;
3863 
3864   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3865 
3866   bool Failed = true;
3867   bool Sat = CntName.endswith("_sat");
3868 
3869   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
3870     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
3871   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
3872     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
3873   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
3874     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
3875   }
3876 
3877   if (Failed) {
3878     Error(ValLoc, "too large value for " + CntName);
3879     return true;
3880   }
3881 
3882   if (getLexer().isNot(AsmToken::RParen)) {
3883     return true;
3884   }
3885 
3886   Parser.Lex();
3887   if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
3888     const AsmToken NextToken = getLexer().peekTok();
3889     if (NextToken.is(AsmToken::Identifier)) {
3890       Parser.Lex();
3891     }
3892   }
3893 
3894   return false;
3895 }
3896 
3897 OperandMatchResultTy
3898 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
3899   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3900   int64_t Waitcnt = getWaitcntBitMask(ISA);
3901   SMLoc S = Parser.getTok().getLoc();
3902 
3903   switch(getLexer().getKind()) {
3904     default: return MatchOperand_ParseFail;
3905     case AsmToken::Integer:
3906       // The operand can be an integer value.
3907       if (getParser().parseAbsoluteExpression(Waitcnt))
3908         return MatchOperand_ParseFail;
3909       break;
3910 
3911     case AsmToken::Identifier:
3912       do {
3913         if (parseCnt(Waitcnt))
3914           return MatchOperand_ParseFail;
3915       } while(getLexer().isNot(AsmToken::EndOfStatement));
3916       break;
3917   }
3918   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
3919   return MatchOperand_Success;
3920 }
3921 
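// Parse the operand of s_getreg/s_setreg. Illustrative forms (the symbolic
// register name below is an example only):
//   hwreg(HW_REG_GPR_ALLOC)
//   hwreg(5, 1, 28)            (id, bit offset, bit width)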
3922 bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
3923                                           int64_t &Width) {
3924   using namespace llvm::AMDGPU::Hwreg;
3925 
3926   if (Parser.getTok().getString() != "hwreg")
3927     return true;
3928   Parser.Lex();
3929 
3930   if (getLexer().isNot(AsmToken::LParen))
3931     return true;
3932   Parser.Lex();
3933 
3934   if (getLexer().is(AsmToken::Identifier)) {
3935     HwReg.IsSymbolic = true;
3936     HwReg.Id = ID_UNKNOWN_;
3937     const StringRef tok = Parser.getTok().getString();
3938     int Last = ID_SYMBOLIC_LAST_;
3939     if (isSI() || isCI() || isVI())
3940       Last = ID_SYMBOLIC_FIRST_GFX9_;
3941     for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) {
3942       if (tok == IdSymbolic[i]) {
3943         HwReg.Id = i;
3944         break;
3945       }
3946     }
3947     Parser.Lex();
3948   } else {
3949     HwReg.IsSymbolic = false;
3950     if (getLexer().isNot(AsmToken::Integer))
3951       return true;
3952     if (getParser().parseAbsoluteExpression(HwReg.Id))
3953       return true;
3954   }
3955 
3956   if (getLexer().is(AsmToken::RParen)) {
3957     Parser.Lex();
3958     return false;
3959   }
3960 
3961   // optional params
3962   if (getLexer().isNot(AsmToken::Comma))
3963     return true;
3964   Parser.Lex();
3965 
3966   if (getLexer().isNot(AsmToken::Integer))
3967     return true;
3968   if (getParser().parseAbsoluteExpression(Offset))
3969     return true;
3970 
3971   if (getLexer().isNot(AsmToken::Comma))
3972     return true;
3973   Parser.Lex();
3974 
3975   if (getLexer().isNot(AsmToken::Integer))
3976     return true;
3977   if (getParser().parseAbsoluteExpression(Width))
3978     return true;
3979 
3980   if (getLexer().isNot(AsmToken::RParen))
3981     return true;
3982   Parser.Lex();
3983 
3984   return false;
3985 }
3986 
3987 OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
3988   using namespace llvm::AMDGPU::Hwreg;
3989 
3990   int64_t Imm16Val = 0;
3991   SMLoc S = Parser.getTok().getLoc();
3992 
3993   switch(getLexer().getKind()) {
3994     default: return MatchOperand_NoMatch;
3995     case AsmToken::Integer:
3996       // The operand can be an integer value.
3997       if (getParser().parseAbsoluteExpression(Imm16Val))
3998         return MatchOperand_NoMatch;
3999       if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
4000         Error(S, "invalid immediate: only 16-bit values are legal");
4001         // Do not return an error code; create an imm operand anyway and proceed
4002         // to the next operand, if any. That avoids unnecessary error messages.
4003       }
4004       break;
4005 
4006     case AsmToken::Identifier: {
4007         OperandInfoTy HwReg(ID_UNKNOWN_);
4008         int64_t Offset = OFFSET_DEFAULT_;
4009         int64_t Width = WIDTH_M1_DEFAULT_ + 1;
4010         if (parseHwregConstruct(HwReg, Offset, Width))
4011           return MatchOperand_ParseFail;
4012         if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) {
4013           if (HwReg.IsSymbolic)
4014             Error(S, "invalid symbolic name of hardware register");
4015           else
4016             Error(S, "invalid code of hardware register: only 6-bit values are legal");
4017         }
4018         if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset))
4019           Error(S, "invalid bit offset: only 5-bit values are legal");
4020         if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1))
4021           Error(S, "invalid bitfield width: only values from 1 to 32 are legal");
4022         Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_);
4023       }
4024       break;
4025   }
4026   Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
4027   return MatchOperand_Success;
4028 }
4029 
4030 bool AMDGPUOperand::isSWaitCnt() const {
4031   return isImm();
4032 }
4033 
4034 bool AMDGPUOperand::isHwreg() const {
4035   return isImmTy(ImmTyHwreg);
4036 }
4037 
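// Parse the operand of s_sendmsg. Illustrative forms (symbolic names are
// examples only):
//   sendmsg(MSG_INTERRUPT)
//   sendmsg(MSG_GS, GS_OP_EMIT, 0)
//   sendmsg(MSG_SYSMSG, SYSMSG_OP_ECC_ERR_INTERRUPT)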
4038 bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) {
4039   using namespace llvm::AMDGPU::SendMsg;
4040 
4041   if (Parser.getTok().getString() != "sendmsg")
4042     return true;
4043   Parser.Lex();
4044 
4045   if (getLexer().isNot(AsmToken::LParen))
4046     return true;
4047   Parser.Lex();
4048 
4049   if (getLexer().is(AsmToken::Identifier)) {
4050     Msg.IsSymbolic = true;
4051     Msg.Id = ID_UNKNOWN_;
4052     const std::string tok = Parser.getTok().getString();
4053     for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
4054       switch(i) {
4055         default: continue; // Omit gaps.
4056         case ID_INTERRUPT: case ID_GS: case ID_GS_DONE:  case ID_SYSMSG: break;
4057       }
4058       if (tok == IdSymbolic[i]) {
4059         Msg.Id = i;
4060         break;
4061       }
4062     }
4063     Parser.Lex();
4064   } else {
4065     Msg.IsSymbolic = false;
4066     if (getLexer().isNot(AsmToken::Integer))
4067       return true;
4068     if (getParser().parseAbsoluteExpression(Msg.Id))
4069       return true;
4070     if (getLexer().is(AsmToken::Integer))
4071       if (getParser().parseAbsoluteExpression(Msg.Id))
4072         Msg.Id = ID_UNKNOWN_;
4073   }
4074   if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest.
4075     return false;
4076 
4077   if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) {
4078     if (getLexer().isNot(AsmToken::RParen))
4079       return true;
4080     Parser.Lex();
4081     return false;
4082   }
4083 
4084   if (getLexer().isNot(AsmToken::Comma))
4085     return true;
4086   Parser.Lex();
4087 
4088   assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG);
4089   Operation.Id = ID_UNKNOWN_;
4090   if (getLexer().is(AsmToken::Identifier)) {
4091     Operation.IsSymbolic = true;
4092     const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
4093     const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
4094     const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
4095     const StringRef Tok = Parser.getTok().getString();
4096     for (int i = F; i < L; ++i) {
4097       if (Tok == S[i]) {
4098         Operation.Id = i;
4099         break;
4100       }
4101     }
4102     Parser.Lex();
4103   } else {
4104     Operation.IsSymbolic = false;
4105     if (getLexer().isNot(AsmToken::Integer))
4106       return true;
4107     if (getParser().parseAbsoluteExpression(Operation.Id))
4108       return true;
4109   }
4110 
4111   if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
4112     // Stream id is optional.
4113     if (getLexer().is(AsmToken::RParen)) {
4114       Parser.Lex();
4115       return false;
4116     }
4117 
4118     if (getLexer().isNot(AsmToken::Comma))
4119       return true;
4120     Parser.Lex();
4121 
4122     if (getLexer().isNot(AsmToken::Integer))
4123       return true;
4124     if (getParser().parseAbsoluteExpression(StreamId))
4125       return true;
4126   }
4127 
4128   if (getLexer().isNot(AsmToken::RParen))
4129     return true;
4130   Parser.Lex();
4131   return false;
4132 }
4133 
4134 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
4135   if (getLexer().getKind() != AsmToken::Identifier)
4136     return MatchOperand_NoMatch;
4137 
4138   StringRef Str = Parser.getTok().getString();
4139   int Slot = StringSwitch<int>(Str)
4140     .Case("p10", 0)
4141     .Case("p20", 1)
4142     .Case("p0", 2)
4143     .Default(-1);
4144 
4145   SMLoc S = Parser.getTok().getLoc();
4146   if (Slot == -1)
4147     return MatchOperand_ParseFail;
4148 
4149   Parser.Lex();
4150   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
4151                                               AMDGPUOperand::ImmTyInterpSlot));
4152   return MatchOperand_Success;
4153 }
4154 
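// Parse an interpolation attribute operand such as "attr4.x" (illustrative):
// "attr" is followed by a decimal attribute index (0..63) and a channel
// suffix ".x", ".y", ".z" or ".w".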
4155 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
4156   if (getLexer().getKind() != AsmToken::Identifier)
4157     return MatchOperand_NoMatch;
4158 
4159   StringRef Str = Parser.getTok().getString();
4160   if (!Str.startswith("attr"))
4161     return MatchOperand_NoMatch;
4162 
4163   StringRef Chan = Str.take_back(2);
4164   int AttrChan = StringSwitch<int>(Chan)
4165     .Case(".x", 0)
4166     .Case(".y", 1)
4167     .Case(".z", 2)
4168     .Case(".w", 3)
4169     .Default(-1);
4170   if (AttrChan == -1)
4171     return MatchOperand_ParseFail;
4172 
4173   Str = Str.drop_back(2).drop_front(4);
4174 
4175   uint8_t Attr;
4176   if (Str.getAsInteger(10, Attr))
4177     return MatchOperand_ParseFail;
4178 
4179   SMLoc S = Parser.getTok().getLoc();
4180   Parser.Lex();
4181   if (Attr > 63) {
4182     Error(S, "out of bounds attr");
4183     return MatchOperand_Success;
4184   }
4185 
4186   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
4187 
4188   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
4189                                               AMDGPUOperand::ImmTyInterpAttr));
4190   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
4191                                               AMDGPUOperand::ImmTyAttrChan));
4192   return MatchOperand_Success;
4193 }
4194 
4195 void AMDGPUAsmParser::errorExpTgt() {
4196   Error(Parser.getTok().getLoc(), "invalid exp target");
4197 }
4198 
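// Map an export target name to its hardware value. From the checks below:
// "mrt0".."mrt7" -> 0..7, "mrtz" -> 8, "null" -> 9, "pos0".."pos3" -> 12..15,
// and "param0".."param31" -> 32..63.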
4199 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
4200                                                       uint8_t &Val) {
4201   if (Str == "null") {
4202     Val = 9;
4203     return MatchOperand_Success;
4204   }
4205 
4206   if (Str.startswith("mrt")) {
4207     Str = Str.drop_front(3);
4208     if (Str == "z") { // == mrtz
4209       Val = 8;
4210       return MatchOperand_Success;
4211     }
4212 
4213     if (Str.getAsInteger(10, Val))
4214       return MatchOperand_ParseFail;
4215 
4216     if (Val > 7)
4217       errorExpTgt();
4218 
4219     return MatchOperand_Success;
4220   }
4221 
4222   if (Str.startswith("pos")) {
4223     Str = Str.drop_front(3);
4224     if (Str.getAsInteger(10, Val))
4225       return MatchOperand_ParseFail;
4226 
4227     if (Val > 3)
4228       errorExpTgt();
4229 
4230     Val += 12;
4231     return MatchOperand_Success;
4232   }
4233 
4234   if (Str.startswith("param")) {
4235     Str = Str.drop_front(5);
4236     if (Str.getAsInteger(10, Val))
4237       return MatchOperand_ParseFail;
4238 
4239     if (Val >= 32)
4240       errorExpTgt();
4241 
4242     Val += 32;
4243     return MatchOperand_Success;
4244   }
4245 
4246   if (Str.startswith("invalid_target_")) {
4247     Str = Str.drop_front(15);
4248     if (Str.getAsInteger(10, Val))
4249       return MatchOperand_ParseFail;
4250 
4251     errorExpTgt();
4252     return MatchOperand_Success;
4253   }
4254 
4255   return MatchOperand_NoMatch;
4256 }
4257 
4258 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
4259   uint8_t Val;
4260   StringRef Str = Parser.getTok().getString();
4261 
4262   auto Res = parseExpTgtImpl(Str, Val);
4263   if (Res != MatchOperand_Success)
4264     return Res;
4265 
4266   SMLoc S = Parser.getTok().getLoc();
4267   Parser.Lex();
4268 
4269   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
4270                                               AMDGPUOperand::ImmTyExpTgt));
4271   return MatchOperand_Success;
4272 }
4273 
4274 OperandMatchResultTy
4275 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
4276   using namespace llvm::AMDGPU::SendMsg;
4277 
4278   int64_t Imm16Val = 0;
4279   SMLoc S = Parser.getTok().getLoc();
4280 
4281   switch(getLexer().getKind()) {
4282   default:
4283     return MatchOperand_NoMatch;
4284   case AsmToken::Integer:
4285     // The operand can be an integer value.
4286     if (getParser().parseAbsoluteExpression(Imm16Val))
4287       return MatchOperand_NoMatch;
4288     if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
4289       Error(S, "invalid immediate: only 16-bit values are legal");
4290       // Do not return an error code; create an imm operand anyway and proceed
4291       // to the next operand, if any. That avoids unnecessary error messages.
4292     }
4293     break;
4294   case AsmToken::Identifier: {
4295       OperandInfoTy Msg(ID_UNKNOWN_);
4296       OperandInfoTy Operation(OP_UNKNOWN_);
4297       int64_t StreamId = STREAM_ID_DEFAULT_;
4298       if (parseSendMsgConstruct(Msg, Operation, StreamId))
4299         return MatchOperand_ParseFail;
4300       do {
4301         // Validate and encode message ID.
4302         if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE)
4303                 || Msg.Id == ID_SYSMSG)) {
4304           if (Msg.IsSymbolic)
4305             Error(S, "invalid/unsupported symbolic name of message");
4306           else
4307             Error(S, "invalid/unsupported code of message");
4308           break;
4309         }
4310         Imm16Val = (Msg.Id << ID_SHIFT_);
4311         // Validate and encode operation ID.
4312         if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) {
4313           if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) {
4314             if (Operation.IsSymbolic)
4315               Error(S, "invalid symbolic name of GS_OP");
4316             else
4317               Error(S, "invalid code of GS_OP: only 2-bit values are legal");
4318             break;
4319           }
4320           if (Operation.Id == OP_GS_NOP
4321               && Msg.Id != ID_GS_DONE) {
4322             Error(S, "invalid GS_OP: NOP is for GS_DONE only");
4323             break;
4324           }
4325           Imm16Val |= (Operation.Id << OP_SHIFT_);
4326         }
4327         if (Msg.Id == ID_SYSMSG) {
4328           if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) {
4329             if (Operation.IsSymbolic)
4330               Error(S, "invalid/unsupported symbolic name of SYSMSG_OP");
4331             else
4332               Error(S, "invalid/unsupported code of SYSMSG_OP");
4333             break;
4334           }
4335           Imm16Val |= (Operation.Id << OP_SHIFT_);
4336         }
4337         // Validate and encode stream ID.
4338         if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
4339           if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) {
4340             Error(S, "invalid stream id: only 2-bit values are legal");
4341             break;
4342           }
4343           Imm16Val |= (StreamId << STREAM_ID_SHIFT_);
4344         }
4345       } while (false);
4346     }
4347     break;
4348   }
4349   Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
4350   return MatchOperand_Success;
4351 }
4352 
4353 bool AMDGPUOperand::isSendMsg() const {
4354   return isImmTy(ImmTySendMsg);
4355 }
4356 
4357 //===----------------------------------------------------------------------===//
4358 // parser helpers
4359 //===----------------------------------------------------------------------===//
4360 
4361 bool
4362 AMDGPUAsmParser::trySkipId(const StringRef Id) {
4363   if (getLexer().getKind() == AsmToken::Identifier &&
4364       Parser.getTok().getString() == Id) {
4365     Parser.Lex();
4366     return true;
4367   }
4368   return false;
4369 }
4370 
4371 bool
4372 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
4373   if (getLexer().getKind() == Kind) {
4374     Parser.Lex();
4375     return true;
4376   }
4377   return false;
4378 }
4379 
4380 bool
4381 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
4382                            const StringRef ErrMsg) {
4383   if (!trySkipToken(Kind)) {
4384     Error(Parser.getTok().getLoc(), ErrMsg);
4385     return false;
4386   }
4387   return true;
4388 }
4389 
4390 bool
4391 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
4392   return !getParser().parseAbsoluteExpression(Imm);
4393 }
4394 
4395 bool
4396 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
4397   SMLoc S = Parser.getTok().getLoc();
4398   if (getLexer().getKind() == AsmToken::String) {
4399     Val = Parser.getTok().getStringContents();
4400     Parser.Lex();
4401     return true;
4402   } else {
4403     Error(S, ErrMsg);
4404     return false;
4405   }
4406 }
4407 
4408 //===----------------------------------------------------------------------===//
4409 // swizzle
4410 //===----------------------------------------------------------------------===//
4411 
4412 LLVM_READNONE
4413 static unsigned
4414 encodeBitmaskPerm(const unsigned AndMask,
4415                   const unsigned OrMask,
4416                   const unsigned XorMask) {
4417   using namespace llvm::AMDGPU::Swizzle;
4418 
4419   return BITMASK_PERM_ENC |
4420          (AndMask << BITMASK_AND_SHIFT) |
4421          (OrMask  << BITMASK_OR_SHIFT)  |
4422          (XorMask << BITMASK_XOR_SHIFT);
4423 }
4424 
4425 bool
4426 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
4427                                       const unsigned MinVal,
4428                                       const unsigned MaxVal,
4429                                       const StringRef ErrMsg) {
4430   for (unsigned i = 0; i < OpNum; ++i) {
4431     if (!skipToken(AsmToken::Comma, "expected a comma")){
4432       return false;
4433     }
4434     SMLoc ExprLoc = Parser.getTok().getLoc();
4435     if (!parseExpr(Op[i])) {
4436       return false;
4437     }
4438     if (Op[i] < MinVal || Op[i] > MaxVal) {
4439       Error(ExprLoc, ErrMsg);
4440       return false;
4441     }
4442   }
4443 
4444   return true;
4445 }
4446 
4447 bool
4448 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
4449   using namespace llvm::AMDGPU::Swizzle;
4450 
4451   int64_t Lane[LANE_NUM];
4452   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
4453                            "expected a 2-bit lane id")) {
4454     Imm = QUAD_PERM_ENC;
4455     for (auto i = 0; i < LANE_NUM; ++i) {
4456       Imm |= Lane[i] << (LANE_SHIFT * i);
4457     }
4458     return true;
4459   }
4460   return false;
4461 }
4462 
4463 bool
4464 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
4465   using namespace llvm::AMDGPU::Swizzle;
4466 
4467   SMLoc S = Parser.getTok().getLoc();
4468   int64_t GroupSize;
4469   int64_t LaneIdx;
4470 
4471   if (!parseSwizzleOperands(1, &GroupSize,
4472                             2, 32,
4473                             "group size must be in the interval [2,32]")) {
4474     return false;
4475   }
4476   if (!isPowerOf2_64(GroupSize)) {
4477     Error(S, "group size must be a power of two");
4478     return false;
4479   }
4480   if (parseSwizzleOperands(1, &LaneIdx,
4481                            0, GroupSize - 1,
4482                            "lane id must be in the interval [0,group size - 1]")) {
4483     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
4484     return true;
4485   }
4486   return false;
4487 }
4488 
4489 bool
4490 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
4491   using namespace llvm::AMDGPU::Swizzle;
4492 
4493   SMLoc S = Parser.getTok().getLoc();
4494   int64_t GroupSize;
4495 
4496   if (!parseSwizzleOperands(1, &GroupSize,
4497       2, 32, "group size must be in the interval [2,32]")) {
4498     return false;
4499   }
4500   if (!isPowerOf2_64(GroupSize)) {
4501     Error(S, "group size must be a power of two");
4502     return false;
4503   }
4504 
4505   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
4506   return true;
4507 }
4508 
4509 bool
4510 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
4511   using namespace llvm::AMDGPU::Swizzle;
4512 
4513   SMLoc S = Parser.getTok().getLoc();
4514   int64_t GroupSize;
4515 
4516   if (!parseSwizzleOperands(1, &GroupSize,
4517       1, 16, "group size must be in the interval [1,16]")) {
4518     return false;
4519   }
4520   if (!isPowerOf2_64(GroupSize)) {
4521     Error(S, "group size must be a power of two");
4522     return false;
4523   }
4524 
4525   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
4526   return true;
4527 }
4528 
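// Parse the quoted 5-character BITMASK_PERM control string. Per the switch
// below, each character (most significant bit first) produces one bit of the
// resulting lane id: '0' -> 0, '1' -> 1, 'p' -> pass the source bit through,
// 'i' -> invert the source bit.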
4529 bool
4530 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
4531   using namespace llvm::AMDGPU::Swizzle;
4532 
4533   if (!skipToken(AsmToken::Comma, "expected a comma")) {
4534     return false;
4535   }
4536 
4537   StringRef Ctl;
4538   SMLoc StrLoc = Parser.getTok().getLoc();
4539   if (!parseString(Ctl)) {
4540     return false;
4541   }
4542   if (Ctl.size() != BITMASK_WIDTH) {
4543     Error(StrLoc, "expected a 5-character mask");
4544     return false;
4545   }
4546 
4547   unsigned AndMask = 0;
4548   unsigned OrMask = 0;
4549   unsigned XorMask = 0;
4550 
4551   for (size_t i = 0; i < Ctl.size(); ++i) {
4552     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
4553     switch(Ctl[i]) {
4554     default:
4555       Error(StrLoc, "invalid mask");
4556       return false;
4557     case '0':
4558       break;
4559     case '1':
4560       OrMask |= Mask;
4561       break;
4562     case 'p':
4563       AndMask |= Mask;
4564       break;
4565     case 'i':
4566       AndMask |= Mask;
4567       XorMask |= Mask;
4568       break;
4569     }
4570   }
4571 
4572   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
4573   return true;
4574 }
4575 
4576 bool
4577 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
4578 
4579   SMLoc OffsetLoc = Parser.getTok().getLoc();
4580 
4581   if (!parseExpr(Imm)) {
4582     return false;
4583   }
4584   if (!isUInt<16>(Imm)) {
4585     Error(OffsetLoc, "expected a 16-bit offset");
4586     return false;
4587   }
4588   return true;
4589 }
4590 
4591 bool
4592 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
4593   using namespace llvm::AMDGPU::Swizzle;
4594 
4595   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
4596 
4597     SMLoc ModeLoc = Parser.getTok().getLoc();
4598     bool Ok = false;
4599 
4600     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
4601       Ok = parseSwizzleQuadPerm(Imm);
4602     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
4603       Ok = parseSwizzleBitmaskPerm(Imm);
4604     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
4605       Ok = parseSwizzleBroadcast(Imm);
4606     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
4607       Ok = parseSwizzleSwap(Imm);
4608     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
4609       Ok = parseSwizzleReverse(Imm);
4610     } else {
4611       Error(ModeLoc, "expected a swizzle mode");
4612     }
4613 
4614     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
4615   }
4616 
4617   return false;
4618 }
4619 
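// Parse the swizzle operand of ds_swizzle_b32. Illustrative forms (assumed
// typical usage):
//   offset:0xABCD
//   offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   offset:swizzle(BITMASK_PERM, "01pi0")
//   offset:swizzle(BROADCAST, 8, 0)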
4620 OperandMatchResultTy
4621 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
4622   SMLoc S = Parser.getTok().getLoc();
4623   int64_t Imm = 0;
4624 
4625   if (trySkipId("offset")) {
4626 
4627     bool Ok = false;
4628     if (skipToken(AsmToken::Colon, "expected a colon")) {
4629       if (trySkipId("swizzle")) {
4630         Ok = parseSwizzleMacro(Imm);
4631       } else {
4632         Ok = parseSwizzleOffset(Imm);
4633       }
4634     }
4635 
4636     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
4637 
4638     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
4639   } else {
4640     // Swizzle "offset" operand is optional.
4641     // If it is omitted, try parsing other optional operands.
4642     return parseOptionalOpr(Operands);
4643   }
4644 }
4645 
4646 bool
4647 AMDGPUOperand::isSwizzle() const {
4648   return isImmTy(ImmTySwizzle);
4649 }
4650 
4651 //===----------------------------------------------------------------------===//
4652 // sopp branch targets
4653 //===----------------------------------------------------------------------===//
4654 
4655 OperandMatchResultTy
4656 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
4657   SMLoc S = Parser.getTok().getLoc();
4658 
4659   switch (getLexer().getKind()) {
4660     default: return MatchOperand_ParseFail;
4661     case AsmToken::Integer: {
4662       int64_t Imm;
4663       if (getParser().parseAbsoluteExpression(Imm))
4664         return MatchOperand_ParseFail;
4665       Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
4666       return MatchOperand_Success;
4667     }
4668 
4669     case AsmToken::Identifier:
4670       Operands.push_back(AMDGPUOperand::CreateExpr(this,
4671           MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
4672                                   Parser.getTok().getString()), getContext()), S));
4673       Parser.Lex();
4674       return MatchOperand_Success;
4675   }
4676 }
4677 
4678 //===----------------------------------------------------------------------===//
4679 // mubuf
4680 //===----------------------------------------------------------------------===//
4681 
4682 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
4683   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
4684 }
4685 
4686 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
4687   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
4688 }
4689 
4690 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
4691                                const OperandVector &Operands,
4692                                bool IsAtomic,
4693                                bool IsAtomicReturn,
4694                                bool IsLds) {
4695   bool IsLdsOpcode = IsLds;
4696   bool HasLdsModifier = false;
4697   OptionalImmIndexMap OptionalIdx;
4698   assert(IsAtomicReturn ? IsAtomic : true);
4699 
4700   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4701     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4702 
4703     // Add the register arguments
4704     if (Op.isReg()) {
4705       Op.addRegOperands(Inst, 1);
4706       continue;
4707     }
4708 
4709     // Handle the case where soffset is an immediate
4710     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
4711       Op.addImmOperands(Inst, 1);
4712       continue;
4713     }
4714 
4715     HasLdsModifier = Op.isLDS();
4716 
4717     // Handle tokens like 'offen' which are sometimes hard-coded into the
4718     // asm string.  There are no MCInst operands for these.
4719     if (Op.isToken()) {
4720       continue;
4721     }
4722     assert(Op.isImm());
4723 
4724     // Handle optional arguments
4725     OptionalIdx[Op.getImmTy()] = i;
4726   }
4727 
  // This is a workaround for an LLVM quirk which may result in an
  // incorrect instruction selection. The lds and non-lds versions of
  // MUBUF instructions are identical except that the lds versions
  // have a mandatory 'lds' modifier. However, this modifier follows
  // the optional modifiers, so the LLVM asm matcher treats 'lds' as
  // optional as well. As a result, the lds version of an opcode may
  // be selected even if the source has no 'lds' modifier.
4735   if (IsLdsOpcode && !HasLdsModifier) {
4736     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
4737     if (NoLdsOpcode != -1) { // Got lds version - correct it.
4738       Inst.setOpcode(NoLdsOpcode);
4739       IsLdsOpcode = false;
4740     }
4741   }
4742 
4743   // Copy $vdata_in operand and insert as $vdata for MUBUF_Atomic RTN insns.
4744   if (IsAtomicReturn) {
4745     MCInst::iterator I = Inst.begin(); // $vdata_in is always at the beginning.
4746     Inst.insert(I, *I);
4747   }
4748 
4749   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
4750   if (!IsAtomic) { // glc is hard-coded.
4751     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
4752   }
4753   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
4754 
4755   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
4756     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
4757   }
4758 }
4759 
4760 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
4761   OptionalImmIndexMap OptionalIdx;
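
  // Illustrative (assumed) example of the syntax converted here:
  //   tbuffer_load_format_x v1, off, s[4:7], dfmt:15, nfmt:2, s1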
4762 
4763   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4764     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4765 
4766     // Add the register arguments
4767     if (Op.isReg()) {
4768       Op.addRegOperands(Inst, 1);
4769       continue;
4770     }
4771 
4772     // Handle the case where soffset is an immediate
4773     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
4774       Op.addImmOperands(Inst, 1);
4775       continue;
4776     }
4777 
4778     // Handle tokens like 'offen' which are sometimes hard-coded into the
4779     // asm string.  There are no MCInst operands for these.
4780     if (Op.isToken()) {
4781       continue;
4782     }
4783     assert(Op.isImm());
4784 
4785     // Handle optional arguments
4786     OptionalIdx[Op.getImmTy()] = i;
4787   }
4788 
4789   addOptionalImmOperand(Inst, Operands, OptionalIdx,
4790                         AMDGPUOperand::ImmTyOffset);
4791   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
4792   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
4793   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
4794   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
4795 }
4796 
4797 //===----------------------------------------------------------------------===//
4798 // mimg
4799 //===----------------------------------------------------------------------===//
4800 
4801 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
4802                               bool IsAtomic) {
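  // Illustrative (assumed) example of an MIMG instruction converted here:
  //   image_load v[4:7], v[0:3], s[8:15] dmask:0xf unorm glc
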
4803   unsigned I = 1;
4804   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4805   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4806     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4807   }
4808 
4809   if (IsAtomic) {
4810     // Add src, same as dst
4811     assert(Desc.getNumDefs() == 1);
4812     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
4813   }
4814 
4815   OptionalImmIndexMap OptionalIdx;
4816 
4817   for (unsigned E = Operands.size(); I != E; ++I) {
4818     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4819 
4820     // Add the register arguments
4821     if (Op.isReg()) {
4822       Op.addRegOperands(Inst, 1);
4823     } else if (Op.isImmModifier()) {
4824       OptionalIdx[Op.getImmTy()] = I;
4825     } else {
4826       llvm_unreachable("unexpected operand type");
4827     }
4828   }
4829 
4830   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
4831   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
4832   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
4833   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
4834   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
4835   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
4836   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
4837   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
4838   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
4839 }
4840 
4841 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
4842   cvtMIMG(Inst, Operands, true);
4843 }
4844 
4845 //===----------------------------------------------------------------------===//
4846 // smrd
4847 //===----------------------------------------------------------------------===//
4848 
4849 bool AMDGPUOperand::isSMRDOffset8() const {
4850   return isImm() && isUInt<8>(getImm());
4851 }
4852 
4853 bool AMDGPUOperand::isSMRDOffset20() const {
4854   return isImm() && isUInt<20>(getImm());
4855 }
4856 
4857 bool AMDGPUOperand::isSMRDLiteralOffset() const {
  // 32-bit literals are only supported on CI, and we only want to use them
  // when the offset does not fit in 8 bits.
4860   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
4861 }
4862 
4863 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
4864   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4865 }
4866 
4867 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
4868   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4869 }
4870 
4871 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
4872   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4873 }
4874 
4875 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
4876   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4877 }
4878 
4879 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
4880   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4881 }
4882 
4883 //===----------------------------------------------------------------------===//
4884 // vop3
4885 //===----------------------------------------------------------------------===//
4886 
4887 static bool ConvertOmodMul(int64_t &Mul) {
4888   if (Mul != 1 && Mul != 2 && Mul != 4)
4889     return false;
4890 
4891   Mul >>= 1;
4892   return true;
4893 }
4894 
4895 static bool ConvertOmodDiv(int64_t &Div) {
4896   if (Div == 1) {
4897     Div = 0;
4898     return true;
4899   }
4900 
4901   if (Div == 2) {
4902     Div = 3;
4903     return true;
4904   }
4905 
4906   return false;
4907 }
4908 
4909 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
4910   if (BoundCtrl == 0) {
4911     BoundCtrl = 1;
4912     return true;
4913   }
4914 
4915   if (BoundCtrl == -1) {
4916     BoundCtrl = 0;
4917     return true;
4918   }
4919 
4920   return false;
4921 }
4922 
4923 // Note: the order in this table matches the order of operands in AsmString.
4924 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
4925   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
4926   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
4927   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
4928   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
4929   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
4930   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
4931   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
4932   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
4933   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
4934   {"dfmt",    AMDGPUOperand::ImmTyFORMAT, false, nullptr},
4935   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
4936   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
4937   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
4938   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
4939   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
4940   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
4941   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
4942   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
4943   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
4944   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
4945   {"a16",     AMDGPUOperand::ImmTyR128A16,  true, nullptr},
4946   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
4947   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
4948   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
4949   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
4950   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
4951   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
4952   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
4953   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
4954   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
4955   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
4956   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
4957   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
4958   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
4959   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
4960   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
4961   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
4962 };
4963 
4964 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
4965   unsigned size = Operands.size();
4966   assert(size > 0);
4967 
4968   OperandMatchResultTy res = parseOptionalOpr(Operands);
4969 
  // This is a hack to support hardcoded mandatory operands which follow
  // optional operands.
  //
  // The current design assumes that all operands after the first optional
  // operand are also optional. However, the implementation of some
  // instructions violates this rule (see e.g. flat/global atomics which have
  // hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure the autogenerated parser for custom operands never hits a
  // hardcoded mandatory operand.
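
  // A rough (assumed) example: a flat/global atomic with return such as
  //   flat_atomic_add v0, v[1:2], v2 offset:8 glc
  // ends in a hardcoded 'glc' token that follows the optional 'offset'.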
4980 
4981   if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
4982 
4983     // We have parsed the first optional operand.
4984     // Parse as many operands as necessary to skip all mandatory operands.
4985 
4986     for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
4987       if (res != MatchOperand_Success ||
4988           getLexer().is(AsmToken::EndOfStatement)) break;
4989       if (getLexer().is(AsmToken::Comma)) Parser.Lex();
4990       res = parseOptionalOpr(Operands);
4991     }
4992   }
4993 
4994   return res;
4995 }
4996 
4997 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
4998   OperandMatchResultTy res;
4999   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
5000     // try to parse any optional operand here
5001     if (Op.IsBit) {
5002       res = parseNamedBit(Op.Name, Operands, Op.Type);
5003     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
5004       res = parseOModOperand(Operands);
5005     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
5006                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
5007                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
5008       res = parseSDWASel(Operands, Op.Name, Op.Type);
5009     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
5010       res = parseSDWADstUnused(Operands);
5011     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
5012                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
5013                Op.Type == AMDGPUOperand::ImmTyNegLo ||
5014                Op.Type == AMDGPUOperand::ImmTyNegHi) {
5015       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
5016                                         Op.ConvertResult);
5017     } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT) {
5018       res = parseDfmtNfmt(Operands);
5019     } else {
5020       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
5021     }
5022     if (res != MatchOperand_NoMatch) {
5023       return res;
5024     }
5025   }
5026   return MatchOperand_NoMatch;
5027 }
5028 
5029 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
5030   StringRef Name = Parser.getTok().getString();
5031   if (Name == "mul") {
5032     return parseIntWithPrefix("mul", Operands,
5033                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
5034   }
5035 
5036   if (Name == "div") {
5037     return parseIntWithPrefix("div", Operands,
5038                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
5039   }
5040 
5041   return MatchOperand_NoMatch;
5042 }
5043 
5044 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
5045   cvtVOP3P(Inst, Operands);
5046 
5047   int Opc = Inst.getOpcode();
5048 
5049   int SrcNum;
5050   const int Ops[] = { AMDGPU::OpName::src0,
5051                       AMDGPU::OpName::src1,
5052                       AMDGPU::OpName::src2 };
5053   for (SrcNum = 0;
5054        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
5055        ++SrcNum);
5056   assert(SrcNum > 0);
5057 
5058   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
5059   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
5060 
5061   if ((OpSel & (1 << SrcNum)) != 0) {
5062     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
5063     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
5064     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
5065   }
5066 }
5067 
5068 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
      // 1. This operand is an input modifier.
  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not the last operand.
      && Desc.NumOperands > (OpNum + 1)
      // 3. The next operand is a register class.
      && Desc.OpInfo[OpNum + 1].RegClass != -1
      // 4. The next operand is not tied to any other operand.
      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
5077 }
5078 
5079 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
5080 {
5081   OptionalImmIndexMap OptionalIdx;
5082   unsigned Opc = Inst.getOpcode();
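
  // Illustrative (assumed) example of a VOP3 interpolation instruction:
  //   v_interp_p1ll_f16 v5, v2, attr0.x high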
5083 
5084   unsigned I = 1;
5085   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5086   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5087     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5088   }
5089 
5090   for (unsigned E = Operands.size(); I != E; ++I) {
5091     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5092     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5093       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
5094     } else if (Op.isInterpSlot() ||
5095                Op.isInterpAttr() ||
5096                Op.isAttrChan()) {
5097       Inst.addOperand(MCOperand::createImm(Op.Imm.Val));
5098     } else if (Op.isImmModifier()) {
5099       OptionalIdx[Op.getImmTy()] = I;
5100     } else {
5101       llvm_unreachable("unhandled operand type");
5102     }
5103   }
5104 
5105   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
5106     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
5107   }
5108 
5109   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
5110     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
5111   }
5112 
5113   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
5114     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
5115   }
5116 }
5117 
5118 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
5119                               OptionalImmIndexMap &OptionalIdx) {
5120   unsigned Opc = Inst.getOpcode();
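
  // Illustrative (assumed) example of a VOP3 instruction with source
  // modifiers and optional clamp/omod operands:
  //   v_add_f32_e64 v0, -v1, |v2| clamp mul:2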
5121 
5122   unsigned I = 1;
5123   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5124   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5125     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5126   }
5127 
5128   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
5129     // This instruction has src modifiers
5130     for (unsigned E = Operands.size(); I != E; ++I) {
5131       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5132       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5133         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
5134       } else if (Op.isImmModifier()) {
5135         OptionalIdx[Op.getImmTy()] = I;
5136       } else if (Op.isRegOrImm()) {
5137         Op.addRegOrImmOperands(Inst, 1);
5138       } else {
5139         llvm_unreachable("unhandled operand type");
5140       }
5141     }
5142   } else {
5143     // No src modifiers
5144     for (unsigned E = Operands.size(); I != E; ++I) {
5145       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5146       if (Op.isMod()) {
5147         OptionalIdx[Op.getImmTy()] = I;
5148       } else {
5149         Op.addRegOrImmOperands(Inst, 1);
5150       }
5151     }
5152   }
5153 
5154   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
5155     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
5156   }
5157 
5158   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
5159     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
5160   }
5161 
  // Special case for v_mac_{f16, f32} and v_fmac_f32 (gfx906):
  // these have a src2 register operand that is tied to the dst operand.
  // The assembler does not allow modifiers for this operand, so
  // src2_modifiers must be 0.
5166   if (Opc == AMDGPU::V_MAC_F32_e64_si ||
5167       Opc == AMDGPU::V_MAC_F32_e64_vi ||
5168       Opc == AMDGPU::V_MAC_F16_e64_vi ||
5169       Opc == AMDGPU::V_FMAC_F32_e64_vi) {
5170     auto it = Inst.begin();
5171     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
5172     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
5173     ++it;
5174     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
5175   }
5176 }
5177 
5178 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
5179   OptionalImmIndexMap OptionalIdx;
5180   cvtVOP3(Inst, Operands, OptionalIdx);
5181 }
5182 
5183 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
5184                                const OperandVector &Operands) {
5185   OptionalImmIndexMap OptIdx;
5186   const int Opc = Inst.getOpcode();
5187   const MCInstrDesc &Desc = MII.get(Opc);
5188 
5189   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
5190 
5191   cvtVOP3(Inst, Operands, OptIdx);
5192 
5193   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
5194     assert(!IsPacked);
5195     Inst.addOperand(Inst.getOperand(0));
5196   }
5197 
  // FIXME: This is messy. Parse the modifiers as if this were a normal VOP3
  // instruction, and then figure out where to actually put the modifiers.
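
  // Illustrative (assumed) example of a packed VOP3P instruction:
  //   v_pk_add_f16 v0, v1, v2 op_sel:[0,1] op_sel_hi:[1,0] neg_lo:[0,1]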
5200 
5201   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
5202 
5203   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
5204   if (OpSelHiIdx != -1) {
5205     int DefaultVal = IsPacked ? -1 : 0;
5206     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
5207                           DefaultVal);
5208   }
5209 
5210   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
5211   if (NegLoIdx != -1) {
5212     assert(IsPacked);
5213     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
5214     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
5215   }
5216 
5217   const int Ops[] = { AMDGPU::OpName::src0,
5218                       AMDGPU::OpName::src1,
5219                       AMDGPU::OpName::src2 };
5220   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
5221                          AMDGPU::OpName::src1_modifiers,
5222                          AMDGPU::OpName::src2_modifiers };
5223 
5224   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
5225 
5226   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
5227   unsigned OpSelHi = 0;
5228   unsigned NegLo = 0;
5229   unsigned NegHi = 0;
5230 
5231   if (OpSelHiIdx != -1) {
5232     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
5233   }
5234 
5235   if (NegLoIdx != -1) {
5236     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
5237     NegLo = Inst.getOperand(NegLoIdx).getImm();
5238     NegHi = Inst.getOperand(NegHiIdx).getImm();
5239   }
5240 
5241   for (int J = 0; J < 3; ++J) {
5242     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
5243     if (OpIdx == -1)
5244       break;
5245 
5246     uint32_t ModVal = 0;
5247 
5248     if ((OpSel & (1 << J)) != 0)
5249       ModVal |= SISrcMods::OP_SEL_0;
5250 
5251     if ((OpSelHi & (1 << J)) != 0)
5252       ModVal |= SISrcMods::OP_SEL_1;
5253 
5254     if ((NegLo & (1 << J)) != 0)
5255       ModVal |= SISrcMods::NEG;
5256 
5257     if ((NegHi & (1 << J)) != 0)
5258       ModVal |= SISrcMods::NEG_HI;
5259 
5260     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
5261 
5262     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
5263   }
5264 }
5265 
5266 //===----------------------------------------------------------------------===//
5267 // dpp
5268 //===----------------------------------------------------------------------===//
5269 
5270 bool AMDGPUOperand::isDPPCtrl() const {
5271   using namespace AMDGPU::DPP;
5272 
5273   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
5274   if (result) {
5275     int64_t Imm = getImm();
5276     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
5277            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
5278            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
5279            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
5280            (Imm == DppCtrl::WAVE_SHL1) ||
5281            (Imm == DppCtrl::WAVE_ROL1) ||
5282            (Imm == DppCtrl::WAVE_SHR1) ||
5283            (Imm == DppCtrl::WAVE_ROR1) ||
5284            (Imm == DppCtrl::ROW_MIRROR) ||
5285            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
5286            (Imm == DppCtrl::BCAST15) ||
5287            (Imm == DppCtrl::BCAST31);
5288   }
5289   return false;
5290 }
5291 
5292 bool AMDGPUOperand::isGPRIdxMode() const {
5293   return isImm() && isUInt<4>(getImm());
5294 }
5295 
5296 bool AMDGPUOperand::isS16Imm() const {
5297   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
5298 }
5299 
5300 bool AMDGPUOperand::isU16Imm() const {
5301   return isImm() && isUInt<16>(getImm());
5302 }
5303 
5304 OperandMatchResultTy
5305 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
5306   using namespace AMDGPU::DPP;
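
  // Illustrative examples of the dpp_ctrl syntax accepted below:
  //   quad_perm:[0,1,2,3]  row_shl:1  row_mirror  wave_ror:1  row_bcast:15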
5307 
5308   SMLoc S = Parser.getTok().getLoc();
5309   StringRef Prefix;
5310   int64_t Int;
5311 
5312   if (getLexer().getKind() == AsmToken::Identifier) {
5313     Prefix = Parser.getTok().getString();
5314   } else {
5315     return MatchOperand_NoMatch;
5316   }
5317 
5318   if (Prefix == "row_mirror") {
5319     Int = DppCtrl::ROW_MIRROR;
5320     Parser.Lex();
5321   } else if (Prefix == "row_half_mirror") {
5322     Int = DppCtrl::ROW_HALF_MIRROR;
5323     Parser.Lex();
5324   } else {
5325     // Check to prevent parseDPPCtrlOps from eating invalid tokens
5326     if (Prefix != "quad_perm"
5327         && Prefix != "row_shl"
5328         && Prefix != "row_shr"
5329         && Prefix != "row_ror"
5330         && Prefix != "wave_shl"
5331         && Prefix != "wave_rol"
5332         && Prefix != "wave_shr"
5333         && Prefix != "wave_ror"
5334         && Prefix != "row_bcast") {
5335       return MatchOperand_NoMatch;
5336     }
5337 
5338     Parser.Lex();
5339     if (getLexer().isNot(AsmToken::Colon))
5340       return MatchOperand_ParseFail;
5341 
5342     if (Prefix == "quad_perm") {
5343       // quad_perm:[%d,%d,%d,%d]
5344       Parser.Lex();
5345       if (getLexer().isNot(AsmToken::LBrac))
5346         return MatchOperand_ParseFail;
5347       Parser.Lex();
5348 
5349       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
5350         return MatchOperand_ParseFail;
5351 
5352       for (int i = 0; i < 3; ++i) {
5353         if (getLexer().isNot(AsmToken::Comma))
5354           return MatchOperand_ParseFail;
5355         Parser.Lex();
5356 
5357         int64_t Temp;
5358         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
5359           return MatchOperand_ParseFail;
5360         const int shift = i*2 + 2;
5361         Int += (Temp << shift);
5362       }
5363 
5364       if (getLexer().isNot(AsmToken::RBrac))
5365         return MatchOperand_ParseFail;
5366       Parser.Lex();
5367     } else {
5368       // sel:%d
5369       Parser.Lex();
5370       if (getParser().parseAbsoluteExpression(Int))
5371         return MatchOperand_ParseFail;
5372 
5373       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
5374         Int |= DppCtrl::ROW_SHL0;
5375       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
5376         Int |= DppCtrl::ROW_SHR0;
5377       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
5378         Int |= DppCtrl::ROW_ROR0;
5379       } else if (Prefix == "wave_shl" && 1 == Int) {
5380         Int = DppCtrl::WAVE_SHL1;
5381       } else if (Prefix == "wave_rol" && 1 == Int) {
5382         Int = DppCtrl::WAVE_ROL1;
5383       } else if (Prefix == "wave_shr" && 1 == Int) {
5384         Int = DppCtrl::WAVE_SHR1;
5385       } else if (Prefix == "wave_ror" && 1 == Int) {
5386         Int = DppCtrl::WAVE_ROR1;
5387       } else if (Prefix == "row_bcast") {
5388         if (Int == 15) {
5389           Int = DppCtrl::BCAST15;
5390         } else if (Int == 31) {
5391           Int = DppCtrl::BCAST31;
5392         } else {
5393           return MatchOperand_ParseFail;
5394         }
5395       } else {
5396         return MatchOperand_ParseFail;
5397       }
5398     }
5399   }
5400 
5401   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
5402   return MatchOperand_Success;
5403 }
5404 
5405 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
5406   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
5407 }
5408 
5409 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
5410   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
5411 }
5412 
5413 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
5414   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
5415 }
5416 
5417 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
5418   OptionalImmIndexMap OptionalIdx;
5419 
5420   unsigned I = 1;
5421   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5422   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5423     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5424   }
5425 
5426   for (unsigned E = Operands.size(); I != E; ++I) {
5427     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
5428                                             MCOI::TIED_TO);
5429     if (TiedTo != -1) {
5430       assert((unsigned)TiedTo < Inst.getNumOperands());
      // Handle tied 'old' or src2 operands for MAC instructions.
5432       Inst.addOperand(Inst.getOperand(TiedTo));
5433     }
5434     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5435     // Add the register arguments
5436     if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp uses the "vcc" token.
      // Skip it.
5439       continue;
    } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5441       Op.addRegWithFPInputModsOperands(Inst, 2);
5442     } else if (Op.isDPPCtrl()) {
5443       Op.addImmOperands(Inst, 1);
5444     } else if (Op.isImm()) {
5445       // Handle optional arguments
5446       OptionalIdx[Op.getImmTy()] = I;
5447     } else {
5448       llvm_unreachable("Invalid operand type");
5449     }
5450   }
5451 
5452   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
5453   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
5454   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
5455 }
5456 
5457 //===----------------------------------------------------------------------===//
5458 // sdwa
5459 //===----------------------------------------------------------------------===//
5460 
5461 OperandMatchResultTy
5462 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
5463                               AMDGPUOperand::ImmTy Type) {
5464   using namespace llvm::AMDGPU::SDWA;
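
  // Illustrative examples of the operands parsed here (values must match the
  // StringSwitch below), e.g. dst_sel:BYTE_0, src0_sel:WORD_1, src1_sel:DWORD.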
5465 
5466   SMLoc S = Parser.getTok().getLoc();
5467   StringRef Value;
5468   OperandMatchResultTy res;
5469 
5470   res = parseStringWithPrefix(Prefix, Value);
5471   if (res != MatchOperand_Success) {
5472     return res;
5473   }
5474 
5475   int64_t Int;
5476   Int = StringSwitch<int64_t>(Value)
5477         .Case("BYTE_0", SdwaSel::BYTE_0)
5478         .Case("BYTE_1", SdwaSel::BYTE_1)
5479         .Case("BYTE_2", SdwaSel::BYTE_2)
5480         .Case("BYTE_3", SdwaSel::BYTE_3)
5481         .Case("WORD_0", SdwaSel::WORD_0)
5482         .Case("WORD_1", SdwaSel::WORD_1)
5483         .Case("DWORD", SdwaSel::DWORD)
5484         .Default(0xffffffff);
5485   Parser.Lex(); // eat last token
5486 
5487   if (Int == 0xffffffff) {
5488     return MatchOperand_ParseFail;
5489   }
5490 
5491   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
5492   return MatchOperand_Success;
5493 }
5494 
5495 OperandMatchResultTy
5496 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
5497   using namespace llvm::AMDGPU::SDWA;
5498 
5499   SMLoc S = Parser.getTok().getLoc();
5500   StringRef Value;
5501   OperandMatchResultTy res;
5502 
5503   res = parseStringWithPrefix("dst_unused", Value);
5504   if (res != MatchOperand_Success) {
5505     return res;
5506   }
5507 
5508   int64_t Int;
5509   Int = StringSwitch<int64_t>(Value)
5510         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
5511         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
5512         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
5513         .Default(0xffffffff);
5514   Parser.Lex(); // eat last token
5515 
5516   if (Int == 0xffffffff) {
5517     return MatchOperand_ParseFail;
5518   }
5519 
5520   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
5521   return MatchOperand_Success;
5522 }
5523 
5524 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
5525   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
5526 }
5527 
5528 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
5529   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
5530 }
5531 
5532 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
5533   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
5534 }
5535 
5536 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
5537   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
5538 }
5539 
5540 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
5541                               uint64_t BasicInstType, bool skipVcc) {
5542   using namespace llvm::AMDGPU::SDWA;
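
  // Illustrative (assumed) example of an SDWA instruction converted here:
  //   v_mov_b32_sdwa v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD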
5543 
5544   OptionalImmIndexMap OptionalIdx;
5545   bool skippedVcc = false;
5546 
5547   unsigned I = 1;
5548   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5549   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5550     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5551   }
5552 
5553   for (unsigned E = Operands.size(); I != E; ++I) {
5554     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5555     if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa uses the "vcc" token as dst.
      // Skip it if it is the 2nd operand (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or the 4th operand (v_addc_u32_sdwa v1, vcc, v2, v3, vcc).
      // Skip VCC only if we didn't skip it on the previous iteration.
5560       if (BasicInstType == SIInstrFlags::VOP2 &&
5561           (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
5562         skippedVcc = true;
5563         continue;
5564       } else if (BasicInstType == SIInstrFlags::VOPC &&
5565                  Inst.getNumOperands() == 0) {
5566         skippedVcc = true;
5567         continue;
5568       }
5569     }
5570     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5571       Op.addRegOrImmWithInputModsOperands(Inst, 2);
5572     } else if (Op.isImm()) {
5573       // Handle optional arguments
5574       OptionalIdx[Op.getImmTy()] = I;
5575     } else {
5576       llvm_unreachable("Invalid operand type");
5577     }
5578     skippedVcc = false;
5579   }
5580 
5581   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
5582       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi and v_nop_sdwa_gfx9 have no optional sdwa arguments.
5584     switch (BasicInstType) {
5585     case SIInstrFlags::VOP1:
5586       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
5587       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
5588         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
5589       }
5590       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
5591       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
5592       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
5593       break;
5594 
5595     case SIInstrFlags::VOP2:
5596       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
5597       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
5598         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
5599       }
5600       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
5601       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
5602       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
5603       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
5604       break;
5605 
5606     case SIInstrFlags::VOPC:
5607       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
5608       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
5609       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
5610       break;
5611 
5612     default:
5613       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
5614     }
5615   }
5616 
  // Special case for v_mac_{f16, f32}:
  // these have a src2 register operand that is tied to the dst operand.
5619   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
5620       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
5621     auto it = Inst.begin();
5622     std::advance(
5623       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
5624     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
5625   }
5626 }
5627 
5628 /// Force static initialization.
5629 extern "C" void LLVMInitializeAMDGPUAsmParser() {
5630   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
5631   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
5632 }
5633 
5634 #define GET_REGISTER_MATCHER
5635 #define GET_MATCHER_IMPLEMENTATION
5636 #define GET_MNEMONIC_SPELL_CHECKER
5637 #include "AMDGPUGenAsmMatcher.inc"
5638 
// This function should be defined after the auto-generated include so that
// the MatchClassKind enum is defined.
5641 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
5642                                                      unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks whether we were given an immediate operand but
  // the matcher expects the corresponding token.
5647   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
5648   switch (Kind) {
5649   case MCK_addr64:
5650     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
5651   case MCK_gds:
5652     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
5653   case MCK_lds:
5654     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
5655   case MCK_glc:
5656     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
5657   case MCK_idxen:
5658     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
5659   case MCK_offen:
5660     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
5661   case MCK_SSrcB32:
5662     // When operands have expression values, they will return true for isToken,
5663     // because it is not possible to distinguish between a token and an
5664     // expression at parse time. MatchInstructionImpl() will always try to
5665     // match an operand as a token, when isToken returns true, and when the
5666     // name of the expression is not a valid token, the match will fail,
5667     // so we need to handle it here.
5668     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
5669   case MCK_SSrcF32:
5670     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
5671   case MCK_SoppBrTarget:
5672     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
5673   case MCK_VReg32OrOff:
5674     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
5675   case MCK_InterpSlot:
5676     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
5677   case MCK_Attr:
5678     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
5679   case MCK_AttrChan:
5680     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
5681   default:
5682     return Match_InvalidOperand;
5683   }
5684 }
5685