1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "Utils/AMDGPUAsmUtils.h"
16 #include "Utils/AMDGPUBaseInfo.h"
17 #include "Utils/AMDKernelCodeTUtils.h"
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SmallBitVector.h"
23 #include "llvm/ADT/SmallString.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/ADT/StringSwitch.h"
26 #include "llvm/ADT/Twine.h"
27 #include "llvm/BinaryFormat/ELF.h"
28 #include "llvm/MC/MCAsmInfo.h"
29 #include "llvm/MC/MCContext.h"
30 #include "llvm/MC/MCExpr.h"
31 #include "llvm/MC/MCInst.h"
32 #include "llvm/MC/MCInstrDesc.h"
33 #include "llvm/MC/MCInstrInfo.h"
34 #include "llvm/MC/MCParser/MCAsmLexer.h"
35 #include "llvm/MC/MCParser/MCAsmParser.h"
36 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
37 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
38 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
39 #include "llvm/MC/MCRegisterInfo.h"
40 #include "llvm/MC/MCStreamer.h"
41 #include "llvm/MC/MCSubtargetInfo.h"
42 #include "llvm/MC/MCSymbol.h"
43 #include "llvm/Support/AMDGPUMetadata.h"
44 #include "llvm/Support/AMDHSAKernelDescriptor.h"
45 #include "llvm/Support/Casting.h"
46 #include "llvm/Support/Compiler.h"
47 #include "llvm/Support/ErrorHandling.h"
48 #include "llvm/Support/MachineValueType.h"
49 #include "llvm/Support/MathExtras.h"
50 #include "llvm/Support/SMLoc.h"
51 #include "llvm/Support/TargetParser.h"
52 #include "llvm/Support/TargetRegistry.h"
53 #include "llvm/Support/raw_ostream.h"
54 #include <algorithm>
55 #include <cassert>
56 #include <cstdint>
57 #include <cstring>
58 #include <iterator>
59 #include <map>
60 #include <memory>
61 #include <string>
62 
63 using namespace llvm;
64 using namespace llvm::AMDGPU;
65 using namespace llvm::amdhsa;
66 
67 namespace {
68 
69 class AMDGPUAsmParser;
70 
71 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };
72 
73 //===----------------------------------------------------------------------===//
74 // Operand
75 //===----------------------------------------------------------------------===//
76 
/// A single operand parsed from AMDGPU assembly. An operand is exactly one
/// of: a textual token, an immediate (with optional type tag and modifiers),
/// a register (with optional modifiers), or an MCExpr that could not be
/// resolved at parse time.
class AMDGPUOperand : public MCParsedAsmOperand {
  // Discriminator for the union of payloads below.
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  // Back-pointer to the parser that created this operand; used by helpers
  // (isInlinableImm, isRegClass, ...) that need subtarget information.
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
    : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  /// Source modifiers attached to a register or immediate operand.
  /// Abs/Neg are floating-point modifiers; Sext is an integer modifier.
  /// The two groups are mutually exclusive (see getModifiersOperand).
  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    // Encode the FP modifiers into the SISrcMods immediate format.
    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    // Encode the integer modifiers into the SISrcMods immediate format.
    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    // Encode whichever modifier group is present (0 if none).
    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  /// Tags distinguishing the many flavors of immediate operands (named
  /// instruction fields such as offsets, DPP/SDWA controls, flags, ...).
  /// ImmTyNone marks a plain numeric immediate.
  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyGLC,
    ImmTySLC,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyEndpgm,
    ImmTyHigh
  };

  // Payload for Kind == Token: a non-owning view into the parsed text.
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  // Payload for Kind == Immediate.
  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  // Payload for Kind == Register.
  struct RegOp {
    unsigned RegNo;
    bool IsForcedVOP3;
    Modifiers Mods;
  };

  // Active member is selected by Kind.
  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

  bool isToken() const override {
    if (Kind == Token)
      return true;

    if (Kind != Expression || !Expr)
      return false;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret is a token, then we treat the symbol name as the token.
    return isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  // True if the immediate fits the hardware inline-constant encoding for
  // the given type (defined elsewhere in this file).
  bool isInlinableImm(MVT type) const;
  // True if the immediate must be emitted as a literal for the given type.
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  // A "plain" register operand: no abs/neg/sext modifiers attached.
  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  // Any VGPR register class, of any width handled here.
  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  // Any immediate carrying a named-field tag (i.e. not a plain number).
  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  // One predicate per named immediate field; used by the generated matcher.
  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
  bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  // True if this is a register belonging to register class RCID
  // (defined elsewhere in this file).
  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  // "SCSrc*": an SGPR-class register or an inline constant, no modifiers.
  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  // "SSrc*": SCSrc or a literal constant (or expression where allowed).
  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  // "VCSrc*": a VS (VGPR-or-SGPR) class register or inline constant.
  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  // "VSrc*": VCSrc or a literal constant (or expression where allowed).
  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcF16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isVSrcB16();
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isVSrcF16();
  }

  // KImm: literal constant carried in the instruction's K-field.
  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  // AMDGPU assembly has no dedicated memory-operand syntax.
  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMRDOffset20() const;
  bool isSMRDLiteralOffset() const;
  bool isDPPCtrl() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  // For an expression operand that is a bare symbol reference, return the
  // symbol name so it can be treated as a token (see isToken above).
  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  // Modifiers are only meaningful on registers and untyped immediates.
  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  // Emits two MCInst operands: the encoded modifiers, then the value itself.
  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  // Debug helper: print the symbolic name of an immediate type tag.
  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyEndpgm:
      OS << "Endpgm";
      break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  // Factory: immediate operand.
  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  // Factory: token operand. The operand keeps a non-owning pointer into Str,
  // so the caller's string must outlive the operand.
  // NOTE(review): HasExplicitEncodingSize is currently unused.
  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  // Factory: register operand.
  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E,
                                      bool ForceVOP3) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->Reg.IsForcedVOP3 = ForceVOP3;
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  // Factory: expression operand.
  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};
787 
788 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
789   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
790   return OS;
791 }
792 
793 //===----------------------------------------------------------------------===//
794 // AsmParser
795 //===----------------------------------------------------------------------===//
796 
797 // Holds info related to the current kernel, e.g. count of SGPRs used.
798 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
799 // .amdgpu_hsa_kernel or at EOF.
800 class KernelScopeInfo {
801   int SgprIndexUnusedMin = -1;
802   int VgprIndexUnusedMin = -1;
803   MCContext *Ctx = nullptr;
804 
805   void usesSgprAt(int i) {
806     if (i >= SgprIndexUnusedMin) {
807       SgprIndexUnusedMin = ++i;
808       if (Ctx) {
809         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
810         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
811       }
812     }
813   }
814 
815   void usesVgprAt(int i) {
816     if (i >= VgprIndexUnusedMin) {
817       VgprIndexUnusedMin = ++i;
818       if (Ctx) {
819         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
820         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
821       }
822     }
823   }
824 
825 public:
826   KernelScopeInfo() = default;
827 
828   void initialize(MCContext &Context) {
829     Ctx = &Context;
830     usesSgprAt(SgprIndexUnusedMin = -1);
831     usesVgprAt(VgprIndexUnusedMin = -1);
832   }
833 
834   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
835     switch (RegKind) {
836       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
837       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
838       default: break;
839     }
840   }
841 };
842 
/// Target assembly parser for AMDGPU (SI and later). Only the class head and
/// member declarations are in this region; method bodies appear below/later
/// in the file.
class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  // Encoding forced by an instruction-name suffix (e.g. _e32/_e64);
  // 0 means no forced encoding.
  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          unsigned NextFreeVGPR, SMRange VGPRRange,
                          unsigned NextFreeSGPR, SMRange SGPRRange,
                          unsigned &VGPRBlocks, unsigned &SGPRBlocks);
  // Handlers for target-specific assembler directives.
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  // Register-list and register-syntax parsing helpers.
  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1,
                             unsigned RegNum);
  bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
                           unsigned& RegNum, unsigned& RegWidth,
                           unsigned *DwordRegIndex);
  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  // Bookkeeping for the ".amdgcn.next_free_{v,s}gpr" style count symbols.
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  // Shared conversion code for MUBUF and DS instruction forms.
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
928 
929   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
930                const MCInstrInfo &MII,
931                const MCTargetOptions &Options)
932       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
933     MCAsmParserExtension::Initialize(Parser);
934 
935     if (getFeatureBits().none()) {
936       // Set default features.
937       copySTI().ToggleFeature("southern-islands");
938     }
939 
940     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
941 
942     {
943       // TODO: make those pre-defined variables read-only.
944       // Currently there is none suitable machinery in the core llvm-mc for this.
945       // MCSymbol::isRedefinable is intended for another purpose, and
946       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
947       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
948       MCContext &Ctx = getContext();
949       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
950         MCSymbol *Sym =
951             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
952         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
953         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
954         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
955         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
956         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
957       } else {
958         MCSymbol *Sym =
959             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
960         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
961         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
962         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
963         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
964         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
965       }
966       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
967         initializeGprCountSymbol(IS_VGPR);
968         initializeGprCountSymbol(IS_SGPR);
969       } else
970         KernelScope.initialize(getContext());
971     }
972   }
973 
  // Subtarget feature predicates. Most forward to AMDGPUBaseInfo helpers so
  // the answers stay consistent with the rest of the backend.

  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  // Generation checks.
  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  // SGPR102/SGPR103 are accepted on every generation this parser supports
  // except VI.
  bool hasSGPR102_SGPR103() const {
    return !isVI();
  }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }
1017 
1018   AMDGPUTargetStreamer &getTargetStreamer() {
1019     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1020     return static_cast<AMDGPUTargetStreamer &>(TS);
1021   }
1022 
  // Register info from the MC context.
  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  // Instruction info captured at construction time.
  const MCInstrInfo *getMII() const {
    return &MII;
  }

  // Current subtarget feature bits.
  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }
1036 
  // "Forced" encoding state: set while parsing a mnemonic suffix (see
  // parseMnemonicSuffix) to steer matching toward a particular encoding.
  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  // A forced 64-bit encoding size selects the VOP3 form.
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  // Matcher variants to try, filtered by the forced-encoding state above.
  ArrayRef<unsigned> getMatchedVariants() const;
1046 
  // Top-level parsing entry points (overrides of MCTargetAsmParser).
  std::unique_ptr<AMDGPUOperand> parseRegister();
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  // Helpers for operands expressed as "prefix:value" and similar forms.
  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy parseOperandArrayWithPrefix(
    const char *Prefix,
    OperandVector &Operands,
    AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
    bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(const char *Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value);

  // Immediates, registers and source-modifier (abs/neg) wrappers.
  bool parseAbsoluteExpr(int64_t &Val, bool HasSP3AbsModifier = false);
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);

  // DS / export instruction converters.
  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  // Wait-count and hardware-register operand parsing.
  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);
1102 
private:
  // Value of a named sub-operand (e.g. a sendmsg message id): the numeric Id
  // plus whether it was written symbolically (by name) in the source.
  struct OperandInfoTy {
    int64_t Id;
    bool IsSymbolic = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
  bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);

  void errorExpTgt();
  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);

  // Post-match instruction validation; each returns false on a violation.
  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst);
  bool validateEarlyClobberLimitations(const MCInst &Inst);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateLdsDirect(const MCInst &Inst);
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  // Low-level token utilities wrapping the lexer.
  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();
1146 
public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);

  // ds_swizzle operand parsing: the generic offset form plus the named macro
  // forms (QUAD_PERM, BITMASK_PERM, BROADCAST, SWAP, REVERSE).
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  // MUBUF/MTBUF converters; the wrappers select atomic/return/LDS variants.
  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  // Default (implicit) operand factories used by the generated matcher.
  AMDGPUOperand::Ptr defaultGLC() const;
  AMDGPUOperand::Ptr defaultSLC() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMRDOffset20() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultOffsetU12() const;
  AMDGPUOperand::Ptr defaultOffsetS13() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  // VOP3 / VOP3P converters.
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  // MIMG converters.
  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);

  // DPP support.
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands);

  // SDWA support.
  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                uint64_t BasicInstType, bool skipVcc = false);

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1220 };
1221 
// Table entry describing one optional operand: its textual name, the
// immediate type it produces, whether it is a bare named bit (no ":value"
// part), and an optional value-conversion callback.
struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};
1228 
1229 } // end anonymous namespace
1230 
1231 // May be called with integer type with equivalent bitwidth.
1232 static const fltSemantics *getFltSemantics(unsigned Size) {
1233   switch (Size) {
1234   case 4:
1235     return &APFloat::IEEEsingle();
1236   case 8:
1237     return &APFloat::IEEEdouble();
1238   case 2:
1239     return &APFloat::IEEEhalf();
1240   default:
1241     llvm_unreachable("unsupported fp type");
1242   }
1243 }
1244 
1245 static const fltSemantics *getFltSemantics(MVT VT) {
1246   return getFltSemantics(VT.getSizeInBits() / 8);
1247 }
1248 
// Return the IEEE semantics matching the bit width of the given SI operand
// type (32-, 64- or 16-bit families, including packed 16-bit pairs).
static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  // Packed 16-bit operands use half semantics per element.
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}
1272 
1273 //===----------------------------------------------------------------------===//
1274 // Operand
1275 //===----------------------------------------------------------------------===//
1276 
// Check whether FPLiteral can be converted to VT's float semantics without
// overflow or underflow; mere loss of precision is accepted. NOTE: the
// conversion is performed in place, so FPLiteral is mutated.
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to single precision
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision lost but not overflow or underflow
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}
1294 
1295 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1296   return isUIntN(Size, Val) || isIntN(Size, Val);
1297 }
1298 
// Return true if this operand can be encoded as an inline constant for an
// operand of the given machine value type (no extra literal dword needed).
bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    // Narrower operand: the 64-bit literal must convert to the operand's
    // semantics without overflow/underflow before the inline check.
    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return AMDGPU::isInlinableLiteral16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  // Integer literals wider than the operand cannot be inlined.
  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return AMDGPU::isInlinableLiteral16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}
1361 
// Return true if this operand may be encoded as an extra literal dword for
// an operand of the given machine value type.
bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    // Only 32 bits of a 64-bit literal are actually encoded, so check
    // truncation safety against 32 bits.
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set low 64-bits of literal to zeroes but we accept this literals
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // Narrower fp operand: literal must convert without overflow/underflow.
  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, type);
}
1402 
1403 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1404   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1405 }
1406 
1407 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1408   if (AsmParser->isVI())
1409     return isVReg32();
1410   else if (AsmParser->isGFX9())
1411     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1412   else
1413     return false;
1414 }
1415 
// Typed SDWA operand predicates; all share isSDWAOperand's subtarget logic.

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}
1431 
1432 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1433 {
1434   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1435   assert(Size == 2 || Size == 4 || Size == 8);
1436 
1437   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1438 
1439   if (Imm.Mods.Abs) {
1440     Val &= ~FpSignMask;
1441   }
1442   if (Imm.Mods.Neg) {
1443     Val ^= FpSignMask;
1444   }
1445 
1446   return Val;
1447 }
1448 
1449 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1450   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1451                              Inst.getNumOperands())) {
1452     addLiteralImmOperand(Inst, Imm.Val,
1453                          ApplyModifiers &
1454                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1455   } else {
1456     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1457     Inst.addOperand(MCOperand::createImm(Imm.Val));
1458   }
1459 }
1460 
// Encode Val as the next operand of Inst, which must accept an SI source
// literal. Inline-constant values are emitted as-is; other values are
// truncated/converted to the operand's width. ApplyModifiers folds the
// operand's abs/neg FP modifiers into the bits first.
void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    // FP literal tokens are stored as 64-bit doubles regardless of the
    // operand width, hence sizeof(double) in that case.
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        return;
      }

      // Non-inlineable
      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
          "Can't encode literal as exact 64-bit floating-point operand. "
          "Low 32-bits will be set to zero");
        }

        // Only the high 32 bits are encoded; hardware pads the low half.
        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
        return;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to single precision
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision lost but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      Inst.addOperand(MCOperand::createImm(ImmVal));
      return;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    return;
  }

  // We got int literal token.
  // Only sign extend inline immediates.
  switch (OpTy) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    if (isSafeTruncation(Val, 32) &&
        AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    // Not inlinable: truncate to the operand width.
    Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    // Not inlinable: only the low 32 bits are encoded.
    Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    if (isSafeTruncation(Val, 16) &&
        AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
    return;

  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
    // Packed inline-constant operands must already be inlinable 16-bit
    // values; isLiteralImm/isInlinableImm should have guaranteed this.
    assert(isSafeTruncation(Val, 16));
    assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                        AsmParser->hasInv2PiInlineImm()));

    Inst.addOperand(MCOperand::createImm(Val));
    return;
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}
1591 
// Append this operand as a KImm (fixed-width FP immediate) of the given bit
// width: integer tokens are truncated; FP tokens are rounded to the target
// semantics and emitted as raw bits.
template <unsigned Bitwidth>
void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
  APInt Literal(64, Imm.Val);

  if (!Imm.IsFPImm) {
    // We got int literal token.
    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
    return;
  }

  bool Lost;
  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
                    APFloat::rmNearestTiesToEven, &Lost);
  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
}
1608 
1609 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1610   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1611 }
1612 
1613 static bool isInlineValue(unsigned Reg) {
1614   switch (Reg) {
1615   case AMDGPU::SRC_SHARED_BASE:
1616   case AMDGPU::SRC_SHARED_LIMIT:
1617   case AMDGPU::SRC_PRIVATE_BASE:
1618   case AMDGPU::SRC_PRIVATE_LIMIT:
1619   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1620     return true;
1621   default:
1622     return false;
1623   }
1624 }
1625 
// True if this operand is a register that names an inline value.
bool AMDGPUOperand::isInlineValue() const {
  return isRegKind() && ::isInlineValue(getReg());
}
1629 
1630 //===----------------------------------------------------------------------===//
1631 // AsmParser
1632 //===----------------------------------------------------------------------===//
1633 
1634 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1635   if (Is == IS_VGPR) {
1636     switch (RegWidth) {
1637       default: return -1;
1638       case 1: return AMDGPU::VGPR_32RegClassID;
1639       case 2: return AMDGPU::VReg_64RegClassID;
1640       case 3: return AMDGPU::VReg_96RegClassID;
1641       case 4: return AMDGPU::VReg_128RegClassID;
1642       case 8: return AMDGPU::VReg_256RegClassID;
1643       case 16: return AMDGPU::VReg_512RegClassID;
1644     }
1645   } else if (Is == IS_TTMP) {
1646     switch (RegWidth) {
1647       default: return -1;
1648       case 1: return AMDGPU::TTMP_32RegClassID;
1649       case 2: return AMDGPU::TTMP_64RegClassID;
1650       case 4: return AMDGPU::TTMP_128RegClassID;
1651       case 8: return AMDGPU::TTMP_256RegClassID;
1652       case 16: return AMDGPU::TTMP_512RegClassID;
1653     }
1654   } else if (Is == IS_SGPR) {
1655     switch (RegWidth) {
1656       default: return -1;
1657       case 1: return AMDGPU::SGPR_32RegClassID;
1658       case 2: return AMDGPU::SGPR_64RegClassID;
1659       case 4: return AMDGPU::SGPR_128RegClassID;
1660       case 8: return AMDGPU::SGPR_256RegClassID;
1661       case 16: return AMDGPU::SGPR_512RegClassID;
1662     }
1663   }
1664   return -1;
1665 }
1666 
// Translate a special-register name (including aliases like "shared_base"
// vs "src_shared_base") to its register number; 0 means "not a special
// register".
static unsigned getSpecialRegForName(StringRef RegName) {
  return StringSwitch<unsigned>(RegName)
    .Case("exec", AMDGPU::EXEC)
    .Case("vcc", AMDGPU::VCC)
    .Case("flat_scratch", AMDGPU::FLAT_SCR)
    .Case("xnack_mask", AMDGPU::XNACK_MASK)
    .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("lds_direct", AMDGPU::LDS_DIRECT)
    .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
    .Case("m0", AMDGPU::M0)
    .Case("scc", AMDGPU::SCC)
    .Case("tba", AMDGPU::TBA)
    .Case("tma", AMDGPU::TMA)
    .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
    .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
    .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
    .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
    .Case("vcc_lo", AMDGPU::VCC_LO)
    .Case("vcc_hi", AMDGPU::VCC_HI)
    .Case("exec_lo", AMDGPU::EXEC_LO)
    .Case("exec_hi", AMDGPU::EXEC_HI)
    .Case("tma_lo", AMDGPU::TMA_LO)
    .Case("tma_hi", AMDGPU::TMA_HI)
    .Case("tba_lo", AMDGPU::TBA_LO)
    .Case("tba_hi", AMDGPU::TBA_HI)
    .Default(0);
}
1703 
1704 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1705                                     SMLoc &EndLoc) {
1706   auto R = parseRegister();
1707   if (!R) return true;
1708   assert(R->isReg());
1709   RegNo = R->getReg();
1710   StartLoc = R->getStartLoc();
1711   EndLoc = R->getEndLoc();
1712   return false;
1713 }
1714 
1715 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1716                                             RegisterKind RegKind, unsigned Reg1,
1717                                             unsigned RegNum) {
1718   switch (RegKind) {
1719   case IS_SPECIAL:
1720     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1721       Reg = AMDGPU::EXEC;
1722       RegWidth = 2;
1723       return true;
1724     }
1725     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1726       Reg = AMDGPU::FLAT_SCR;
1727       RegWidth = 2;
1728       return true;
1729     }
1730     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
1731       Reg = AMDGPU::XNACK_MASK;
1732       RegWidth = 2;
1733       return true;
1734     }
1735     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1736       Reg = AMDGPU::VCC;
1737       RegWidth = 2;
1738       return true;
1739     }
1740     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1741       Reg = AMDGPU::TBA;
1742       RegWidth = 2;
1743       return true;
1744     }
1745     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
1746       Reg = AMDGPU::TMA;
1747       RegWidth = 2;
1748       return true;
1749     }
1750     return false;
1751   case IS_VGPR:
1752   case IS_SGPR:
1753   case IS_TTMP:
1754     if (Reg1 != Reg + RegWidth) {
1755       return false;
1756     }
1757     RegWidth++;
1758     return true;
1759   default:
1760     llvm_unreachable("unexpected register kind");
1761   }
1762 }
1763 
1764 static const StringRef Registers[] = {
1765   { "v" },
1766   { "s" },
1767   { "ttmp" },
1768 };
1769 
1770 bool
1771 AMDGPUAsmParser::isRegister(const AsmToken &Token,
1772                             const AsmToken &NextToken) const {
1773 
1774   // A list of consecutive registers: [s0,s1,s2,s3]
1775   if (Token.is(AsmToken::LBrac))
1776     return true;
1777 
1778   if (!Token.is(AsmToken::Identifier))
1779     return false;
1780 
1781   // A single register like s0 or a range of registers like s[0:1]
1782 
1783   StringRef RegName = Token.getString();
1784 
1785   for (StringRef Reg : Registers) {
1786     if (RegName.startswith(Reg)) {
1787       if (Reg.size() < RegName.size()) {
1788         unsigned RegNum;
1789         // A single register with an index: rXX
1790         if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum))
1791           return true;
1792       } else {
1793         // A range of registers: r[XX:YY].
1794         if (NextToken.is(AsmToken::LBrac))
1795           return true;
1796       }
1797     }
1798   }
1799 
1800   return getSpecialRegForName(RegName);
1801 }
1802 
bool
AMDGPUAsmParser::isRegister()
{
  // Classify the token at the current position using one token of lookahead.
  return isRegister(getToken(), peekToken());
}
1808 
// Parse a register reference and translate it into an MC register (Reg).
// Accepted forms:
//   - a named special register, e.g. "vcc", "exec", "m0";
//   - a single indexed register, e.g. "v0", "s15", "ttmp3";
//   - a range of registers, e.g. "v[8:11]" (the ":YY" part is optional);
//   - a bracketed list of consecutive registers, e.g. "[s0,s1,s2,s3]".
// On success RegKind/Reg/RegNum/RegWidth describe the parsed register and,
// for generic kinds, *DwordRegIndex (when non-null) receives the dword index
// of the first register in the group. Returns false on any syntax or
// validity error; tokens consumed up to that point are not restored.
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          unsigned *DwordRegIndex) {
  if (DwordRegIndex) { *DwordRegIndex = 0; }
  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (getLexer().is(AsmToken::Identifier)) {
    StringRef RegName = Parser.getTok().getString();
    // Special registers are matched by name before generic prefixes.
    if ((Reg = getSpecialRegForName(RegName))) {
      Parser.Lex();
      RegKind = IS_SPECIAL;
    } else {
      // Determine the register kind from the identifier prefix; RegNumIndex
      // is the offset where the numeric index (if any) starts.
      unsigned RegNumIndex = 0;
      if (RegName[0] == 'v') {
        RegNumIndex = 1;
        RegKind = IS_VGPR;
      } else if (RegName[0] == 's') {
        RegNumIndex = 1;
        RegKind = IS_SGPR;
      } else if (RegName.startswith("ttmp")) {
        RegNumIndex = strlen("ttmp");
        RegKind = IS_TTMP;
      } else {
        return false;
      }
      if (RegName.size() > RegNumIndex) {
        // Single 32-bit register: vXX.
        if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
          return false;
        Parser.Lex();
        RegWidth = 1;
      } else {
        // Range of registers: v[XX:YY]. ":YY" is optional.
        Parser.Lex();
        int64_t RegLo, RegHi;
        if (getLexer().isNot(AsmToken::LBrac))
          return false;
        Parser.Lex();

        if (getParser().parseAbsoluteExpression(RegLo))
          return false;

        // Either "]" (single-element range) or ":" must follow the index.
        const bool isRBrace = getLexer().is(AsmToken::RBrac);
        if (!isRBrace && getLexer().isNot(AsmToken::Colon))
          return false;
        Parser.Lex();

        if (isRBrace) {
          RegHi = RegLo;
        } else {
          if (getParser().parseAbsoluteExpression(RegHi))
            return false;

          if (getLexer().isNot(AsmToken::RBrac))
            return false;
          Parser.Lex();
        }
        RegNum = (unsigned) RegLo;
        RegWidth = (RegHi - RegLo) + 1;
      }
    }
  } else if (getLexer().is(AsmToken::LBrac)) {
    // List of consecutive registers: [s0,s1,s2,s3]
    Parser.Lex();
    // Parse the first element; each element must be a single 32-bit register.
    if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
      return false;
    if (RegWidth != 1)
      return false;
    RegisterKind RegKind1;
    unsigned Reg1, RegNum1, RegWidth1;
    do {
      if (getLexer().is(AsmToken::Comma)) {
        Parser.Lex();
      } else if (getLexer().is(AsmToken::RBrac)) {
        Parser.Lex();
        break;
      } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
        if (RegWidth1 != 1) {
          return false;
        }
        // All elements must have the same kind and be consecutive.
        if (RegKind1 != RegKind) {
          return false;
        }
        if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
          return false;
        }
      } else {
        return false;
      }
    } while (true);
  } else {
    return false;
  }
  // Translate (RegKind, RegNum, RegWidth) into an MC register number and
  // validate alignment and range.
  switch (RegKind) {
  case IS_SPECIAL:
    RegNum = 0;
    RegWidth = 1;
    break;
  case IS_VGPR:
  case IS_SGPR:
  case IS_TTMP:
  {
    unsigned Size = 1;
    if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
      // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
      Size = std::min(RegWidth, 4u);
    }
    if (RegNum % Size != 0)
      return false;
    if (DwordRegIndex) { *DwordRegIndex = RegNum; }
    // Convert the dword index into an index within the register class.
    RegNum = RegNum / Size;
    int RCID = getRegClass(RegKind, RegWidth);
    if (RCID == -1)
      return false;
    const MCRegisterClass RC = TRI->getRegClass(RCID);
    if (RegNum >= RC.getNumRegs())
      return false;
    Reg = RC.getRegister(RegNum);
    break;
  }

  default:
    llvm_unreachable("unexpected register kind");
  }

  // Finally reject registers that do not exist on the current subtarget.
  if (!subtargetHasRegister(*TRI, Reg))
    return false;
  return true;
}
1937 
1938 Optional<StringRef>
1939 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
1940   switch (RegKind) {
1941   case IS_VGPR:
1942     return StringRef(".amdgcn.next_free_vgpr");
1943   case IS_SGPR:
1944     return StringRef(".amdgcn.next_free_sgpr");
1945   default:
1946     return None;
1947   }
1948 }
1949 
1950 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
1951   auto SymbolName = getGprCountSymbolName(RegKind);
1952   assert(SymbolName && "initializing invalid register kind");
1953   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
1954   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
1955 }
1956 
// Advance the .amdgcn.next_free_{v,s}gpr tracking symbol so it stays above
// the highest register index referenced so far.
// Returns true on success, false if a diagnostic was emitted.
bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
                                            unsigned DwordRegIndex,
                                            unsigned RegWidth) {
  // Symbols are only defined for GCN targets
  if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
    return true;

  // Only VGPRs and SGPRs have tracking symbols; other kinds are ignored.
  auto SymbolName = getGprCountSymbolName(RegKind);
  if (!SymbolName)
    return true;
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);

  // Highest dword index touched by this register reference.
  int64_t NewMax = DwordRegIndex + RegWidth - 1;
  int64_t OldCount;

  // Error() returns true, so "!Error(...)" reports the diagnostic and
  // yields false (failure) from this function.
  if (!Sym->isVariable())
    return !Error(getParser().getTok().getLoc(),
                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
    return !Error(
        getParser().getTok().getLoc(),
        ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");

  // Bump the symbol only when this reference extends the known range.
  if (OldCount <= NewMax)
    Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));

  return true;
}
1985 
// Parse a register reference and wrap it in an AMDGPUOperand.
// As a side effect, records register usage for the kernel being assembled
// (either via the GPR-count symbols or via KernelScope).
// Returns nullptr after emitting a diagnostic when parsing fails.
std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
  const auto &Tok = Parser.getTok();
  SMLoc StartLoc = Tok.getLoc();
  SMLoc EndLoc = Tok.getEndLoc();
  RegisterKind RegKind;
  unsigned Reg, RegNum, RegWidth, DwordRegIndex;

  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
    //FIXME: improve error messages (bug 41303).
    Error(StartLoc, "not a valid operand.");
    return nullptr;
  }
  // Code-object-v3 targets track usage through assembler symbols;
  // older flows use the KernelScope bookkeeping instead.
  if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
    if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
      return nullptr;
  } else
    KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false);
}
2005 
2006 bool
2007 AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool HasSP3AbsModifier) {
2008   if (HasSP3AbsModifier) {
2009     // This is a workaround for handling expressions
2010     // as arguments of SP3 'abs' modifier, for example:
2011     //     |1.0|
2012     //     |-1|
2013     //     |1+x|
2014     // This syntax is not compatible with syntax of standard
2015     // MC expressions (due to the trailing '|').
2016 
2017     SMLoc EndLoc;
2018     const MCExpr *Expr;
2019     SMLoc StartLoc = getLoc();
2020 
2021     if (getParser().parsePrimaryExpr(Expr, EndLoc)) {
2022       return true;
2023     }
2024 
2025     if (!Expr->evaluateAsAbsolute(Val))
2026       return Error(StartLoc, "expected absolute expression");
2027 
2028     return false;
2029   }
2030 
2031   return getParser().parseAbsoluteExpression(Val);
2032 }
2033 
// Parse an immediate operand: either a floating-point literal (with an
// optional leading '-') or an integer absolute expression.
// Returns NoMatch when the current tokens cannot start an immediate.
OperandMatchResultTy
AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
  // TODO: add syntactic sugar for 1/(2*PI)

  const auto& Tok = getToken();
  const auto& NextTok = peekToken();
  bool IsReal = Tok.is(AsmToken::Real);
  SMLoc S = Tok.getLoc();
  bool Negate = false;

  // A '-' directly followed by a real literal is treated as a negative
  // fp literal; consume the sign here.
  if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
    lex();
    IsReal = true;
    Negate = true;
  }

  if (IsReal) {
    // Floating-point expressions are not supported.
    // Can only allow floating-point literals with an
    // optional sign.

    StringRef Num = getTokenStr();
    lex();

    APFloat RealVal(APFloat::IEEEdouble());
    auto roundMode = APFloat::rmNearestTiesToEven;
    if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) {
      return MatchOperand_ParseFail;
    }
    if (Negate)
      RealVal.changeSign();

    // The double's bit pattern is stored in the immediate; IsFPImm=true.
    Operands.push_back(
      AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
                               AMDGPUOperand::ImmTyNone, true));

    return MatchOperand_Success;

    // FIXME: Should enable arbitrary expressions here
  } else if (Tok.is(AsmToken::Integer) ||
             (Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Integer))){

    int64_t IntVal;
    if (parseAbsoluteExpr(IntVal, HasSP3AbsModifier))
      return MatchOperand_ParseFail;

    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
    return MatchOperand_Success;
  }

  return MatchOperand_NoMatch;
}
2086 
2087 OperandMatchResultTy
2088 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2089   if (!isRegister())
2090     return MatchOperand_NoMatch;
2091 
2092   if (auto R = parseRegister()) {
2093     assert(R->isReg());
2094     R->Reg.IsForcedVOP3 = isForcedVOP3();
2095     Operands.push_back(std::move(R));
2096     return MatchOperand_Success;
2097   }
2098   return MatchOperand_ParseFail;
2099 }
2100 
2101 OperandMatchResultTy
2102 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool AbsMod) {
2103   auto res = parseReg(Operands);
2104   return (res == MatchOperand_NoMatch)?
2105          parseImm(Operands, AbsMod) :
2106          res;
2107 }
2108 
2109 // Check if the current token is an SP3 'neg' modifier.
2110 // Currently this modifier is allowed in the following context:
2111 //
2112 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2113 // 2. Before an 'abs' modifier: -abs(...)
2114 // 3. Before an SP3 'abs' modifier: -|...|
2115 //
2116 // In all other cases "-" is handled as a part
2117 // of an expression that follows the sign.
2118 //
2119 // Note: When "-" is followed by an integer literal,
2120 // this is interpreted as integer negation rather
2121 // than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of floating-point
2123 // NEG modifier would have resulted in different meaning
2124 // of integer literals used with VOP1/2/C and VOP3,
2125 // for example:
2126 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2127 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2128 // Negative fp literals with preceding "-" are
// handled likewise for uniformity.
2130 //
2131 bool
2132 AMDGPUAsmParser::parseSP3NegModifier() {
2133 
2134   AsmToken NextToken[2];
2135   peekTokens(NextToken);
2136 
2137   if (isToken(AsmToken::Minus) &&
2138       (isRegister(NextToken[0], NextToken[1]) ||
2139        NextToken[0].is(AsmToken::Pipe) ||
2140        isId(NextToken[0], "abs"))) {
2141     lex();
2142     return true;
2143   }
2144 
2145   return false;
2146 }
2147 
// Parse a register or immediate operand together with optional
// floating-point input modifiers in any of their spellings:
//   SP3 neg:   -op        named neg: neg(op)
//   SP3 abs:   |op|       named abs: abs(op)
// The parsed modifiers are attached to the operand that was pushed
// onto Operands.
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Negate, Negate2 = false, Abs = false, Abs2 = false;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
    return MatchOperand_ParseFail;
  }

  // SP3 '-' form (consumes the token only when it really is a modifier).
  Negate = parseSP3NegModifier();

  // Named 'neg(...)' form; may not be combined with SP3 '-'.
  if (getLexer().getKind() == AsmToken::Identifier &&
      Parser.getTok().getString() == "neg") {
    if (Negate) {
      Error(Parser.getTok().getLoc(), "expected register or immediate");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Negate2 = true;
    if (getLexer().isNot(AsmToken::LParen)) {
      Error(Parser.getTok().getLoc(), "expected left paren after neg");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
  }

  // Named 'abs(...)' form.
  if (getLexer().getKind() == AsmToken::Identifier &&
      Parser.getTok().getString() == "abs") {
    Parser.Lex();
    Abs2 = true;
    if (getLexer().isNot(AsmToken::LParen)) {
      Error(Parser.getTok().getLoc(), "expected left paren after abs");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
  }

  // SP3 '|...|' form; may not be combined with named 'abs'.
  if (getLexer().getKind() == AsmToken::Pipe) {
    if (Abs2) {
      Error(Parser.getTok().getLoc(), "expected register or immediate");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Abs = true;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    // Abs is passed through so '|literal|' can be parsed (see parseAbsoluteExpr).
    Res = parseRegOrImm(Operands, Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    // If any modifier prefix was consumed, a bare NoMatch would leave the
    // lexer mid-operand, so escalate to ParseFail.
    return (Negate || Negate2 || Abs || Abs2)? MatchOperand_ParseFail : Res;
  }

  // Consume the closing delimiters and record the modifiers.
  AMDGPUOperand::Modifiers Mods;
  if (Abs) {
    if (getLexer().getKind() != AsmToken::Pipe) {
      Error(Parser.getTok().getLoc(), "expected vertical bar");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Mods.Abs = true;
  }
  if (Abs2) {
    if (getLexer().isNot(AsmToken::RParen)) {
      Error(Parser.getTok().getLoc(), "expected closing parentheses");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Mods.Abs = true;
  }

  if (Negate) {
    Mods.Neg = true;
  } else if (Negate2) {
    if (getLexer().isNot(AsmToken::RParen)) {
      Error(Parser.getTok().getLoc(), "expected closing parentheses");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Mods.Neg = true;
  }

  if (Mods.hasFPModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    Op.setModifiers(Mods);
  }
  return MatchOperand_Success;
}
2241 
2242 OperandMatchResultTy
2243 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2244                                                bool AllowImm) {
2245   bool Sext = false;
2246 
2247   if (getLexer().getKind() == AsmToken::Identifier &&
2248       Parser.getTok().getString() == "sext") {
2249     Parser.Lex();
2250     Sext = true;
2251     if (getLexer().isNot(AsmToken::LParen)) {
2252       Error(Parser.getTok().getLoc(), "expected left paren after sext");
2253       return MatchOperand_ParseFail;
2254     }
2255     Parser.Lex();
2256   }
2257 
2258   OperandMatchResultTy Res;
2259   if (AllowImm) {
2260     Res = parseRegOrImm(Operands);
2261   } else {
2262     Res = parseReg(Operands);
2263   }
2264   if (Res != MatchOperand_Success) {
2265     return Sext? MatchOperand_ParseFail : Res;
2266   }
2267 
2268   AMDGPUOperand::Modifiers Mods;
2269   if (Sext) {
2270     if (getLexer().isNot(AsmToken::RParen)) {
2271       Error(Parser.getTok().getLoc(), "expected closing parentheses");
2272       return MatchOperand_ParseFail;
2273     }
2274     Parser.Lex();
2275     Mods.Sext = true;
2276   }
2277 
2278   if (Mods.hasIntModifiers()) {
2279     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2280     Op.setModifiers(Mods);
2281   }
2282 
2283   return MatchOperand_Success;
2284 }
2285 
2286 OperandMatchResultTy
2287 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2288   return parseRegOrImmWithFPInputMods(Operands, false);
2289 }
2290 
2291 OperandMatchResultTy
2292 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2293   return parseRegOrImmWithIntInputMods(Operands, false);
2294 }
2295 
2296 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2297   auto Loc = getLoc();
2298   if (trySkipId("off")) {
2299     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2300                                                 AMDGPUOperand::ImmTyOff, false));
2301     return MatchOperand_Success;
2302   }
2303 
2304   if (!isRegister())
2305     return MatchOperand_NoMatch;
2306 
2307   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2308   if (Reg) {
2309     Operands.push_back(std::move(Reg));
2310     return MatchOperand_Success;
2311   }
2312 
2313   return MatchOperand_ParseFail;
2314 
2315 }
2316 
2317 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2318   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2319 
2320   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2321       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2322       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2323       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2324     return Match_InvalidOperand;
2325 
2326   if ((TSFlags & SIInstrFlags::VOP3) &&
2327       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2328       getForcedEncodingSize() != 64)
2329     return Match_PreferE32;
2330 
2331   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2332       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2333     // v_mac_f32/16 allow only dst_sel == DWORD;
2334     auto OpNum =
2335         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2336     const auto &Op = Inst.getOperand(OpNum);
2337     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2338       return Match_InvalidOperand;
2339     }
2340   }
2341 
2342   if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) {
2343     // FIXME: Produces error without correct column reported.
2344     auto OpNum =
2345         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
2346     const auto &Op = Inst.getOperand(OpNum);
2347     if (Op.getImm() != 0)
2348       return Match_InvalidOperand;
2349   }
2350 
2351   return Match_Success;
2352 }
2353 
2354 // What asm variants we should check
2355 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2356   if (getForcedEncodingSize() == 32) {
2357     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2358     return makeArrayRef(Variants);
2359   }
2360 
2361   if (isForcedVOP3()) {
2362     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2363     return makeArrayRef(Variants);
2364   }
2365 
2366   if (isForcedSDWA()) {
2367     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2368                                         AMDGPUAsmVariants::SDWA9};
2369     return makeArrayRef(Variants);
2370   }
2371 
2372   if (isForcedDPP()) {
2373     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2374     return makeArrayRef(Variants);
2375   }
2376 
2377   static const unsigned Variants[] = {
2378     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2379     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2380   };
2381 
2382   return makeArrayRef(Variants);
2383 }
2384 
2385 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2386   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2387   const unsigned Num = Desc.getNumImplicitUses();
2388   for (unsigned i = 0; i < Num; ++i) {
2389     unsigned Reg = Desc.ImplicitUses[i];
2390     switch (Reg) {
2391     case AMDGPU::FLAT_SCR:
2392     case AMDGPU::VCC:
2393     case AMDGPU::M0:
2394       return Reg;
2395     default:
2396       break;
2397     }
2398   }
2399   return AMDGPU::NoRegister;
2400 }
2401 
2402 // NB: This code is correct only when used to check constant
2403 // bus limitations because GFX7 support no f16 inline constants.
2404 // Note that there are no cases when a GFX7 opcode violates
2405 // constant bus limitations due to the use of an f16 constant.
2406 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2407                                        unsigned OpIdx) const {
2408   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2409 
2410   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2411     return false;
2412   }
2413 
2414   const MCOperand &MO = Inst.getOperand(OpIdx);
2415 
2416   int64_t Val = MO.getImm();
2417   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2418 
2419   switch (OpSize) { // expected operand size
2420   case 8:
2421     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2422   case 4:
2423     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2424   case 2: {
2425     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2426     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2427         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
2428       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2429     } else {
2430       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2431     }
2432   }
2433   default:
2434     llvm_unreachable("invalid operand size");
2435   }
2436 }
2437 
2438 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2439   const MCOperand &MO = Inst.getOperand(OpIdx);
2440   if (MO.isImm()) {
2441     return !isInlineConstant(Inst, OpIdx);
2442   }
2443   return !MO.isReg() ||
2444          isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2445 }
2446 
// Verify that the instruction does not use more than one constant bus
// slot (SGPR, literal, or special implicit SGPR read).
// Returns true if the instruction is within the limit.
bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  unsigned ConstantBusUseCount = 0;

  // Only VALU encodings are subject to the constant bus limit.
  if (Desc.TSFlags &
      (SIInstrFlags::VOPC |
       SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
       SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
       SIInstrFlags::SDWA)) {
    // Check special imm operands (used by madmk, etc)
    if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
      ++ConstantBusUseCount;
    }

    // An implicit read of VCC/M0/FLAT_SCR also consumes a slot.
    unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
    if (SGPRUsed != AMDGPU::NoRegister) {
      ++ConstantBusUseCount;
    }

    const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
    const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    for (int OpIdx : OpIndices) {
      if (OpIdx == -1) break;

      const MCOperand &MO = Inst.getOperand(OpIdx);
      if (usesConstantBus(Inst, OpIdx)) {
        if (MO.isReg()) {
          const unsigned Reg = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersections like these
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but they are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction
          // Reuse of the same SGPR counts only once.
          if (Reg != SGPRUsed) {
            ++ConstantBusUseCount;
          }
          SGPRUsed = Reg;
        } else { // Expression or a literal
          ++ConstantBusUseCount;
        }
      }
    }
  }

  return ConstantBusUseCount <= 1;
}
2499 
2500 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2501   const unsigned Opcode = Inst.getOpcode();
2502   const MCInstrDesc &Desc = MII.get(Opcode);
2503 
2504   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2505   if (DstIdx == -1 ||
2506       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2507     return true;
2508   }
2509 
2510   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2511 
2512   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2513   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2514   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2515 
2516   assert(DstIdx != -1);
2517   const MCOperand &Dst = Inst.getOperand(DstIdx);
2518   assert(Dst.isReg());
2519   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2520 
2521   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2522 
2523   for (int SrcIdx : SrcIndices) {
2524     if (SrcIdx == -1) break;
2525     const MCOperand &Src = Inst.getOperand(SrcIdx);
2526     if (Src.isReg()) {
2527       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2528       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2529         return false;
2530       }
2531     }
2532   }
2533 
2534   return true;
2535 }
2536 
2537 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2538 
2539   const unsigned Opc = Inst.getOpcode();
2540   const MCInstrDesc &Desc = MII.get(Opc);
2541 
2542   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2543     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2544     assert(ClampIdx != -1);
2545     return Inst.getOperand(ClampIdx).getImm() == 0;
2546   }
2547 
2548   return true;
2549 }
2550 
2551 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2552 
2553   const unsigned Opc = Inst.getOpcode();
2554   const MCInstrDesc &Desc = MII.get(Opc);
2555 
2556   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2557     return true;
2558 
2559   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2560   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2561   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2562 
2563   assert(VDataIdx != -1);
2564   assert(DMaskIdx != -1);
2565   assert(TFEIdx != -1);
2566 
2567   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2568   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2569   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2570   if (DMask == 0)
2571     DMask = 1;
2572 
2573   unsigned DataSize =
2574     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2575   if (hasPackedD16()) {
2576     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2577     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2578       DataSize = (DataSize + 1) / 2;
2579   }
2580 
2581   return (VDataSize / 4) == DataSize + TFESize;
2582 }
2583 
2584 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
2585 
2586   const unsigned Opc = Inst.getOpcode();
2587   const MCInstrDesc &Desc = MII.get(Opc);
2588 
2589   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2590     return true;
2591   if (!Desc.mayLoad() || !Desc.mayStore())
2592     return true; // Not atomic
2593 
2594   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2595   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2596 
2597   // This is an incomplete check because image_atomic_cmpswap
2598   // may only use 0x3 and 0xf while other atomic operations
2599   // may use 0x1 and 0x3. However these limitations are
2600   // verified when we check that dmask matches dst size.
2601   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
2602 }
2603 
2604 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
2605 
2606   const unsigned Opc = Inst.getOpcode();
2607   const MCInstrDesc &Desc = MII.get(Opc);
2608 
2609   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
2610     return true;
2611 
2612   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2613   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2614 
2615   // GATHER4 instructions use dmask in a different fashion compared to
2616   // other MIMG instructions. The only useful DMASK values are
2617   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
2618   // (red,red,red,red) etc.) The ISA document doesn't mention
2619   // this.
2620   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
2621 }
2622 
2623 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
2624 
2625   const unsigned Opc = Inst.getOpcode();
2626   const MCInstrDesc &Desc = MII.get(Opc);
2627 
2628   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2629     return true;
2630 
2631   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2632   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
2633     if (isCI() || isSI())
2634       return false;
2635   }
2636 
2637   return true;
2638 }
2639 
// Returns true for "reversed" opcodes (V_*REV_*), whose source operands
// are swapped relative to the non-REV form. validateLdsDirect() uses this
// to reject lds_direct as src0 of such instructions.
static bool IsRevOpcode(const unsigned Opcode)
{
  switch (Opcode) {
  // Reversed FP32 subtract, all encodings.
  case AMDGPU::V_SUBREV_F32_e32:
  case AMDGPU::V_SUBREV_F32_e64:
  case AMDGPU::V_SUBREV_F32_e32_si:
  case AMDGPU::V_SUBREV_F32_e32_vi:
  case AMDGPU::V_SUBREV_F32_e64_si:
  case AMDGPU::V_SUBREV_F32_e64_vi:
  // Reversed integer subtract.
  case AMDGPU::V_SUBREV_I32_e32:
  case AMDGPU::V_SUBREV_I32_e64:
  case AMDGPU::V_SUBREV_I32_e32_si:
  case AMDGPU::V_SUBREV_I32_e64_si:
  // Reversed subtract with borrow.
  case AMDGPU::V_SUBBREV_U32_e32:
  case AMDGPU::V_SUBBREV_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e32_si:
  case AMDGPU::V_SUBBREV_U32_e32_vi:
  case AMDGPU::V_SUBBREV_U32_e64_si:
  case AMDGPU::V_SUBBREV_U32_e64_vi:
  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_U32_e32_vi:
  case AMDGPU::V_SUBREV_U32_e64_gfx9:
  case AMDGPU::V_SUBREV_U32_e64_vi:
  // Reversed 16-bit subtract.
  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32_vi:
  case AMDGPU::V_SUBREV_F16_e64_vi:
  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32_vi:
  case AMDGPU::V_SUBREV_U16_e64_vi:
  // gfx9 carry-out variants.
  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
  // Reversed shifts: left...
  case AMDGPU::V_LSHLREV_B32_e32_si:
  case AMDGPU::V_LSHLREV_B32_e64_si:
  case AMDGPU::V_LSHLREV_B16_e32_vi:
  case AMDGPU::V_LSHLREV_B16_e64_vi:
  case AMDGPU::V_LSHLREV_B32_e32_vi:
  case AMDGPU::V_LSHLREV_B32_e64_vi:
  case AMDGPU::V_LSHLREV_B64_vi:
  // ...logical right...
  case AMDGPU::V_LSHRREV_B32_e32_si:
  case AMDGPU::V_LSHRREV_B32_e64_si:
  case AMDGPU::V_LSHRREV_B16_e32_vi:
  case AMDGPU::V_LSHRREV_B16_e64_vi:
  case AMDGPU::V_LSHRREV_B32_e32_vi:
  case AMDGPU::V_LSHRREV_B32_e64_vi:
  case AMDGPU::V_LSHRREV_B64_vi:
  // ...and arithmetic right.
  case AMDGPU::V_ASHRREV_I32_e64_si:
  case AMDGPU::V_ASHRREV_I32_e32_si:
  case AMDGPU::V_ASHRREV_I16_e32_vi:
  case AMDGPU::V_ASHRREV_I16_e64_vi:
  case AMDGPU::V_ASHRREV_I32_e32_vi:
  case AMDGPU::V_ASHRREV_I32_e64_vi:
  case AMDGPU::V_ASHRREV_I64_vi:
  // Packed 16-bit reversed shifts.
  case AMDGPU::V_PK_LSHLREV_B16_vi:
  case AMDGPU::V_PK_LSHRREV_B16_vi:
  case AMDGPU::V_PK_ASHRREV_I16_vi:
    return true;
  default:
    return false;
  }
}
2706 
2707 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
2708 
2709   using namespace SIInstrFlags;
2710   const unsigned Opcode = Inst.getOpcode();
2711   const MCInstrDesc &Desc = MII.get(Opcode);
2712 
2713   // lds_direct register is defined so that it can be used
2714   // with 9-bit operands only. Ignore encodings which do not accept these.
2715   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
2716     return true;
2717 
2718   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2719   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2720   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2721 
2722   const int SrcIndices[] = { Src1Idx, Src2Idx };
2723 
2724   // lds_direct cannot be specified as either src1 or src2.
2725   for (int SrcIdx : SrcIndices) {
2726     if (SrcIdx == -1) break;
2727     const MCOperand &Src = Inst.getOperand(SrcIdx);
2728     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
2729       return false;
2730     }
2731   }
2732 
2733   if (Src0Idx == -1)
2734     return true;
2735 
2736   const MCOperand &Src = Inst.getOperand(Src0Idx);
2737   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
2738     return true;
2739 
2740   // lds_direct is specified as src0. Check additional limitations.
2741   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
2742 }
2743 
2744 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
2745   unsigned Opcode = Inst.getOpcode();
2746   const MCInstrDesc &Desc = MII.get(Opcode);
2747   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
2748     return true;
2749 
2750   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2751   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2752 
2753   const int OpIndices[] = { Src0Idx, Src1Idx };
2754 
2755   unsigned NumLiterals = 0;
2756   uint32_t LiteralValue;
2757 
2758   for (int OpIdx : OpIndices) {
2759     if (OpIdx == -1) break;
2760 
2761     const MCOperand &MO = Inst.getOperand(OpIdx);
2762     if (MO.isImm() &&
2763         // Exclude special imm operands (like that used by s_set_gpr_idx_on)
2764         AMDGPU::isSISrcOperand(Desc, OpIdx) &&
2765         !isInlineConstant(Inst, OpIdx)) {
2766       uint32_t Value = static_cast<uint32_t>(MO.getImm());
2767       if (NumLiterals == 0 || LiteralValue != Value) {
2768         LiteralValue = Value;
2769         ++NumLiterals;
2770       }
2771     }
2772   }
2773 
2774   return NumLiterals <= 1;
2775 }
2776 
2777 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
2778                                           const SMLoc &IDLoc) {
2779   if (!validateLdsDirect(Inst)) {
2780     Error(IDLoc,
2781       "invalid use of lds_direct");
2782     return false;
2783   }
2784   if (!validateSOPLiteral(Inst)) {
2785     Error(IDLoc,
2786       "only one literal operand is allowed");
2787     return false;
2788   }
2789   if (!validateConstantBusLimitations(Inst)) {
2790     Error(IDLoc,
2791       "invalid operand (violates constant bus restrictions)");
2792     return false;
2793   }
2794   if (!validateEarlyClobberLimitations(Inst)) {
2795     Error(IDLoc,
2796       "destination must be different than all sources");
2797     return false;
2798   }
2799   if (!validateIntClampSupported(Inst)) {
2800     Error(IDLoc,
2801       "integer clamping is not supported on this GPU");
2802     return false;
2803   }
2804   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
2805   if (!validateMIMGD16(Inst)) {
2806     Error(IDLoc,
2807       "d16 modifier is not supported on this GPU");
2808     return false;
2809   }
2810   if (!validateMIMGDataSize(Inst)) {
2811     Error(IDLoc,
2812       "image data size does not match dmask and tfe");
2813     return false;
2814   }
2815   if (!validateMIMGAtomicDMask(Inst)) {
2816     Error(IDLoc,
2817       "invalid atomic image dmask");
2818     return false;
2819   }
2820   if (!validateMIMGGatherDMask(Inst)) {
2821     Error(IDLoc,
2822       "invalid image_gather dmask: only one bit must be set");
2823     return false;
2824   }
2825 
2826   return true;
2827 }
2828 
2829 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
2830                                             const FeatureBitset &FBS,
2831                                             unsigned VariantID = 0);
2832 
// Match the parsed operands against every available encoding variant and
// emit the instruction on the first successful match. On failure, keep and
// report the diagnostic for the most specific failure status seen across
// all variants.
bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  MCInst Inst;
  unsigned Result = Match_Success;
  for (auto Variant : getMatchedVariants()) {
    uint64_t EI;
    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
                                  Variant);
    // We order match statuses from least to most specific. We use most specific
    // status as resulting
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
    if ((R == Match_Success) ||
        (R == Match_PreferE32) ||
        (R == Match_MissingFeature && Result != Match_PreferE32) ||
        (R == Match_InvalidOperand && Result != Match_MissingFeature
                                   && Result != Match_PreferE32) ||
        (R == Match_MnemonicFail   && Result != Match_InvalidOperand
                                   && Result != Match_MissingFeature
                                   && Result != Match_PreferE32)) {
      Result = R;
      ErrorInfo = EI;
    }
    // First successful variant wins; no need to try the rest.
    if (R == Match_Success)
      break;
  }

  switch (Result) {
  default: break;
  case Match_Success:
    // validateInstruction() emits its own diagnostics, so just signal
    // failure here.
    if (!validateInstruction(Inst, IDLoc)) {
      return true;
    }
    Inst.setLoc(IDLoc);
    Out.EmitInstruction(Inst, getSTI());
    return false;

  case Match_MissingFeature:
    return Error(IDLoc, "instruction not supported on this GPU");

  case Match_MnemonicFail: {
    // Suggest a near-miss mnemonic that is valid for the available features.
    FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
    std::string Suggestion = AMDGPUMnemonicSpellCheck(
        ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
    return Error(IDLoc, "invalid instruction" + Suggestion,
                 ((AMDGPUOperand &)*Operands[0]).getLocRange());
  }

  case Match_InvalidOperand: {
    // Point the diagnostic at the offending operand when ErrorInfo
    // identifies one; otherwise fall back to the instruction location.
    SMLoc ErrorLoc = IDLoc;
    if (ErrorInfo != ~0ULL) {
      if (ErrorInfo >= Operands.size()) {
        return Error(IDLoc, "too few operands for instruction");
      }
      ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
      if (ErrorLoc == SMLoc())
        ErrorLoc = IDLoc;
    }
    return Error(ErrorLoc, "invalid operand for instruction");
  }

  case Match_PreferE32:
    return Error(IDLoc, "internal error: instruction without _e64 suffix "
                        "should be encoded as e32");
  }
  llvm_unreachable("Implement any new match types added!");
}
2902 
2903 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
2904   int64_t Tmp = -1;
2905   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
2906     return true;
2907   }
2908   if (getParser().parseAbsoluteExpression(Tmp)) {
2909     return true;
2910   }
2911   Ret = static_cast<uint32_t>(Tmp);
2912   return false;
2913 }
2914 
2915 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
2916                                                uint32_t &Minor) {
2917   if (ParseAsAbsoluteExpression(Major))
2918     return TokError("invalid major version");
2919 
2920   if (getLexer().isNot(AsmToken::Comma))
2921     return TokError("minor version number required, comma expected");
2922   Lex();
2923 
2924   if (ParseAsAbsoluteExpression(Minor))
2925     return TokError("invalid minor version");
2926 
2927   return false;
2928 }
2929 
2930 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
2931   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
2932     return TokError("directive only supported for amdgcn architecture");
2933 
2934   std::string Target;
2935 
2936   SMLoc TargetStart = getTok().getLoc();
2937   if (getParser().parseEscapedString(Target))
2938     return true;
2939   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
2940 
2941   std::string ExpectedTarget;
2942   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
2943   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
2944 
2945   if (Target != ExpectedTargetOS.str())
2946     return getParser().Error(TargetRange.Start, "target must match options",
2947                              TargetRange);
2948 
2949   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
2950   return false;
2951 }
2952 
// Report "value out of range" at the start of \p Range and return true
// (the parser's error convention).
bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
  return getParser().Error(Range.Start, "value out of range", Range);
}
2956 
// Validate the user-supplied register counts and convert them into the
// granulated VGPR/SGPR block counts used by the kernel descriptor.
// Returns true (after reporting an out-of-range error) on failure.
bool AMDGPUAsmParser::calculateGPRBlocks(
    const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
    bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange,
    unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks,
    unsigned &SGPRBlocks) {
  // TODO(scott.linder): These calculations are duplicated from
  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
  IsaVersion Version = getIsaVersion(getSTI().getCPU());

  unsigned NumVGPRs = NextFreeVGPR;
  unsigned NumSGPRs = NextFreeSGPR;
  unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(&getSTI());

  // On gfx8+ without the SGPR-init bug, the addressable limit is checked
  // before the extra SGPRs (VCC/flat_scratch/XNACK) are added in.
  if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
      NumSGPRs > MaxAddressableNumSGPRs)
    return OutOfRangeError(SGPRRange);

  NumSGPRs +=
      IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);

  // Older targets (and those with the SGPR-init bug) check the limit
  // after the extra SGPRs are included.
  if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
      NumSGPRs > MaxAddressableNumSGPRs)
    return OutOfRangeError(SGPRRange);

  // Hardware workaround: affected targets always report a fixed SGPR count.
  if (Features.test(FeatureSGPRInitBug))
    NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;

  VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs);
  SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);

  return false;
}
2989 
// Parse a .amdhsa_kernel directive: read the kernel name, then a sequence
// of .amdhsa_* key/value lines terminated by .end_amdhsa_kernel, accumulate
// the values into a kernel_descriptor_t plus register-usage counters, and
// emit the resulting descriptor via the target streamer.
bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
    return TokError("directive only supported for amdgcn architecture");

  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
    return TokError("directive only supported for amdhsa OS");

  StringRef KernelName;
  if (getParser().parseIdentifier(KernelName))
    return true;

  // Start from an all-defaults descriptor; directives below overwrite
  // individual fields.
  kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor();

  // Directives already seen in this block; each may appear at most once.
  StringSet<> Seen;

  IsaVersion IVersion = getIsaVersion(getSTI().getCPU());

  // Register-usage state collected from the directives and converted into
  // granulated block counts after the loop.
  SMRange VGPRRange;
  uint64_t NextFreeVGPR = 0;
  SMRange SGPRRange;
  uint64_t NextFreeSGPR = 0;
  unsigned UserSGPRCount = 0;
  bool ReserveVCC = true;
  bool ReserveFlatScr = true;
  bool ReserveXNACK = hasXNACK();

  while (true) {
    // Skip EndOfStatement tokens; lexing a comment also produces one.
    while (getLexer().is(AsmToken::EndOfStatement))
      Lex();

    if (getLexer().isNot(AsmToken::Identifier))
      return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");

    StringRef ID = getTok().getIdentifier();
    SMRange IDRange = getTok().getLocRange();
    Lex();

    if (ID == ".end_amdhsa_kernel")
      break;

    if (Seen.find(ID) != Seen.end())
      return TokError(".amdhsa_ directives cannot be repeated");
    Seen.insert(ID);

    // Every remaining directive takes a single absolute, non-negative
    // integer argument.
    SMLoc ValStart = getTok().getLoc();
    int64_t IVal;
    if (getParser().parseAbsoluteExpression(IVal))
      return true;
    SMLoc ValEnd = getTok().getLoc();
    SMRange ValRange = SMRange(ValStart, ValEnd);

    if (IVal < 0)
      return OutOfRangeError(ValRange);

    uint64_t Val = IVal;

// Range-check VALUE against the bit-field width ENTRY##_WIDTH, then store
// it into FIELD via AMDHSA_BITS_SET.
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
  if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
    return OutOfRangeError(RANGE);                                             \
  AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);

    if (ID == ".amdhsa_group_segment_fixed_size") {
      if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.group_segment_fixed_size = Val;
    } else if (ID == ".amdhsa_private_segment_fixed_size") {
      if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.private_segment_fixed_size = Val;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
                       Val, ValRange);
      UserSGPRCount += 4;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
                       ValRange);
      UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
                       ValRange);
      UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
                       Val, ValRange);
      UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
                       ValRange);
      UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
                       ValRange);
      UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
                       Val, ValRange);
      UserSGPRCount += 1;
    } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
          ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
                       ValRange);
    } else if (ID == ".amdhsa_next_free_vgpr") {
      VGPRRange = ValRange;
      NextFreeVGPR = Val;
    } else if (ID == ".amdhsa_next_free_sgpr") {
      SGPRRange = ValRange;
      NextFreeSGPR = Val;
    } else if (ID == ".amdhsa_reserve_vcc") {
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveVCC = Val;
    } else if (ID == ".amdhsa_reserve_flat_scratch") {
      if (IVersion.Major < 7)
        return getParser().Error(IDRange.Start, "directive requires gfx7+",
                                 IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveFlatScr = Val;
    } else if (ID == ".amdhsa_reserve_xnack_mask") {
      if (IVersion.Major < 8)
        return getParser().Error(IDRange.Start, "directive requires gfx8+",
                                 IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveXNACK = Val;
    } else if (ID == ".amdhsa_float_round_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_round_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
                       ValRange);
    } else if (ID == ".amdhsa_dx10_clamp") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
    } else if (ID == ".amdhsa_ieee_mode") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
                       Val, ValRange);
    } else if (ID == ".amdhsa_fp16_overflow") {
      if (IVersion.Major < 9)
        return getParser().Error(IDRange.Start, "directive requires gfx9+",
                                 IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
                       ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
          ValRange);
    } else if (ID == ".amdhsa_exception_fp_denorm_src") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
          ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_int_div_zero") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
                       Val, ValRange);
    } else {
      return getParser().Error(IDRange.Start,
                               "unknown .amdhsa_kernel directive", IDRange);
    }

#undef PARSE_BITS_ENTRY
  }

  // The two register-count directives have no usable default and are
  // therefore mandatory.
  if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
    return TokError(".amdhsa_next_free_vgpr directive is required");

  if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
    return TokError(".amdhsa_next_free_sgpr directive is required");

  // Convert raw register counts into the granulated block encoding used by
  // compute_pgm_rsrc1.
  unsigned VGPRBlocks;
  unsigned SGPRBlocks;
  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
                         ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR,
                         SGPRRange, VGPRBlocks, SGPRBlocks))
    return true;

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
          VGPRBlocks))
    return OutOfRangeError(VGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
          SGPRBlocks))
    return OutOfRangeError(SGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
                  SGPRBlocks);

  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
    return TokError("too many user SGPRs enabled");
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
                  UserSGPRCount);

  getTargetStreamer().EmitAmdhsaKernelDescriptor(
      getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
      ReserveFlatScr, ReserveXNACK);
  return false;
}
3242 
3243 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
3244   uint32_t Major;
3245   uint32_t Minor;
3246 
3247   if (ParseDirectiveMajorMinor(Major, Minor))
3248     return true;
3249 
3250   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
3251   return false;
3252 }
3253 
3254 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
3255   uint32_t Major;
3256   uint32_t Minor;
3257   uint32_t Stepping;
3258   StringRef VendorName;
3259   StringRef ArchName;
3260 
3261   // If this directive has no arguments, then use the ISA version for the
3262   // targeted GPU.
3263   if (getLexer().is(AsmToken::EndOfStatement)) {
3264     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3265     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
3266                                                       ISA.Stepping,
3267                                                       "AMD", "AMDGPU");
3268     return false;
3269   }
3270 
3271   if (ParseDirectiveMajorMinor(Major, Minor))
3272     return true;
3273 
3274   if (getLexer().isNot(AsmToken::Comma))
3275     return TokError("stepping version number required, comma expected");
3276   Lex();
3277 
3278   if (ParseAsAbsoluteExpression(Stepping))
3279     return TokError("invalid stepping version");
3280 
3281   if (getLexer().isNot(AsmToken::Comma))
3282     return TokError("vendor name required, comma expected");
3283   Lex();
3284 
3285   if (getLexer().isNot(AsmToken::String))
3286     return TokError("invalid vendor name");
3287 
3288   VendorName = getLexer().getTok().getStringContents();
3289   Lex();
3290 
3291   if (getLexer().isNot(AsmToken::Comma))
3292     return TokError("arch name required, comma expected");
3293   Lex();
3294 
3295   if (getLexer().isNot(AsmToken::String))
3296     return TokError("invalid arch name");
3297 
3298   ArchName = getLexer().getTok().getStringContents();
3299   Lex();
3300 
3301   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
3302                                                     VendorName, ArchName);
3303   return false;
3304 }
3305 
3306 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
3307                                                amd_kernel_code_t &Header) {
3308   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
3309   // assembly for backwards compatibility.
3310   if (ID == "max_scratch_backing_memory_byte_size") {
3311     Parser.eatToEndOfStatement();
3312     return false;
3313   }
3314 
3315   SmallString<40> ErrStr;
3316   raw_svector_ostream Err(ErrStr);
3317   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
3318     return TokError(Err.str());
3319   }
3320   Lex();
3321   return false;
3322 }
3323 
3324 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
3325   amd_kernel_code_t Header;
3326   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
3327 
3328   while (true) {
3329     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
3330     // will set the current token to EndOfStatement.
3331     while(getLexer().is(AsmToken::EndOfStatement))
3332       Lex();
3333 
3334     if (getLexer().isNot(AsmToken::Identifier))
3335       return TokError("expected value identifier or .end_amd_kernel_code_t");
3336 
3337     StringRef ID = getLexer().getTok().getIdentifier();
3338     Lex();
3339 
3340     if (ID == ".end_amd_kernel_code_t")
3341       break;
3342 
3343     if (ParseAMDKernelCodeTValue(ID, Header))
3344       return true;
3345   }
3346 
3347   getTargetStreamer().EmitAMDKernelCodeT(Header);
3348 
3349   return false;
3350 }
3351 
3352 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
3353   if (getLexer().isNot(AsmToken::Identifier))
3354     return TokError("expected symbol name");
3355 
3356   StringRef KernelName = Parser.getTok().getString();
3357 
3358   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
3359                                            ELF::STT_AMDGPU_HSA_KERNEL);
3360   Lex();
3361   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
3362     KernelScope.initialize(getContext());
3363   return false;
3364 }
3365 
3366 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
3367   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
3368     return Error(getParser().getTok().getLoc(),
3369                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
3370                  "architectures");
3371   }
3372 
3373   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
3374 
3375   std::string ISAVersionStringFromSTI;
3376   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
3377   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
3378 
3379   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
3380     return Error(getParser().getTok().getLoc(),
3381                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
3382                  "arguments specified through the command line");
3383   }
3384 
3385   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
3386   Lex();
3387 
3388   return false;
3389 }
3390 
3391 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
3392   const char *AssemblerDirectiveBegin;
3393   const char *AssemblerDirectiveEnd;
3394   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
3395       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
3396           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
3397                             HSAMD::V3::AssemblerDirectiveEnd)
3398           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
3399                             HSAMD::AssemblerDirectiveEnd);
3400 
3401   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
3402     return Error(getParser().getTok().getLoc(),
3403                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
3404                  "not available on non-amdhsa OSes")).str());
3405   }
3406 
3407   std::string HSAMetadataString;
3408   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
3409                           HSAMetadataString))
3410     return true;
3411 
3412   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
3413     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
3414       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3415   } else {
3416     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
3417       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3418   }
3419 
3420   return false;
3421 }
3422 
/// Common code to parse out a block of text (typically YAML) between start and
/// end directives.
bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
                                          const char *AssemblerDirectiveEnd,
                                          std::string &CollectString) {

  raw_string_ostream CollectStream(CollectString);

  // Preserve whitespace inside the collected block; restored below.
  getLexer().setSkipSpace(false);

  bool FoundEnd = false;
  while (!getLexer().is(AsmToken::Eof)) {
    // Copy leading whitespace through verbatim.
    while (getLexer().is(AsmToken::Space)) {
      CollectStream << getLexer().getTok().getString();
      Lex();
    }

    // Stop (without collecting it) once the end directive is reached.
    if (getLexer().is(AsmToken::Identifier)) {
      StringRef ID = getLexer().getTok().getIdentifier();
      if (ID == AssemblerDirectiveEnd) {
        Lex();
        FoundEnd = true;
        break;
      }
    }

    // Append the rest of the statement plus a statement separator.
    CollectStream << Parser.parseStringToEndOfStatement()
                  << getContext().getAsmInfo()->getSeparatorString();

    Parser.eatToEndOfStatement();
  }

  getLexer().setSkipSpace(true);

  // Reaching EOF before the end directive is an error.
  if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
    return TokError(Twine("expected directive ") +
                    Twine(AssemblerDirectiveEnd) + Twine(" not found"));
  }

  CollectStream.flush();
  return false;
}
3465 
3466 /// Parse the assembler directive for new MsgPack-format PAL metadata.
3467 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
3468   std::string String;
3469   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
3470                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
3471     return true;
3472 
3473   auto PALMetadata = getTargetStreamer().getPALMetadata();
3474   if (!PALMetadata->setFromString(String))
3475     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
3476   return false;
3477 }
3478 
/// Parse the assembler directive for old linear-format PAL metadata.
/// Expects a comma-separated list of alternating register keys and values;
/// each pair is recorded via PALMetadata->setRegister.
bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
  // This directive is only meaningful when targeting the amdpal OS.
  if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
    return Error(getParser().getTok().getLoc(),
                 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
                 "not available on non-amdpal OSes")).str());
  }

  auto PALMetadata = getTargetStreamer().getPALMetadata();
  // Mark the metadata as the old linear format rather than MsgPack.
  PALMetadata->setLegacy();
  for (;;) {
    // Each iteration consumes one "key, value" pair.
    uint32_t Key, Value;
    if (ParseAsAbsoluteExpression(Key)) {
      return TokError(Twine("invalid value in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    if (getLexer().isNot(AsmToken::Comma)) {
      return TokError(Twine("expected an even number of values in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    Lex();
    if (ParseAsAbsoluteExpression(Value)) {
      return TokError(Twine("invalid value in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    PALMetadata->setRegister(Key, Value);
    // A trailing comma means another pair follows; otherwise we are done.
    if (getLexer().isNot(AsmToken::Comma))
      break;
    Lex();
  }
  return false;
}
3511 
3512 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
3513   StringRef IDVal = DirectiveID.getString();
3514 
3515   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
3516     if (IDVal == ".amdgcn_target")
3517       return ParseDirectiveAMDGCNTarget();
3518 
3519     if (IDVal == ".amdhsa_kernel")
3520       return ParseDirectiveAMDHSAKernel();
3521 
3522     // TODO: Restructure/combine with PAL metadata directive.
3523     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
3524       return ParseDirectiveHSAMetadata();
3525   } else {
3526     if (IDVal == ".hsa_code_object_version")
3527       return ParseDirectiveHSACodeObjectVersion();
3528 
3529     if (IDVal == ".hsa_code_object_isa")
3530       return ParseDirectiveHSACodeObjectISA();
3531 
3532     if (IDVal == ".amd_kernel_code_t")
3533       return ParseDirectiveAMDKernelCodeT();
3534 
3535     if (IDVal == ".amdgpu_hsa_kernel")
3536       return ParseDirectiveAMDGPUHsaKernel();
3537 
3538     if (IDVal == ".amd_amdgpu_isa")
3539       return ParseDirectiveISAVersion();
3540 
3541     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
3542       return ParseDirectiveHSAMetadata();
3543   }
3544 
3545   if (IDVal == PALMD::AssemblerDirectiveBegin)
3546     return ParseDirectivePALMetadataBegin();
3547 
3548   if (IDVal == PALMD::AssemblerDirective)
3549     return ParseDirectivePALMetadata();
3550 
3551   return true;
3552 }
3553 
3554 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
3555                                            unsigned RegNo) const {
3556 
3557   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
3558        R.isValid(); ++R) {
3559     if (*R == RegNo)
3560       return isGFX9();
3561   }
3562 
3563   switch (RegNo) {
3564   case AMDGPU::TBA:
3565   case AMDGPU::TBA_LO:
3566   case AMDGPU::TBA_HI:
3567   case AMDGPU::TMA:
3568   case AMDGPU::TMA_LO:
3569   case AMDGPU::TMA_HI:
3570     return !isGFX9();
3571   case AMDGPU::XNACK_MASK:
3572   case AMDGPU::XNACK_MASK_LO:
3573   case AMDGPU::XNACK_MASK_HI:
3574     return !isCI() && !isSI() && hasXNACK();
3575   default:
3576     break;
3577   }
3578 
3579   if (isInlineValue(RegNo))
3580     return !isCI() && !isSI() && !isVI();
3581 
3582   if (isCI())
3583     return true;
3584 
3585   if (isSI()) {
3586     // No flat_scr
3587     switch (RegNo) {
3588     case AMDGPU::FLAT_SCR:
3589     case AMDGPU::FLAT_SCR_LO:
3590     case AMDGPU::FLAT_SCR_HI:
3591       return false;
3592     default:
3593       return true;
3594     }
3595   }
3596 
3597   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
3598   // SI/CI have.
3599   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
3600        R.isValid(); ++R) {
3601     if (*R == RegNo)
3602       return false;
3603   }
3604 
3605   return true;
3606 }
3607 
// Parse a single instruction operand, trying progressively more generic
// forms: custom operand parsers, then register/immediate, then a general
// expression, and finally a bare identifier token (e.g. a flag like 'gds').
OperandMatchResultTy
AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
  // Try to parse with a custom parser
  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);

  // If we successfully parsed the operand or if there was an error parsing,
  // we are done.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list.  This is only done
  // by custom parser, so we shouldn't continue on to the generic parsing.
  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
      getLexer().is(AsmToken::EndOfStatement))
    return ResTy;

  // Next try a register or an immediate.
  ResTy = parseRegOrImm(Operands);

  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail)
    return ResTy;

  const auto &Tok = Parser.getTok();
  SMLoc S = Tok.getLoc();

  // Fall back to a generic expression (e.g. a symbol reference).
  const MCExpr *Expr = nullptr;
  if (!Parser.parseExpression(Expr)) {
    Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
    return MatchOperand_Success;
  }

  // Possibly this is an instruction flag like 'gds'.
  if (Tok.getKind() == AsmToken::Identifier) {
    Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S));
    Parser.Lex();
    return MatchOperand_Success;
  }

  return MatchOperand_NoMatch;
}
3646 
3647 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
3648   // Clear any forced encodings from the previous instruction.
3649   setForcedEncodingSize(0);
3650   setForcedDPP(false);
3651   setForcedSDWA(false);
3652 
3653   if (Name.endswith("_e64")) {
3654     setForcedEncodingSize(64);
3655     return Name.substr(0, Name.size() - 4);
3656   } else if (Name.endswith("_e32")) {
3657     setForcedEncodingSize(32);
3658     return Name.substr(0, Name.size() - 4);
3659   } else if (Name.endswith("_dpp")) {
3660     setForcedDPP(true);
3661     return Name.substr(0, Name.size() - 4);
3662   } else if (Name.endswith("_sdwa")) {
3663     setForcedSDWA(true);
3664     return Name.substr(0, Name.size() - 5);
3665   }
3666   return Name;
3667 }
3668 
3669 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
3670                                        StringRef Name,
3671                                        SMLoc NameLoc, OperandVector &Operands) {
3672   // Add the instruction mnemonic
3673   Name = parseMnemonicSuffix(Name);
3674   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
3675 
3676   while (!getLexer().is(AsmToken::EndOfStatement)) {
3677     OperandMatchResultTy Res = parseOperand(Operands, Name);
3678 
3679     // Eat the comma or space if there is one.
3680     if (getLexer().is(AsmToken::Comma))
3681       Parser.Lex();
3682 
3683     switch (Res) {
3684       case MatchOperand_Success: break;
3685       case MatchOperand_ParseFail:
3686         Error(getLexer().getLoc(), "failed parsing operand.");
3687         while (!getLexer().is(AsmToken::EndOfStatement)) {
3688           Parser.Lex();
3689         }
3690         return true;
3691       case MatchOperand_NoMatch:
3692         Error(getLexer().getLoc(), "not a valid operand.");
3693         while (!getLexer().is(AsmToken::EndOfStatement)) {
3694           Parser.Lex();
3695         }
3696         return true;
3697     }
3698   }
3699 
3700   return false;
3701 }
3702 
3703 //===----------------------------------------------------------------------===//
3704 // Utility functions
3705 //===----------------------------------------------------------------------===//
3706 
3707 OperandMatchResultTy
3708 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) {
3709   switch(getLexer().getKind()) {
3710     default: return MatchOperand_NoMatch;
3711     case AsmToken::Identifier: {
3712       StringRef Name = Parser.getTok().getString();
3713       if (!Name.equals(Prefix)) {
3714         return MatchOperand_NoMatch;
3715       }
3716 
3717       Parser.Lex();
3718       if (getLexer().isNot(AsmToken::Colon))
3719         return MatchOperand_ParseFail;
3720 
3721       Parser.Lex();
3722 
3723       bool IsMinus = false;
3724       if (getLexer().getKind() == AsmToken::Minus) {
3725         Parser.Lex();
3726         IsMinus = true;
3727       }
3728 
3729       if (getLexer().isNot(AsmToken::Integer))
3730         return MatchOperand_ParseFail;
3731 
3732       if (getParser().parseAbsoluteExpression(Int))
3733         return MatchOperand_ParseFail;
3734 
3735       if (IsMinus)
3736         Int = -Int;
3737       break;
3738     }
3739   }
3740   return MatchOperand_Success;
3741 }
3742 
3743 OperandMatchResultTy
3744 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
3745                                     AMDGPUOperand::ImmTy ImmTy,
3746                                     bool (*ConvertResult)(int64_t&)) {
3747   SMLoc S = Parser.getTok().getLoc();
3748   int64_t Value = 0;
3749 
3750   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
3751   if (Res != MatchOperand_Success)
3752     return Res;
3753 
3754   if (ConvertResult && !ConvertResult(Value)) {
3755     return MatchOperand_ParseFail;
3756   }
3757 
3758   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
3759   return MatchOperand_Success;
3760 }
3761 
3762 OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix(
3763   const char *Prefix,
3764   OperandVector &Operands,
3765   AMDGPUOperand::ImmTy ImmTy,
3766   bool (*ConvertResult)(int64_t&)) {
3767   StringRef Name = Parser.getTok().getString();
3768   if (!Name.equals(Prefix))
3769     return MatchOperand_NoMatch;
3770 
3771   Parser.Lex();
3772   if (getLexer().isNot(AsmToken::Colon))
3773     return MatchOperand_ParseFail;
3774 
3775   Parser.Lex();
3776   if (getLexer().isNot(AsmToken::LBrac))
3777     return MatchOperand_ParseFail;
3778   Parser.Lex();
3779 
3780   unsigned Val = 0;
3781   SMLoc S = Parser.getTok().getLoc();
3782 
3783   // FIXME: How to verify the number of elements matches the number of src
3784   // operands?
3785   for (int I = 0; I < 4; ++I) {
3786     if (I != 0) {
3787       if (getLexer().is(AsmToken::RBrac))
3788         break;
3789 
3790       if (getLexer().isNot(AsmToken::Comma))
3791         return MatchOperand_ParseFail;
3792       Parser.Lex();
3793     }
3794 
3795     if (getLexer().isNot(AsmToken::Integer))
3796       return MatchOperand_ParseFail;
3797 
3798     int64_t Op;
3799     if (getParser().parseAbsoluteExpression(Op))
3800       return MatchOperand_ParseFail;
3801 
3802     if (Op != 0 && Op != 1)
3803       return MatchOperand_ParseFail;
3804     Val |= (Op << I);
3805   }
3806 
3807   Parser.Lex();
3808   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
3809   return MatchOperand_Success;
3810 }
3811 
// Parse a named single-bit flag: the bare name sets the bit, the name
// prefixed with "no" clears it, and absence at end-of-statement defaults to 0.
OperandMatchResultTy
AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
                               AMDGPUOperand::ImmTy ImmTy) {
  int64_t Bit = 0;
  SMLoc S = Parser.getTok().getLoc();

  // We are at the end of the statement, and this is a default argument, so
  // use a default value.
  if (getLexer().isNot(AsmToken::EndOfStatement)) {
    switch(getLexer().getKind()) {
      case AsmToken::Identifier: {
        StringRef Tok = Parser.getTok().getString();
        if (Tok == Name) {
          // Diagnose modifiers invalid for the subtarget, but keep parsing
          // so one mistake does not cascade into more errors.
          if (Tok == "r128" && isGFX9())
            Error(S, "r128 modifier is not supported on this GPU");
          if (Tok == "a16" && !isGFX9())
            Error(S, "a16 modifier is not supported on this GPU");
          Bit = 1;
          Parser.Lex();
        } else if (Tok.startswith("no") && Tok.endswith(Name)) {
          // The negated form, e.g. "nogds" for Name == "gds".
          Bit = 0;
          Parser.Lex();
        } else {
          return MatchOperand_NoMatch;
        }
        break;
      }
      default:
        return MatchOperand_NoMatch;
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
  return MatchOperand_Success;
}
3847 
3848 static void addOptionalImmOperand(
3849   MCInst& Inst, const OperandVector& Operands,
3850   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
3851   AMDGPUOperand::ImmTy ImmT,
3852   int64_t Default = 0) {
3853   auto i = OptionalIdx.find(ImmT);
3854   if (i != OptionalIdx.end()) {
3855     unsigned Idx = i->second;
3856     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
3857   } else {
3858     Inst.addOperand(MCOperand::createImm(Default));
3859   }
3860 }
3861 
3862 OperandMatchResultTy
3863 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
3864   if (getLexer().isNot(AsmToken::Identifier)) {
3865     return MatchOperand_NoMatch;
3866   }
3867   StringRef Tok = Parser.getTok().getString();
3868   if (Tok != Prefix) {
3869     return MatchOperand_NoMatch;
3870   }
3871 
3872   Parser.Lex();
3873   if (getLexer().isNot(AsmToken::Colon)) {
3874     return MatchOperand_ParseFail;
3875   }
3876 
3877   Parser.Lex();
3878   if (getLexer().isNot(AsmToken::Identifier)) {
3879     return MatchOperand_ParseFail;
3880   }
3881 
3882   Value = Parser.getTok().getString();
3883   return MatchOperand_Success;
3884 }
3885 
// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
// values to live in a joint format operand in the MCInst encoding.
OperandMatchResultTy
AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Dfmt = 0, Nfmt = 0;
  // dfmt and nfmt can appear in either order, and each is optional.
  bool GotDfmt = false, GotNfmt = false;
  while (!GotDfmt || !GotNfmt) {
    if (!GotDfmt) {
      auto Res = parseIntWithPrefix("dfmt", Dfmt);
      if (Res != MatchOperand_NoMatch) {
        if (Res != MatchOperand_Success)
          return Res;
        // dfmt occupies 4 bits of the joint format operand.
        if (Dfmt >= 16) {
          Error(Parser.getTok().getLoc(), "out of range dfmt");
          return MatchOperand_ParseFail;
        }
        GotDfmt = true;
        Parser.Lex();
        continue;
      }
    }
    if (!GotNfmt) {
      auto Res = parseIntWithPrefix("nfmt", Nfmt);
      if (Res != MatchOperand_NoMatch) {
        if (Res != MatchOperand_Success)
          return Res;
        // nfmt occupies 3 bits of the joint format operand.
        if (Nfmt >= 8) {
          Error(Parser.getTok().getLoc(), "out of range nfmt");
          return MatchOperand_ParseFail;
        }
        GotNfmt = true;
        Parser.Lex();
        continue;
      }
    }
    // Neither keyword matched on this pass; stop looking.
    break;
  }
  if (!GotDfmt && !GotNfmt)
    return MatchOperand_NoMatch;
  // Pack both fields: dfmt in the low 4 bits, nfmt above it.
  auto Format = Dfmt | Nfmt << 4;
  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
  return MatchOperand_Success;
}
3932 
3933 //===----------------------------------------------------------------------===//
3934 // ds
3935 //===----------------------------------------------------------------------===//
3936 
3937 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
3938                                     const OperandVector &Operands) {
3939   OptionalImmIndexMap OptionalIdx;
3940 
3941   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3942     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3943 
3944     // Add the register arguments
3945     if (Op.isReg()) {
3946       Op.addRegOperands(Inst, 1);
3947       continue;
3948     }
3949 
3950     // Handle optional arguments
3951     OptionalIdx[Op.getImmTy()] = i;
3952   }
3953 
3954   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
3955   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
3956   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
3957 
3958   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
3959 }
3960 
3961 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
3962                                 bool IsGdsHardcoded) {
3963   OptionalImmIndexMap OptionalIdx;
3964 
3965   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3966     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3967 
3968     // Add the register arguments
3969     if (Op.isReg()) {
3970       Op.addRegOperands(Inst, 1);
3971       continue;
3972     }
3973 
3974     if (Op.isToken() && Op.getToken() == "gds") {
3975       IsGdsHardcoded = true;
3976       continue;
3977     }
3978 
3979     // Handle optional arguments
3980     OptionalIdx[Op.getImmTy()] = i;
3981   }
3982 
3983   AMDGPUOperand::ImmTy OffsetType =
3984     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si ||
3985      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
3986                                                       AMDGPUOperand::ImmTyOffset;
3987 
3988   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
3989 
3990   if (!IsGdsHardcoded) {
3991     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
3992   }
3993   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
3994 }
3995 
// Convert parsed operands into an EXP instruction, computing the enable
// mask from which of the four sources are "off" and rewriting the sources
// for the compressed (compr) form.
void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  // Index into Inst of each of the four source operands; used below to
  // rewrite them for compr and to compute the enable mask.
  unsigned OperandIdx[4];
  unsigned EnMask = 0;
  int SrcIdx = 0;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Op.addRegOperands(Inst, 1);
      ++SrcIdx;
      continue;
    }

    // An "off" source becomes a NoRegister placeholder.
    if (Op.isOff()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
      ++SrcIdx;
      continue;
    }

    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // The "done" token does not produce an operand here; skip it.
    if (Op.isToken() && Op.getToken() == "done")
      continue;

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  assert(SrcIdx == 4);

  bool Compr = false;
  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
    // Compressed form: move src2 down to slot 1 and clear the upper two
    // slots.
    Compr = true;
    Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
    Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
    Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
  }

  // Enable one bit per live source slot, or two bits per slot when compr.
  for (auto i = 0; i < SrcIdx; ++i) {
    if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
      EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);

  Inst.addOperand(MCOperand::createImm(EnMask));
}
4056 
4057 //===----------------------------------------------------------------------===//
4058 // s_waitcnt
4059 //===----------------------------------------------------------------------===//
4060 
4061 static bool
4062 encodeCnt(
4063   const AMDGPU::IsaVersion ISA,
4064   int64_t &IntVal,
4065   int64_t CntVal,
4066   bool Saturate,
4067   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
4068   unsigned (*decode)(const IsaVersion &Version, unsigned))
4069 {
4070   bool Failed = false;
4071 
4072   IntVal = encode(ISA, IntVal, CntVal);
4073   if (CntVal != decode(ISA, IntVal)) {
4074     if (Saturate) {
4075       IntVal = encode(ISA, IntVal, -1);
4076     } else {
4077       Failed = true;
4078     }
4079   }
4080   return Failed;
4081 }
4082 
// Parse one counter clause of an s_waitcnt operand, e.g. "vmcnt(3)", and
// merge the value into the corresponding field of IntVal. A "_sat" suffix
// clamps an out-of-range value instead of reporting an error. Returns true
// on error.
bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
  StringRef CntName = Parser.getTok().getString();
  int64_t CntVal;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::LParen))
    return true;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Integer))
    return true;

  SMLoc ValLoc = Parser.getTok().getLoc();
  if (getParser().parseAbsoluteExpression(CntVal))
    return true;

  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());

  // An unrecognized counter name leaves Failed set and is diagnosed below.
  bool Failed = true;
  bool Sat = CntName.endswith("_sat");

  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
  }

  if (Failed) {
    Error(ValLoc, "too large value for " + CntName);
    return true;
  }

  if (getLexer().isNot(AsmToken::RParen)) {
    return true;
  }

  Parser.Lex();
  // Consume an optional '&' or ',' separator when another counter clause
  // follows (i.e. the next token is an identifier).
  if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
    const AsmToken NextToken = getLexer().peekTok();
    if (NextToken.is(AsmToken::Identifier)) {
      Parser.Lex();
    }
  }

  return false;
}
4131 
4132 OperandMatchResultTy
4133 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
4134   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4135   int64_t Waitcnt = getWaitcntBitMask(ISA);
4136   SMLoc S = Parser.getTok().getLoc();
4137 
4138   switch(getLexer().getKind()) {
4139     default: return MatchOperand_ParseFail;
4140     case AsmToken::Integer:
4141       // The operand can be an integer value.
4142       if (getParser().parseAbsoluteExpression(Waitcnt))
4143         return MatchOperand_ParseFail;
4144       break;
4145 
4146     case AsmToken::Identifier:
4147       do {
4148         if (parseCnt(Waitcnt))
4149           return MatchOperand_ParseFail;
4150       } while(getLexer().isNot(AsmToken::EndOfStatement));
4151       break;
4152   }
4153   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
4154   return MatchOperand_Success;
4155 }
4156 
// Parse "hwreg(<id> [, <offset>, <width>])". The register id may be symbolic
// or numeric; offset and width must both be present if either is. Returns
// true on syntax error; range checking of the parsed values is done by the
// caller (parseHwreg).
bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
                                          int64_t &Width) {
  using namespace llvm::AMDGPU::Hwreg;

  if (Parser.getTok().getString() != "hwreg")
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::LParen))
    return true;
  Parser.Lex();

  if (getLexer().is(AsmToken::Identifier)) {
    // Symbolic register name: linear search of the known names. Pre-GFX9
    // subtargets only accept the portion of the table before the GFX9
    // additions. An unknown name leaves Id as ID_UNKNOWN_.
    HwReg.IsSymbolic = true;
    HwReg.Id = ID_UNKNOWN_;
    const StringRef tok = Parser.getTok().getString();
    int Last = ID_SYMBOLIC_LAST_;
    if (isSI() || isCI() || isVI())
      Last = ID_SYMBOLIC_FIRST_GFX9_;
    for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) {
      if (tok == IdSymbolic[i]) {
        HwReg.Id = i;
        break;
      }
    }
    Parser.Lex();
  } else {
    // Numeric register id.
    HwReg.IsSymbolic = false;
    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(HwReg.Id))
      return true;
  }

  // Short form hwreg(<id>): offset and width keep their defaults.
  if (getLexer().is(AsmToken::RParen)) {
    Parser.Lex();
    return false;
  }

  // optional params
  if (getLexer().isNot(AsmToken::Comma))
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::Integer))
    return true;
  if (getParser().parseAbsoluteExpression(Offset))
    return true;

  if (getLexer().isNot(AsmToken::Comma))
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::Integer))
    return true;
  if (getParser().parseAbsoluteExpression(Width))
    return true;

  if (getLexer().isNot(AsmToken::RParen))
    return true;
  Parser.Lex();

  return false;
}
4221 
// Parse an s_getreg/s_setreg operand: either a raw 16-bit immediate or a
// hwreg(...) construct whose id/offset/width fields are packed into the same
// 16-bit encoding below.
OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Hwreg;

  int64_t Imm16Val = 0;
  SMLoc S = Parser.getTok().getLoc();

  switch(getLexer().getKind()) {
    default: return MatchOperand_NoMatch;
    case AsmToken::Integer:
      // The operand can be an integer value.
      if (getParser().parseAbsoluteExpression(Imm16Val))
        return MatchOperand_NoMatch;
      if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
        Error(S, "invalid immediate: only 16-bit values are legal");
        // Do not return error code, but create an imm operand anyway and proceed
        // to the next operand, if any. That avoids unnecessary error messages.
      }
      break;

    case AsmToken::Identifier: {
        OperandInfoTy HwReg(ID_UNKNOWN_);
        int64_t Offset = OFFSET_DEFAULT_;
        int64_t Width = WIDTH_M1_DEFAULT_ + 1;
        if (parseHwregConstruct(HwReg, Offset, Width))
          return MatchOperand_ParseFail;
        // Range errors are reported but, as above, do not abort parsing.
        if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) {
          if (HwReg.IsSymbolic)
            Error(S, "invalid symbolic name of hardware register");
          else
            Error(S, "invalid code of hardware register: only 6-bit values are legal");
        }
        if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset))
          Error(S, "invalid bit offset: only 5-bit values are legal");
        if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1))
          Error(S, "invalid bitfield width: only values from 1 to 32 are legal");
        // Pack id, offset and (width - 1) into the immediate encoding.
        Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_);
      }
      break;
  }
  Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
  return MatchOperand_Success;
}
4264 
// Any immediate qualifies as an s_waitcnt operand; parseSWaitCntOps also
// accepts a raw integer bitmask, so no further type check is possible here.
bool AMDGPUOperand::isSWaitCnt() const {
  return isImm();
}
4268 
// True only for immediates produced by parseHwreg, which tags them with
// ImmTyHwreg.
bool AMDGPUOperand::isHwreg() const {
  return isImmTy(ImmTyHwreg);
}
4272 
// Parse "sendmsg(<msg> [, <operation> [, <stream id>]])". The message and
// operation may be symbolic or numeric. Returns true on a hard syntax error;
// validity checking of the parsed ids is left to the caller.
bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) {
  using namespace llvm::AMDGPU::SendMsg;

  if (Parser.getTok().getString() != "sendmsg")
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::LParen))
    return true;
  Parser.Lex();

  if (getLexer().is(AsmToken::Identifier)) {
    // Symbolic message name: look it up among the known (non-gap) ids. An
    // unknown name leaves Id as ID_UNKNOWN_.
    Msg.IsSymbolic = true;
    Msg.Id = ID_UNKNOWN_;
    const std::string tok = Parser.getTok().getString();
    for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
      switch(i) {
        default: continue; // Omit gaps.
        case ID_INTERRUPT: case ID_GS: case ID_GS_DONE:  case ID_SYSMSG: break;
      }
      if (tok == IdSymbolic[i]) {
        Msg.Id = i;
        break;
      }
    }
    Parser.Lex();
  } else {
    // Numeric message id.
    Msg.IsSymbolic = false;
    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(Msg.Id))
      return true;
    // NOTE(review): if a second integer token immediately follows, it is
    // re-parsed into Msg.Id and the id is invalidated on failure — this
    // looks intended to absorb malformed multi-token values; confirm
    // against the sendmsg assembler tests.
    if (getLexer().is(AsmToken::Integer))
      if (getParser().parseAbsoluteExpression(Msg.Id))
        Msg.Id = ID_UNKNOWN_;
  }
  if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest.
    return false;

  // Only GS, GS_DONE and SYSMSG take an operation argument; everything else
  // must close immediately.
  if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) {
    if (getLexer().isNot(AsmToken::RParen))
      return true;
    Parser.Lex();
    return false;
  }

  if (getLexer().isNot(AsmToken::Comma))
    return true;
  Parser.Lex();

  assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG);
  Operation.Id = ID_UNKNOWN_;
  if (getLexer().is(AsmToken::Identifier)) {
    // Symbolic operation name; the valid name table depends on the message
    // type (SYSMSG vs GS/GS_DONE).
    Operation.IsSymbolic = true;
    const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
    const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
    const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
    const StringRef Tok = Parser.getTok().getString();
    for (int i = F; i < L; ++i) {
      if (Tok == S[i]) {
        Operation.Id = i;
        break;
      }
    }
    Parser.Lex();
  } else {
    // Numeric operation id.
    Operation.IsSymbolic = false;
    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(Operation.Id))
      return true;
  }

  if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
    // Stream id is optional.
    if (getLexer().is(AsmToken::RParen)) {
      Parser.Lex();
      return false;
    }

    if (getLexer().isNot(AsmToken::Comma))
      return true;
    Parser.Lex();

    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(StreamId))
      return true;
  }

  if (getLexer().isNot(AsmToken::RParen))
    return true;
  Parser.Lex();
  return false;
}
4368 
4369 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
4370   if (getLexer().getKind() != AsmToken::Identifier)
4371     return MatchOperand_NoMatch;
4372 
4373   StringRef Str = Parser.getTok().getString();
4374   int Slot = StringSwitch<int>(Str)
4375     .Case("p10", 0)
4376     .Case("p20", 1)
4377     .Case("p0", 2)
4378     .Default(-1);
4379 
4380   SMLoc S = Parser.getTok().getLoc();
4381   if (Slot == -1)
4382     return MatchOperand_ParseFail;
4383 
4384   Parser.Lex();
4385   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
4386                                               AMDGPUOperand::ImmTyInterpSlot));
4387   return MatchOperand_Success;
4388 }
4389 
// Parse an interpolation attribute operand of the form "attr<N>.<chan>"
// (e.g. "attr12.x"). Pushes two immediates: the attribute number
// (ImmTyInterpAttr) and the channel (ImmTyAttrChan, x/y/z/w -> 0..3).
OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
  if (getLexer().getKind() != AsmToken::Identifier)
    return MatchOperand_NoMatch;

  StringRef Str = Parser.getTok().getString();
  if (!Str.startswith("attr"))
    return MatchOperand_NoMatch;

  // The last two characters select the channel (".x" etc).
  StringRef Chan = Str.take_back(2);
  int AttrChan = StringSwitch<int>(Chan)
    .Case(".x", 0)
    .Case(".y", 1)
    .Case(".z", 2)
    .Case(".w", 3)
    .Default(-1);
  if (AttrChan == -1)
    return MatchOperand_ParseFail;

  // Strip the channel suffix and the "attr" prefix, leaving the number.
  Str = Str.drop_back(2).drop_front(4);

  uint8_t Attr;
  if (Str.getAsInteger(10, Attr))
    return MatchOperand_ParseFail;

  SMLoc S = Parser.getTok().getLoc();
  Parser.Lex();
  if (Attr > 63) {
    // Diagnose but still return Success so parsing continues without
    // cascading errors (matches the pattern used elsewhere in this parser).
    Error(S, "out of bounds attr");
    return MatchOperand_Success;
  }

  // Chan still points into the source buffer of the (already consumed)
  // token, so this is a valid location for the channel suffix.
  SMLoc SChan = SMLoc::getFromPointer(Chan.data());

  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
                                              AMDGPUOperand::ImmTyInterpAttr));
  Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
                                              AMDGPUOperand::ImmTyAttrChan));
  return MatchOperand_Success;
}
4429 
// Emit the standard diagnostic for an invalid/out-of-range exp target at
// the current token location.
void AMDGPUAsmParser::errorExpTgt() {
  Error(Parser.getTok().getLoc(), "invalid exp target");
}
4433 
// Map a symbolic exp target name to its hardware target id:
//   mrt0..mrt7 -> 0..7, mrtz -> 8, null -> 9,
//   pos0..pos3 -> 12..15, param0..param31 -> 32..63.
// Out-of-range values are diagnosed via errorExpTgt() but still return
// MatchOperand_Success so the caller can continue parsing.
OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
                                                      uint8_t &Val) {
  if (Str == "null") {
    Val = 9;
    return MatchOperand_Success;
  }

  if (Str.startswith("mrt")) {
    Str = Str.drop_front(3);
    if (Str == "z") { // == mrtz
      Val = 8;
      return MatchOperand_Success;
    }

    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val > 7)
      errorExpTgt();

    return MatchOperand_Success;
  }

  if (Str.startswith("pos")) {
    Str = Str.drop_front(3);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val > 3)
      errorExpTgt();

    // pos0..pos3 occupy targets 12..15.
    Val += 12;
    return MatchOperand_Success;
  }

  if (Str.startswith("param")) {
    Str = Str.drop_front(5);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val >= 32)
      errorExpTgt();

    // param0..param31 occupy targets 32..63.
    Val += 32;
    return MatchOperand_Success;
  }

  if (Str.startswith("invalid_target_")) {
    Str = Str.drop_front(15);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    // "invalid_target_N" round-trips from the disassembler; always diagnose.
    errorExpTgt();
    return MatchOperand_Success;
  }

  return MatchOperand_NoMatch;
}
4492 
4493 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
4494   uint8_t Val;
4495   StringRef Str = Parser.getTok().getString();
4496 
4497   auto Res = parseExpTgtImpl(Str, Val);
4498   if (Res != MatchOperand_Success)
4499     return Res;
4500 
4501   SMLoc S = Parser.getTok().getLoc();
4502   Parser.Lex();
4503 
4504   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
4505                                               AMDGPUOperand::ImmTyExpTgt));
4506   return MatchOperand_Success;
4507 }
4508 
// Parse a sendmsg operand: either a raw 16-bit immediate or the symbolic
// sendmsg(MSG [, OP [, STREAM_ID]]) construct. The encoded value is pushed
// as an ImmTySendMsg operand even after a diagnosed validation error, so
// parsing can proceed without cascading diagnostics.
OperandMatchResultTy
AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SendMsg;

  int64_t Imm16Val = 0;
  SMLoc S = Parser.getTok().getLoc();

  switch(getLexer().getKind()) {
  default:
    return MatchOperand_NoMatch;
  case AsmToken::Integer:
    // The operand can be an integer value.
    if (getParser().parseAbsoluteExpression(Imm16Val))
      return MatchOperand_NoMatch;
    if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
      Error(S, "invalid immediate: only 16-bit values are legal");
      // Do not return error code, but create an imm operand anyway and proceed
      // to the next operand, if any. That avoids unneccessary error messages.
    }
    break;
  case AsmToken::Identifier: {
      OperandInfoTy Msg(ID_UNKNOWN_);
      OperandInfoTy Operation(OP_UNKNOWN_);
      int64_t StreamId = STREAM_ID_DEFAULT_;
      if (parseSendMsgConstruct(Msg, Operation, StreamId))
        return MatchOperand_ParseFail;
      // The do/while(false) lets 'break' abandon encoding after the first
      // diagnosed problem while still emitting the operand below.
      do {
        // Validate and encode message ID.
        if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE)
                || Msg.Id == ID_SYSMSG)) {
          if (Msg.IsSymbolic)
            Error(S, "invalid/unsupported symbolic name of message");
          else
            Error(S, "invalid/unsupported code of message");
          break;
        }
        Imm16Val = (Msg.Id << ID_SHIFT_);
        // Validate and encode operation ID.
        if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) {
          if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) {
            if (Operation.IsSymbolic)
              Error(S, "invalid symbolic name of GS_OP");
            else
              Error(S, "invalid code of GS_OP: only 2-bit values are legal");
            break;
          }
          if (Operation.Id == OP_GS_NOP
              && Msg.Id != ID_GS_DONE) {
            Error(S, "invalid GS_OP: NOP is for GS_DONE only");
            break;
          }
          Imm16Val |= (Operation.Id << OP_SHIFT_);
        }
        if (Msg.Id == ID_SYSMSG) {
          if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) {
            if (Operation.IsSymbolic)
              Error(S, "invalid/unsupported symbolic name of SYSMSG_OP");
            else
              Error(S, "invalid/unsupported code of SYSMSG_OP");
            break;
          }
          Imm16Val |= (Operation.Id << OP_SHIFT_);
        }
        // Validate and encode stream ID.
        if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
          if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) {
            Error(S, "invalid stream id: only 2-bit values are legal");
            break;
          }
          Imm16Val |= (StreamId << STREAM_ID_SHIFT_);
        }
      } while (false);
    }
    break;
  }
  Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
  return MatchOperand_Success;
}
4587 
// True iff this operand is a sendmsg immediate produced by parseSendMsgOp.
bool AMDGPUOperand::isSendMsg() const {
  return isImmTy(ImmTySendMsg);
}
4591 
4592 //===----------------------------------------------------------------------===//
4593 // parser helpers
4594 //===----------------------------------------------------------------------===//
4595 
// Return true iff Token is the identifier Id.
bool
AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
  return Token.is(AsmToken::Identifier) && Token.getString() == Id;
}

// Return true iff the current token is the identifier Id.
bool
AMDGPUAsmParser::isId(const StringRef Id) const {
  return isId(getToken(), Id);
}

// Return true iff the current token has kind Kind.
bool
AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
  return getTokenKind() == Kind;
}

// Consume the current token if it is the identifier Id; return whether it
// was consumed.
bool
AMDGPUAsmParser::trySkipId(const StringRef Id) {
  if (isId(Id)) {
    lex();
    return true;
  }
  return false;
}

// Consume the current token if it has kind Kind; return whether it was
// consumed.
bool
AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
  if (isToken(Kind)) {
    lex();
    return true;
  }
  return false;
}

// Like trySkipToken, but emit ErrMsg at the current location on mismatch.
bool
AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
                           const StringRef ErrMsg) {
  if (!trySkipToken(Kind)) {
    Error(getLoc(), ErrMsg);
    return false;
  }
  return true;
}

// Parse an absolute expression into Imm; returns true on SUCCESS
// (note: inverted convention vs MCAsmParser, which returns true on error).
bool
AMDGPUAsmParser::parseExpr(int64_t &Imm) {
  return !getParser().parseAbsoluteExpression(Imm);
}

// Parse a quoted string token into Val (without the quotes); on mismatch
// emit ErrMsg and return false.
bool
AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
  if (isToken(AsmToken::String)) {
    Val = getToken().getStringContents();
    lex();
    return true;
  } else {
    Error(getLoc(), ErrMsg);
    return false;
  }
}
4655 
// Current token (by value).
AsmToken
AMDGPUAsmParser::getToken() const {
  return Parser.getTok();
}

// Next token, without consuming the current one.
AsmToken
AMDGPUAsmParser::peekToken() {
  return getLexer().peekTok();
}

// Fill Tokens with upcoming tokens; slots beyond what the lexer could
// provide are padded with Error tokens.
void
AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
  auto TokCount = getLexer().peekTokens(Tokens);

  for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
    Tokens[Idx] = AsmToken(AsmToken::Error, "");
}

// Kind of the current token.
AsmToken::TokenKind
AMDGPUAsmParser::getTokenKind() const {
  return getLexer().getKind();
}

// Source location of the current token.
SMLoc
AMDGPUAsmParser::getLoc() const {
  return getToken().getLoc();
}

// Text of the current token.
StringRef
AMDGPUAsmParser::getTokenStr() const {
  return getToken().getString();
}

// Advance to the next token.
void
AMDGPUAsmParser::lex() {
  Parser.Lex();
}
4693 
4694 //===----------------------------------------------------------------------===//
4695 // swizzle
4696 //===----------------------------------------------------------------------===//
4697 
4698 LLVM_READNONE
4699 static unsigned
4700 encodeBitmaskPerm(const unsigned AndMask,
4701                   const unsigned OrMask,
4702                   const unsigned XorMask) {
4703   using namespace llvm::AMDGPU::Swizzle;
4704 
4705   return BITMASK_PERM_ENC |
4706          (AndMask << BITMASK_AND_SHIFT) |
4707          (OrMask  << BITMASK_OR_SHIFT)  |
4708          (XorMask << BITMASK_XOR_SHIFT);
4709 }
4710 
4711 bool
4712 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
4713                                       const unsigned MinVal,
4714                                       const unsigned MaxVal,
4715                                       const StringRef ErrMsg) {
4716   for (unsigned i = 0; i < OpNum; ++i) {
4717     if (!skipToken(AsmToken::Comma, "expected a comma")){
4718       return false;
4719     }
4720     SMLoc ExprLoc = Parser.getTok().getLoc();
4721     if (!parseExpr(Op[i])) {
4722       return false;
4723     }
4724     if (Op[i] < MinVal || Op[i] > MaxVal) {
4725       Error(ExprLoc, ErrMsg);
4726       return false;
4727     }
4728   }
4729 
4730   return true;
4731 }
4732 
4733 bool
4734 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
4735   using namespace llvm::AMDGPU::Swizzle;
4736 
4737   int64_t Lane[LANE_NUM];
4738   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
4739                            "expected a 2-bit lane id")) {
4740     Imm = QUAD_PERM_ENC;
4741     for (unsigned I = 0; I < LANE_NUM; ++I) {
4742       Imm |= Lane[I] << (LANE_SHIFT * I);
4743     }
4744     return true;
4745   }
4746   return false;
4747 }
4748 
// Parse swizzle(BROADCAST, group_size, lane_id). Encoded as a bitmask
// perm: the AND mask keeps the high (group id) bits — for a power-of-two
// GroupSize, BITMASK_MAX - GroupSize + 1 equals ~(GroupSize-1) within the
// mask width — and the OR mask injects the chosen lane index.
bool
AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc S = Parser.getTok().getLoc();
  int64_t GroupSize;
  int64_t LaneIdx;

  if (!parseSwizzleOperands(1, &GroupSize,
                            2, 32,
                            "group size must be in the interval [2,32]")) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(S, "group size must be a power of two");
    return false;
  }
  // Lane index must fall inside the group.
  if (parseSwizzleOperands(1, &LaneIdx,
                           0, GroupSize - 1,
                           "lane id must be in the interval [0,group size - 1]")) {
    Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
    return true;
  }
  return false;
}
4774 
4775 bool
4776 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
4777   using namespace llvm::AMDGPU::Swizzle;
4778 
4779   SMLoc S = Parser.getTok().getLoc();
4780   int64_t GroupSize;
4781 
4782   if (!parseSwizzleOperands(1, &GroupSize,
4783       2, 32, "group size must be in the interval [2,32]")) {
4784     return false;
4785   }
4786   if (!isPowerOf2_64(GroupSize)) {
4787     Error(S, "group size must be a power of two");
4788     return false;
4789   }
4790 
4791   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
4792   return true;
4793 }
4794 
4795 bool
4796 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
4797   using namespace llvm::AMDGPU::Swizzle;
4798 
4799   SMLoc S = Parser.getTok().getLoc();
4800   int64_t GroupSize;
4801 
4802   if (!parseSwizzleOperands(1, &GroupSize,
4803       1, 16, "group size must be in the interval [1,16]")) {
4804     return false;
4805   }
4806   if (!isPowerOf2_64(GroupSize)) {
4807     Error(S, "group size must be a power of two");
4808     return false;
4809   }
4810 
4811   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
4812   return true;
4813 }
4814 
// Parse swizzle(BITMASK_PERM, "mask"), where mask is a BITMASK_WIDTH
// character string, most significant bit first. Per character:
//   '0' -> output bit forced to 0 (not in AND, OR or XOR mask)
//   '1' -> output bit forced to 1 (OR mask)
//   'p' -> preserve the source lane bit (AND mask)
//   'i' -> invert the source lane bit (AND + XOR masks)
bool
AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }

  StringRef Ctl;
  SMLoc StrLoc = Parser.getTok().getLoc();
  if (!parseString(Ctl)) {
    return false;
  }
  if (Ctl.size() != BITMASK_WIDTH) {
    Error(StrLoc, "expected a 5-character mask");
    return false;
  }

  unsigned AndMask = 0;
  unsigned OrMask = 0;
  unsigned XorMask = 0;

  for (size_t i = 0; i < Ctl.size(); ++i) {
    // Leftmost character maps to the highest mask bit.
    unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
    switch(Ctl[i]) {
    default:
      Error(StrLoc, "invalid mask");
      return false;
    case '0':
      break;
    case '1':
      OrMask |= Mask;
      break;
    case 'p':
      AndMask |= Mask;
      break;
    case 'i':
      AndMask |= Mask;
      XorMask |= Mask;
      break;
    }
  }

  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
  return true;
}
4861 
4862 bool
4863 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
4864 
4865   SMLoc OffsetLoc = Parser.getTok().getLoc();
4866 
4867   if (!parseExpr(Imm)) {
4868     return false;
4869   }
4870   if (!isUInt<16>(Imm)) {
4871     Error(OffsetLoc, "expected a 16-bit offset");
4872     return false;
4873   }
4874   return true;
4875 }
4876 
4877 bool
4878 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
4879   using namespace llvm::AMDGPU::Swizzle;
4880 
4881   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
4882 
4883     SMLoc ModeLoc = Parser.getTok().getLoc();
4884     bool Ok = false;
4885 
4886     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
4887       Ok = parseSwizzleQuadPerm(Imm);
4888     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
4889       Ok = parseSwizzleBitmaskPerm(Imm);
4890     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
4891       Ok = parseSwizzleBroadcast(Imm);
4892     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
4893       Ok = parseSwizzleSwap(Imm);
4894     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
4895       Ok = parseSwizzleReverse(Imm);
4896     } else {
4897       Error(ModeLoc, "expected a swizzle mode");
4898     }
4899 
4900     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
4901   }
4902 
4903   return false;
4904 }
4905 
4906 OperandMatchResultTy
4907 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
4908   SMLoc S = Parser.getTok().getLoc();
4909   int64_t Imm = 0;
4910 
4911   if (trySkipId("offset")) {
4912 
4913     bool Ok = false;
4914     if (skipToken(AsmToken::Colon, "expected a colon")) {
4915       if (trySkipId("swizzle")) {
4916         Ok = parseSwizzleMacro(Imm);
4917       } else {
4918         Ok = parseSwizzleOffset(Imm);
4919       }
4920     }
4921 
4922     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
4923 
4924     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
4925   } else {
4926     // Swizzle "offset" operand is optional.
4927     // If it is omitted, try parsing other optional operands.
4928     return parseOptionalOpr(Operands);
4929   }
4930 }
4931 
// True iff this operand is a swizzle immediate produced by parseSwizzleOp.
bool
AMDGPUOperand::isSwizzle() const {
  return isImmTy(ImmTySwizzle);
}
4936 
4937 //===----------------------------------------------------------------------===//
4938 // VGPR Index Mode
4939 //===----------------------------------------------------------------------===//
4940 
// Parse the body of a gpr_idx(...) macro after the opening parenthesis has
// been consumed. Returns a bitmask of VGPR index modes; an empty list "()"
// yields OFF. On error a diagnostic is emitted and the mask collected so
// far is returned (the caller deliberately does not propagate failure).
int64_t AMDGPUAsmParser::parseGPRIdxMacro() {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  if (trySkipToken(AsmToken::RParen)) {
    return OFF;
  }

  int64_t Imm = 0;

  while (true) {
    unsigned Mode = 0;
    SMLoc S = Parser.getTok().getLoc();

    // Try each symbolic mode name; the first match consumes the token.
    for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
      if (trySkipId(IdSymbolic[ModeId])) {
        Mode = 1 << ModeId;
        break;
      }
    }

    if (Mode == 0) {
      Error(S, (Imm == 0)?
               "expected a VGPR index mode or a closing parenthesis" :
               "expected a VGPR index mode");
      break;
    }

    // Each mode may be listed at most once.
    if (Imm & Mode) {
      Error(S, "duplicate VGPR index mode");
      break;
    }
    Imm |= Mode;

    if (trySkipToken(AsmToken::RParen))
      break;
    if (!skipToken(AsmToken::Comma,
                   "expected a comma or a closing parenthesis"))
      break;
  }

  return Imm;
}
4984 
4985 OperandMatchResultTy
4986 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
4987 
4988   int64_t Imm = 0;
4989   SMLoc S = Parser.getTok().getLoc();
4990 
4991   if (getLexer().getKind() == AsmToken::Identifier &&
4992       Parser.getTok().getString() == "gpr_idx" &&
4993       getLexer().peekTok().is(AsmToken::LParen)) {
4994 
4995     Parser.Lex();
4996     Parser.Lex();
4997 
4998     // If parse failed, trigger an error but do not return error code
4999     // to avoid excessive error messages.
5000     Imm = parseGPRIdxMacro();
5001 
5002   } else {
5003     if (getParser().parseAbsoluteExpression(Imm))
5004       return MatchOperand_NoMatch;
5005     if (Imm < 0 || !isUInt<4>(Imm)) {
5006       Error(S, "invalid immediate: only 4-bit values are legal");
5007     }
5008   }
5009 
5010   Operands.push_back(
5011       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
5012   return MatchOperand_Success;
5013 }
5014 
// True iff this operand is a VGPR index mode immediate.
bool AMDGPUOperand::isGPRIdxMode() const {
  return isImmTy(ImmTyGprIdxMode);
}
5018 
5019 //===----------------------------------------------------------------------===//
5020 // sopp branch targets
5021 //===----------------------------------------------------------------------===//
5022 
5023 OperandMatchResultTy
5024 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
5025   SMLoc S = Parser.getTok().getLoc();
5026 
5027   switch (getLexer().getKind()) {
5028     default: return MatchOperand_ParseFail;
5029     case AsmToken::Integer: {
5030       int64_t Imm;
5031       if (getParser().parseAbsoluteExpression(Imm))
5032         return MatchOperand_ParseFail;
5033       Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
5034       return MatchOperand_Success;
5035     }
5036 
5037     case AsmToken::Identifier:
5038       Operands.push_back(AMDGPUOperand::CreateExpr(this,
5039           MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
5040                                   Parser.getTok().getString()), getContext()), S));
5041       Parser.Lex();
5042       return MatchOperand_Success;
5043   }
5044 }
5045 
5046 //===----------------------------------------------------------------------===//
5047 // mubuf
5048 //===----------------------------------------------------------------------===//
5049 
// Default (absent) glc modifier: immediate 0.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
}

// Default (absent) slc modifier: immediate 0.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
}
5057 
5058 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
5059                                const OperandVector &Operands,
5060                                bool IsAtomic,
5061                                bool IsAtomicReturn,
5062                                bool IsLds) {
5063   bool IsLdsOpcode = IsLds;
5064   bool HasLdsModifier = false;
5065   OptionalImmIndexMap OptionalIdx;
5066   assert(IsAtomicReturn ? IsAtomic : true);
5067   unsigned FirstOperandIdx = 1;
5068 
5069   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
5070     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5071 
5072     // Add the register arguments
5073     if (Op.isReg()) {
5074       Op.addRegOperands(Inst, 1);
5075       // Insert a tied src for atomic return dst.
5076       // This cannot be postponed as subsequent calls to
5077       // addImmOperands rely on correct number of MC operands.
5078       if (IsAtomicReturn && i == FirstOperandIdx)
5079         Op.addRegOperands(Inst, 1);
5080       continue;
5081     }
5082 
5083     // Handle the case where soffset is an immediate
5084     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5085       Op.addImmOperands(Inst, 1);
5086       continue;
5087     }
5088 
5089     HasLdsModifier = Op.isLDS();
5090 
5091     // Handle tokens like 'offen' which are sometimes hard-coded into the
5092     // asm string.  There are no MCInst operands for these.
5093     if (Op.isToken()) {
5094       continue;
5095     }
5096     assert(Op.isImm());
5097 
5098     // Handle optional arguments
5099     OptionalIdx[Op.getImmTy()] = i;
5100   }
5101 
5102   // This is a workaround for an llvm quirk which may result in an
5103   // incorrect instruction selection. Lds and non-lds versions of
5104   // MUBUF instructions are identical except that lds versions
5105   // have mandatory 'lds' modifier. However this modifier follows
5106   // optional modifiers and llvm asm matcher regards this 'lds'
5107   // modifier as an optional one. As a result, an lds version
5108   // of opcode may be selected even if it has no 'lds' modifier.
5109   if (IsLdsOpcode && !HasLdsModifier) {
5110     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
5111     if (NoLdsOpcode != -1) { // Got lds version - correct it.
5112       Inst.setOpcode(NoLdsOpcode);
5113       IsLdsOpcode = false;
5114     }
5115   }
5116 
5117   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
5118   if (!IsAtomic) { // glc is hard-coded.
5119     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5120   }
5121   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5122 
5123   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
5124     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5125   }
5126 }
5127 
5128 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
5129   OptionalImmIndexMap OptionalIdx;
5130 
5131   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5132     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5133 
5134     // Add the register arguments
5135     if (Op.isReg()) {
5136       Op.addRegOperands(Inst, 1);
5137       continue;
5138     }
5139 
5140     // Handle the case where soffset is an immediate
5141     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5142       Op.addImmOperands(Inst, 1);
5143       continue;
5144     }
5145 
5146     // Handle tokens like 'offen' which are sometimes hard-coded into the
5147     // asm string.  There are no MCInst operands for these.
5148     if (Op.isToken()) {
5149       continue;
5150     }
5151     assert(Op.isImm());
5152 
5153     // Handle optional arguments
5154     OptionalIdx[Op.getImmTy()] = i;
5155   }
5156 
5157   addOptionalImmOperand(Inst, Operands, OptionalIdx,
5158                         AMDGPUOperand::ImmTyOffset);
5159   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
5160   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5161   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5162   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5163 }
5164 
5165 //===----------------------------------------------------------------------===//
5166 // mimg
5167 //===----------------------------------------------------------------------===//
5168 
// Convert parsed MIMG operands to MCInst operands. Register defs come
// first; for atomics the single def is re-added as a tied src. Optional
// immediate modifiers are collected and then emitted in the fixed order
// expected by the instruction encoding.
void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
                              bool IsAtomic) {
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (IsAtomic) {
    // Add src, same as dst
    assert(Desc.getNumDefs() == 1);
    ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
  }

  OptionalImmIndexMap OptionalIdx;

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unexpected operand type");
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
}
5208 
// MIMG atomics reuse cvtMIMG with IsAtomic set so the dst is tied as a src.
void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
  cvtMIMG(Inst, Operands, true);
}
5212 
5213 //===----------------------------------------------------------------------===//
5214 // smrd
5215 //===----------------------------------------------------------------------===//
5216 
// Immediate fits the 8-bit SMRD offset encoding.
bool AMDGPUOperand::isSMRDOffset8() const {
  return isImm() && isUInt<8>(getImm());
}

// Immediate fits the 20-bit SMEM offset encoding.
bool AMDGPUOperand::isSMRDOffset20() const {
  return isImm() && isUInt<20>(getImm());
}

bool AMDGPUOperand::isSMRDLiteralOffset() const {
  // 32-bit literals are only supported on CI and we only want to use them
  // when the offset is > 8-bits.
  return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
}

// Defaults for absent offsets: all are an immediate 0 of type ImmTyOffset.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
5250 
5251 //===----------------------------------------------------------------------===//
5252 // vop3
5253 //===----------------------------------------------------------------------===//
5254 
// Convert an omod multiplier (1, 2 or 4) to its encoded form (log2).
// Returns false for any other value, leaving Mul untouched.
static bool ConvertOmodMul(int64_t &Mul) {
  switch (Mul) {
  case 1:
  case 2:
  case 4:
    Mul >>= 1; // 1 -> 0, 2 -> 1, 4 -> 2
    return true;
  default:
    return false;
  }
}
5262 
// Convert an omod divisor to its encoded form: 1 -> 0, 2 -> 3.
// Returns false for any other value, leaving Div untouched.
static bool ConvertOmodDiv(int64_t &Div) {
  switch (Div) {
  case 1:
    Div = 0;
    return true;
  case 2:
    Div = 3;
    return true;
  default:
    return false;
  }
}
5276 
// Convert a DPP bound_ctrl value to its encoded form: 0 -> 1, -1 -> 0.
// Returns false for any other value, leaving BoundCtrl untouched.
static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
  if (BoundCtrl == 0 || BoundCtrl == -1) {
    BoundCtrl = (BoundCtrl == 0) ? 1 : 0;
    return true;
  }
  return false;
}
5290 
5291 // Note: the order in this table matches the order of operands in AsmString.
// Each entry: {asm name, immediate type, is-a-bit-flag, value converter}.
static const OptionalOperand AMDGPUOptionalOperandTable[] = {
  {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
  {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
  {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
  {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
  {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
  {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
  {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
  {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
  {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
  {"dfmt",    AMDGPUOperand::ImmTyFORMAT, false, nullptr},
  {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
  {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
  {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
  {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
  {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
  {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
  {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
  {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
  {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
  // "r128" and "a16" deliberately share ImmTyR128A16 (same encoded bit).
  {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
  {"a16",     AMDGPUOperand::ImmTyR128A16,  true, nullptr},
  {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
  // NOTE(review): "d16" also appears earlier in this table; both entries
  // map to ImmTyD16, which looks harmless, but confirm the duplicate is
  // intended (entry order matches operand order in the AsmString).
  {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
  {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
  {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
  {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
  {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
  {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
  {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
  {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
  {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
  {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
  {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
  {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
  {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
  {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
  {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
};
5331 
// Parse one optional operand; when it is the first operand (or follows a
// register), keep parsing ahead so that hardcoded mandatory operands placed
// after optional ones are consumed too.
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
  unsigned size = Operands.size();
  assert(size > 0);

  OperandMatchResultTy res = parseOptionalOpr(Operands);

  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // Current design assumes that all operands after the first optional operand
  // are also optional. However implementation of some instructions violates
  // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure autogenerated parser of custom operands never hit hardcoded
  // mandatory operands.

  if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {

    // We have parsed the first optional operand.
    // Parse as many operands as necessary to skip all mandatory operands.

    // Bounded lookahead: stop on failure or at end of statement; commas
    // between operands are consumed explicitly.
    for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
      if (res != MatchOperand_Success ||
          getLexer().is(AsmToken::EndOfStatement)) break;
      if (getLexer().is(AsmToken::Comma)) Parser.Lex();
      res = parseOptionalOpr(Operands);
    }
  }

  return res;
}
5364 
5365 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
5366   OperandMatchResultTy res;
5367   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
5368     // try to parse any optional operand here
5369     if (Op.IsBit) {
5370       res = parseNamedBit(Op.Name, Operands, Op.Type);
5371     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
5372       res = parseOModOperand(Operands);
5373     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
5374                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
5375                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
5376       res = parseSDWASel(Operands, Op.Name, Op.Type);
5377     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
5378       res = parseSDWADstUnused(Operands);
5379     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
5380                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
5381                Op.Type == AMDGPUOperand::ImmTyNegLo ||
5382                Op.Type == AMDGPUOperand::ImmTyNegHi) {
5383       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
5384                                         Op.ConvertResult);
5385     } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT) {
5386       res = parseDfmtNfmt(Operands);
5387     } else {
5388       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
5389     }
5390     if (res != MatchOperand_NoMatch) {
5391       return res;
5392     }
5393   }
5394   return MatchOperand_NoMatch;
5395 }
5396 
5397 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
5398   StringRef Name = Parser.getTok().getString();
5399   if (Name == "mul") {
5400     return parseIntWithPrefix("mul", Operands,
5401                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
5402   }
5403 
5404   if (Name == "div") {
5405     return parseIntWithPrefix("div", Operands,
5406                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
5407   }
5408 
5409   return MatchOperand_NoMatch;
5410 }
5411 
// Convert a VOP3 instruction that carries an op_sel operand: after the
// regular VOP3P conversion, fold the destination's op_sel bit (which is
// stored one bit past the last source bit) into src0_modifiers.
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
  cvtVOP3P(Inst, Operands);

  int Opc = Inst.getOpcode();

  // Count how many source operands (1..3) this opcode actually has.
  int SrcNum;
  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  for (SrcNum = 0;
       SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
       ++SrcNum);
  assert(SrcNum > 0);

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

  // Bit SrcNum of op_sel is the destination selector; the encoding keeps it
  // as DST_OP_SEL inside src0_modifiers.
  if ((OpSel & (1 << SrcNum)) != 0) {
    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
    uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
    Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
  }
}
5435 
5436 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
5437       // 1. This operand is input modifiers
5438   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
5439       // 2. This is not last operand
5440       && Desc.NumOperands > (OpNum + 1)
5441       // 3. Next operand is register class
5442       && Desc.OpInfo[OpNum + 1].RegClass != -1
5443       // 4. Next register is not tied to any other operand
5444       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
5445 }
5446 
// Convert a VOP3 interpolation instruction (v_interp_*): copies defs and
// sources into the MCInst, passes interp slot/attr/chan immediates through
// verbatim, and appends optional high/clamp/omod operands in encoding order.
void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
{
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  // Operands[0] is the mnemonic token; copy the destination register(s).
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      // Source with modifiers: emits modifier imm + value operand.
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() ||
               Op.isInterpAttr() ||
               Op.isAttrChan()) {
      Inst.addOperand(MCOperand::createImm(Op.Imm.Val));
    } else if (Op.isImmModifier()) {
      // Remember where the optional modifier was parsed; emitted below.
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }
}
5485 
// Core VOP3 conversion: copies defs, then sources (with or without source
// modifiers), records optional modifiers in OptionalIdx, and appends
// clamp/omod. Also materializes the tied src2 for MAC/FMAC opcodes.
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  unsigned Opc = Inst.getOpcode();

  // Operands[0] is the mnemonic token; copy the destination register(s).
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
    // This instruction has src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        // Emits the modifier immediate plus the source value operand.
        Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
      } else if (Op.isImmModifier()) {
        // Optional modifier (clamp/omod/...): remember its position,
        // emitted in encoding order below.
        OptionalIdx[Op.getImmTy()] = I;
      } else if (Op.isRegOrImm()) {
        Op.addRegOrImmOperands(Inst, 1);
      } else {
        llvm_unreachable("unhandled operand type");
      }
    }
  } else {
    // No src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (Op.isMod()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        Op.addRegOrImmOperands(Inst, 1);
      }
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }

  // Special case v_mac_{f16, f32} and v_fmac_f32 (gfx906):
  // it has src2 register operand that is tied to dst operand
  // we don't allow modifiers for this operand in assembler so src2_modifiers
  // should be 0.
  if (Opc == AMDGPU::V_MAC_F32_e64_si ||
      Opc == AMDGPU::V_MAC_F32_e64_vi ||
      Opc == AMDGPU::V_MAC_F16_e64_vi ||
      Opc == AMDGPU::V_FMAC_F32_e64_vi) {
    auto it = Inst.begin();
    // Insert a zero src2_modifiers, then duplicate the dst operand as src2.
    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
    ++it;
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
5545 
5546 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
5547   OptionalImmIndexMap OptionalIdx;
5548   cvtVOP3(Inst, Operands, OptionalIdx);
5549 }
5550 
// Convert a VOP3P (packed) instruction: after the generic VOP3 conversion,
// append op_sel / op_sel_hi / neg_lo / neg_hi operands and fold their
// per-source bits into the srcN_modifiers operands.
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
                               const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  cvtVOP3(Inst, Operands, OptIdx);

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
    assert(!IsPacked);
    // vdst_in is tied to the destination; duplicate it.
    Inst.addOperand(Inst.getOperand(0));
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers

  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    // Packed ops default op_sel_hi to all-ones (-1); others to 0.
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    assert(IsPacked);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);

  // Gather the raw per-source bit masks that were just appended.
  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelHiIdx != -1) {
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
  }

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

  // OR bit J of each mask into source J's modifiers operand.
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}
5633 
5634 //===----------------------------------------------------------------------===//
5635 // dpp
5636 //===----------------------------------------------------------------------===//
5637 
5638 bool AMDGPUOperand::isDPPCtrl() const {
5639   using namespace AMDGPU::DPP;
5640 
5641   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
5642   if (result) {
5643     int64_t Imm = getImm();
5644     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
5645            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
5646            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
5647            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
5648            (Imm == DppCtrl::WAVE_SHL1) ||
5649            (Imm == DppCtrl::WAVE_ROL1) ||
5650            (Imm == DppCtrl::WAVE_SHR1) ||
5651            (Imm == DppCtrl::WAVE_ROR1) ||
5652            (Imm == DppCtrl::ROW_MIRROR) ||
5653            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
5654            (Imm == DppCtrl::BCAST15) ||
5655            (Imm == DppCtrl::BCAST31);
5656   }
5657   return false;
5658 }
5659 
5660 bool AMDGPUOperand::isS16Imm() const {
5661   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
5662 }
5663 
5664 bool AMDGPUOperand::isU16Imm() const {
5665   return isImm() && isUInt<16>(getImm());
5666 }
5667 
// Parse a dpp_ctrl operand. Accepts the bare keywords row_mirror and
// row_half_mirror, quad_perm:[a,b,c,d], and the "name:value" forms
// row_shl/row_shr/row_ror (1..15), wave_shl/rol/shr/ror (only 1), and
// row_bcast (15 or 31), producing the encoded immediate.
OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;
  int64_t Int;

  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix == "row_mirror") {
    Int = DppCtrl::ROW_MIRROR;
    Parser.Lex();
  } else if (Prefix == "row_half_mirror") {
    Int = DppCtrl::ROW_HALF_MIRROR;
    Parser.Lex();
  } else {
    // Check to prevent parseDPPCtrlOps from eating invalid tokens
    if (Prefix != "quad_perm"
        && Prefix != "row_shl"
        && Prefix != "row_shr"
        && Prefix != "row_ror"
        && Prefix != "wave_shl"
        && Prefix != "wave_rol"
        && Prefix != "wave_shr"
        && Prefix != "wave_ror"
        && Prefix != "row_bcast") {
      return MatchOperand_NoMatch;
    }

    Parser.Lex();
    if (getLexer().isNot(AsmToken::Colon))
      return MatchOperand_ParseFail;

    if (Prefix == "quad_perm") {
      // quad_perm:[%d,%d,%d,%d]
      Parser.Lex();
      if (getLexer().isNot(AsmToken::LBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();

      // First selector occupies bits [1:0] of the encoding.
      if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
        return MatchOperand_ParseFail;

      // Remaining three selectors occupy bits [3:2], [5:4], [7:6].
      for (int i = 0; i < 3; ++i) {
        if (getLexer().isNot(AsmToken::Comma))
          return MatchOperand_ParseFail;
        Parser.Lex();

        int64_t Temp;
        if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
          return MatchOperand_ParseFail;
        const int shift = i*2 + 2;
        Int += (Temp << shift);
      }

      if (getLexer().isNot(AsmToken::RBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();
    } else {
      // sel:%d
      Parser.Lex();
      if (getParser().parseAbsoluteExpression(Int))
        return MatchOperand_ParseFail;

      // Validate the value range per prefix and map it onto the encoding.
      if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHL0;
      } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHR0;
      } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_ROR0;
      } else if (Prefix == "wave_shl" && 1 == Int) {
        Int = DppCtrl::WAVE_SHL1;
      } else if (Prefix == "wave_rol" && 1 == Int) {
        Int = DppCtrl::WAVE_ROL1;
      } else if (Prefix == "wave_shr" && 1 == Int) {
        Int = DppCtrl::WAVE_SHR1;
      } else if (Prefix == "wave_ror" && 1 == Int) {
        Int = DppCtrl::WAVE_ROR1;
      } else if (Prefix == "row_bcast") {
        if (Int == 15) {
          Int = DppCtrl::BCAST15;
        } else if (Int == 31) {
          Int = DppCtrl::BCAST31;
        } else {
          return MatchOperand_ParseFail;
        }
      } else {
        return MatchOperand_ParseFail;
      }
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}
5768 
5769 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
5770   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
5771 }
5772 
5773 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
5774   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
5775 }
5776 
5777 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
5778   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
5779 }
5780 
5781 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
5782   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
5783 }
5784 
// Convert a DPP instruction: copies defs and sources (replicating any tied
// operand first), passes the dpp_ctrl immediate through, and appends
// row_mask / bank_mask / bound_ctrl with their defaults.
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  // Operands[0] is the mnemonic token; copy the destination register(s).
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    // If the next MCInst slot is tied to an earlier operand, duplicate that
    // operand now (MAC-style tied old/src2).
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
      // Skip it.
      continue;
      // NOTE: the next `if` is intentionally not `else if`; it behaves the
      // same because the branch above always continues.
    } if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegWithFPInputModsOperands(Inst, 2);
    } else if (Op.isDPPCtrl()) {
      Op.addImmOperands(Inst, 1);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
}
5824 
5825 //===----------------------------------------------------------------------===//
5826 // sdwa
5827 //===----------------------------------------------------------------------===//
5828 
5829 OperandMatchResultTy
5830 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
5831                               AMDGPUOperand::ImmTy Type) {
5832   using namespace llvm::AMDGPU::SDWA;
5833 
5834   SMLoc S = Parser.getTok().getLoc();
5835   StringRef Value;
5836   OperandMatchResultTy res;
5837 
5838   res = parseStringWithPrefix(Prefix, Value);
5839   if (res != MatchOperand_Success) {
5840     return res;
5841   }
5842 
5843   int64_t Int;
5844   Int = StringSwitch<int64_t>(Value)
5845         .Case("BYTE_0", SdwaSel::BYTE_0)
5846         .Case("BYTE_1", SdwaSel::BYTE_1)
5847         .Case("BYTE_2", SdwaSel::BYTE_2)
5848         .Case("BYTE_3", SdwaSel::BYTE_3)
5849         .Case("WORD_0", SdwaSel::WORD_0)
5850         .Case("WORD_1", SdwaSel::WORD_1)
5851         .Case("DWORD", SdwaSel::DWORD)
5852         .Default(0xffffffff);
5853   Parser.Lex(); // eat last token
5854 
5855   if (Int == 0xffffffff) {
5856     return MatchOperand_ParseFail;
5857   }
5858 
5859   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
5860   return MatchOperand_Success;
5861 }
5862 
5863 OperandMatchResultTy
5864 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
5865   using namespace llvm::AMDGPU::SDWA;
5866 
5867   SMLoc S = Parser.getTok().getLoc();
5868   StringRef Value;
5869   OperandMatchResultTy res;
5870 
5871   res = parseStringWithPrefix("dst_unused", Value);
5872   if (res != MatchOperand_Success) {
5873     return res;
5874   }
5875 
5876   int64_t Int;
5877   Int = StringSwitch<int64_t>(Value)
5878         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
5879         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
5880         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
5881         .Default(0xffffffff);
5882   Parser.Lex(); // eat last token
5883 
5884   if (Int == 0xffffffff) {
5885     return MatchOperand_ParseFail;
5886   }
5887 
5888   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
5889   return MatchOperand_Success;
5890 }
5891 
// SDWA conversion entry point for VOP1 instructions.
void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}
5895 
// SDWA conversion entry point for VOP2 instructions.
void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}
5899 
// SDWA conversion entry point for VOP2b instructions; skips the "vcc" token.
void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
}
5903 
// SDWA conversion entry point for VOPC instructions; "vcc" is skipped on VI.
void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}
5907 
// Common SDWA conversion. Copies defs and sources into the MCInst (skipping
// a "vcc" token where the encoding implies it), records optional operands,
// then appends clamp/omod and the sel/unused operands with their defaults.
// BasicInstType selects the VOP1/VOP2/VOPC operand layout.
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType, bool skipVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool skippedVcc = false;

  // Operands[0] is the mnemonic token; copy the destination register(s).
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
        skippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        skippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      // Source with modifiers: emits modifier imm + value operand.
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    skippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // V_NOP_sdwa_vi/gfx9 has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
    auto it = Inst.begin();
    std::advance(
      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
5995 
/// Force static initialization.
/// Registers this asm parser for both the R600 and GCN targets.
extern "C" void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}
6001 
6002 #define GET_REGISTER_MATCHER
6003 #define GET_MATCHER_IMPLEMENTATION
6004 #define GET_MNEMONIC_SPELL_CHECKER
6005 #include "AMDGPUGenAsmMatcher.inc"
6006 
6007 // This fuction should be defined after auto-generated include so that we have
6008 // MatchClassKind enum defined
// This fuction should be defined after auto-generated include so that we have
// MatchClassKind enum defined
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet token and fails to validate
  // operand. This method checks if we are given immediate operand but expect to
  // get corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  // Flag-style operands that were parsed as typed immediates.
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}
6053 
6054 //===----------------------------------------------------------------------===//
6055 // endpgm
6056 //===----------------------------------------------------------------------===//
6057 
// Parse the optional immediate of s_endpgm. Defaults to 0 when absent;
// an explicit value must fit in 16 unsigned bits.
OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional, if not present default to 0
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}
6076 
// An endpgm operand is simply an immediate tagged ImmTyEndpgm.
bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
6078