1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/Error.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
63 
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
67 
68 namespace {
69 
70 class AMDGPUAsmParser;
71 
72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
73 
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
77 
78 class AMDGPUOperand : public MCParsedAsmOperand {
79   enum KindTy {
80     Token,
81     Immediate,
82     Register,
83     Expression
84   } Kind;
85 
86   SMLoc StartLoc, EndLoc;
87   const AMDGPUAsmParser *AsmParser;
88 
89 public:
90   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
92 
93   using Ptr = std::unique_ptr<AMDGPUOperand>;
94 
95   struct Modifiers {
96     bool Abs = false;
97     bool Neg = false;
98     bool Sext = false;
99 
100     bool hasFPModifiers() const { return Abs || Neg; }
101     bool hasIntModifiers() const { return Sext; }
102     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103 
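    // Pack the parsed source modifiers (e.g. "-v0", "|v0|", "sext(v0)") into
    // the SISrcMods bit layout defined in SIDefines.h.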
104     int64_t getFPModifiersOperand() const {
105       int64_t Operand = 0;
106       Operand |= Abs ? SISrcMods::ABS : 0u;
107       Operand |= Neg ? SISrcMods::NEG : 0u;
108       return Operand;
109     }
110 
111     int64_t getIntModifiersOperand() const {
112       int64_t Operand = 0;
113       Operand |= Sext ? SISrcMods::SEXT : 0u;
114       return Operand;
115     }
116 
117     int64_t getModifiersOperand() const {
118       assert(!(hasFPModifiers() && hasIntModifiers())
119            && "fp and int modifiers should not be used simultaneously");
120       if (hasFPModifiers()) {
121         return getFPModifiersOperand();
122       } else if (hasIntModifiers()) {
123         return getIntModifiersOperand();
124       } else {
125         return 0;
126       }
127     }
128 
129     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
130   };
131 
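  // Identifies which named modifier or special immediate an Immediate operand
  // represents; used by the isImmTy()-based predicates below and by printImmTy().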
132   enum ImmTy {
133     ImmTyNone,
134     ImmTyGDS,
135     ImmTyLDS,
136     ImmTyOffen,
137     ImmTyIdxen,
138     ImmTyAddr64,
139     ImmTyOffset,
140     ImmTyInstOffset,
141     ImmTyOffset0,
142     ImmTyOffset1,
143     ImmTyDLC,
144     ImmTyGLC,
145     ImmTySLC,
146     ImmTySWZ,
147     ImmTyTFE,
148     ImmTyD16,
149     ImmTyClampSI,
150     ImmTyOModSI,
151     ImmTyDPP8,
152     ImmTyDppCtrl,
153     ImmTyDppRowMask,
154     ImmTyDppBankMask,
155     ImmTyDppBoundCtrl,
156     ImmTyDppFi,
157     ImmTySdwaDstSel,
158     ImmTySdwaSrc0Sel,
159     ImmTySdwaSrc1Sel,
160     ImmTySdwaDstUnused,
161     ImmTyDMask,
162     ImmTyDim,
163     ImmTyUNorm,
164     ImmTyDA,
165     ImmTyR128A16,
166     ImmTyA16,
167     ImmTyLWE,
168     ImmTyExpTgt,
169     ImmTyExpCompr,
170     ImmTyExpVM,
171     ImmTyFORMAT,
172     ImmTyHwreg,
173     ImmTyOff,
174     ImmTySendMsg,
175     ImmTyInterpSlot,
176     ImmTyInterpAttr,
177     ImmTyAttrChan,
178     ImmTyOpSel,
179     ImmTyOpSelHi,
180     ImmTyNegLo,
181     ImmTyNegHi,
182     ImmTySwizzle,
183     ImmTyGprIdxMode,
184     ImmTyHigh,
185     ImmTyBLGP,
186     ImmTyCBSZ,
187     ImmTyABID,
188     ImmTyEndpgm,
189   };
190 
191 private:
192   struct TokOp {
193     const char *Data;
194     unsigned Length;
195   };
196 
197   struct ImmOp {
198     int64_t Val;
199     ImmTy Type;
200     bool IsFPImm;
201     Modifiers Mods;
202   };
203 
204   struct RegOp {
205     unsigned RegNo;
206     Modifiers Mods;
207   };
208 
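  // Which union member below is active is determined by Kind.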
209   union {
210     TokOp Tok;
211     ImmOp Imm;
212     RegOp Reg;
213     const MCExpr *Expr;
214   };
215 
216 public:
217   bool isToken() const override {
218     if (Kind == Token)
219       return true;
220 
221     // When parsing operands, we can't always tell whether something was meant
222     // to be a token, like 'gds', or an expression that references a global
223     // variable. In this case, we assume the string is an expression, and if we
224     // need to interpret it as a token, we treat the symbol name as the token.
225     return isSymbolRefExpr();
226   }
227 
228   bool isSymbolRefExpr() const {
229     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
230   }
231 
232   bool isImm() const override {
233     return Kind == Immediate;
234   }
235 
236   bool isInlinableImm(MVT type) const;
237   bool isLiteralImm(MVT type) const;
238 
239   bool isRegKind() const {
240     return Kind == Register;
241   }
242 
243   bool isReg() const override {
244     return isRegKind() && !hasModifiers();
245   }
246 
247   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
248     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
249   }
250 
251   bool isRegOrImmWithInt16InputMods() const {
252     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
253   }
254 
255   bool isRegOrImmWithInt32InputMods() const {
256     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
257   }
258 
259   bool isRegOrImmWithInt64InputMods() const {
260     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
261   }
262 
263   bool isRegOrImmWithFP16InputMods() const {
264     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
265   }
266 
267   bool isRegOrImmWithFP32InputMods() const {
268     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
269   }
270 
271   bool isRegOrImmWithFP64InputMods() const {
272     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
273   }
274 
275   bool isVReg() const {
276     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
277            isRegClass(AMDGPU::VReg_64RegClassID) ||
278            isRegClass(AMDGPU::VReg_96RegClassID) ||
279            isRegClass(AMDGPU::VReg_128RegClassID) ||
280            isRegClass(AMDGPU::VReg_160RegClassID) ||
281            isRegClass(AMDGPU::VReg_192RegClassID) ||
282            isRegClass(AMDGPU::VReg_256RegClassID) ||
283            isRegClass(AMDGPU::VReg_512RegClassID) ||
284            isRegClass(AMDGPU::VReg_1024RegClassID);
285   }
286 
287   bool isVReg32() const {
288     return isRegClass(AMDGPU::VGPR_32RegClassID);
289   }
290 
291   bool isVReg32OrOff() const {
292     return isOff() || isVReg32();
293   }
294 
295   bool isNull() const {
296     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
297   }
298 
299   bool isSDWAOperand(MVT type) const;
300   bool isSDWAFP16Operand() const;
301   bool isSDWAFP32Operand() const;
302   bool isSDWAInt16Operand() const;
303   bool isSDWAInt32Operand() const;
304 
305   bool isImmTy(ImmTy ImmT) const {
306     return isImm() && Imm.Type == ImmT;
307   }
308 
309   bool isImmModifier() const {
310     return isImm() && Imm.Type != ImmTyNone;
311   }
312 
313   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
314   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
315   bool isDMask() const { return isImmTy(ImmTyDMask); }
316   bool isDim() const { return isImmTy(ImmTyDim); }
317   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
318   bool isDA() const { return isImmTy(ImmTyDA); }
319   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
320   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
321   bool isLWE() const { return isImmTy(ImmTyLWE); }
322   bool isOff() const { return isImmTy(ImmTyOff); }
323   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
324   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
325   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
326   bool isOffen() const { return isImmTy(ImmTyOffen); }
327   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
328   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
329   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
330   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
331   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
332 
333   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
334   bool isGDS() const { return isImmTy(ImmTyGDS); }
335   bool isLDS() const { return isImmTy(ImmTyLDS); }
336   bool isDLC() const { return isImmTy(ImmTyDLC); }
337   bool isGLC() const { return isImmTy(ImmTyGLC); }
338   bool isSLC() const { return isImmTy(ImmTySLC); }
339   bool isSWZ() const { return isImmTy(ImmTySWZ); }
340   bool isTFE() const { return isImmTy(ImmTyTFE); }
341   bool isD16() const { return isImmTy(ImmTyD16); }
342   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
343   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
344   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
345   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
346   bool isFI() const { return isImmTy(ImmTyDppFi); }
347   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
348   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
349   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
350   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
351   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
352   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
353   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
354   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
355   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
356   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
357   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
358   bool isHigh() const { return isImmTy(ImmTyHigh); }
359 
360   bool isMod() const {
361     return isClampSI() || isOModSI();
362   }
363 
364   bool isRegOrImm() const {
365     return isReg() || isImm();
366   }
367 
368   bool isRegClass(unsigned RCID) const;
369 
370   bool isInlineValue() const;
371 
372   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
373     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
374   }
375 
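  // The is*Src* predicates below correspond to the operand classes used by the
  // auto-generated matcher. Roughly: "SCSrc" accepts an SGPR or an inline
  // constant, "SSrc" additionally accepts a literal, "VCSrc" accepts a
  // VGPR/SGPR or an inline constant, "VSrc" additionally accepts a literal,
  // and "VISrc"/"AISrc" accept a VGPR/AGPR or an inline constant.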
376   bool isSCSrcB16() const {
377     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
378   }
379 
380   bool isSCSrcV2B16() const {
381     return isSCSrcB16();
382   }
383 
384   bool isSCSrcB32() const {
385     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
386   }
387 
388   bool isSCSrcB64() const {
389     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
390   }
391 
392   bool isBoolReg() const;
393 
394   bool isSCSrcF16() const {
395     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
396   }
397 
398   bool isSCSrcV2F16() const {
399     return isSCSrcF16();
400   }
401 
402   bool isSCSrcF32() const {
403     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
404   }
405 
406   bool isSCSrcF64() const {
407     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
408   }
409 
410   bool isSSrcB32() const {
411     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
412   }
413 
414   bool isSSrcB16() const {
415     return isSCSrcB16() || isLiteralImm(MVT::i16);
416   }
417 
418   bool isSSrcV2B16() const {
419     llvm_unreachable("cannot happen");
420     return isSSrcB16();
421   }
422 
423   bool isSSrcB64() const {
424     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
425     // See isVSrc64().
426     return isSCSrcB64() || isLiteralImm(MVT::i64);
427   }
428 
429   bool isSSrcF32() const {
430     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
431   }
432 
433   bool isSSrcF64() const {
434     return isSCSrcB64() || isLiteralImm(MVT::f64);
435   }
436 
437   bool isSSrcF16() const {
438     return isSCSrcB16() || isLiteralImm(MVT::f16);
439   }
440 
441   bool isSSrcV2F16() const {
442     llvm_unreachable("cannot happen");
443     return isSSrcF16();
444   }
445 
446   bool isSSrcOrLdsB32() const {
447     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
448            isLiteralImm(MVT::i32) || isExpr();
449   }
450 
451   bool isVCSrcB32() const {
452     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
453   }
454 
455   bool isVCSrcB64() const {
456     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
457   }
458 
459   bool isVCSrcB16() const {
460     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
461   }
462 
463   bool isVCSrcV2B16() const {
464     return isVCSrcB16();
465   }
466 
467   bool isVCSrcF32() const {
468     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
469   }
470 
471   bool isVCSrcF64() const {
472     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
473   }
474 
475   bool isVCSrcF16() const {
476     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
477   }
478 
479   bool isVCSrcV2F16() const {
480     return isVCSrcF16();
481   }
482 
483   bool isVSrcB32() const {
484     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
485   }
486 
487   bool isVSrcB64() const {
488     return isVCSrcF64() || isLiteralImm(MVT::i64);
489   }
490 
491   bool isVSrcB16() const {
492     return isVCSrcB16() || isLiteralImm(MVT::i16);
493   }
494 
495   bool isVSrcV2B16() const {
496     return isVSrcB16() || isLiteralImm(MVT::v2i16);
497   }
498 
499   bool isVSrcF32() const {
500     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
501   }
502 
503   bool isVSrcF64() const {
504     return isVCSrcF64() || isLiteralImm(MVT::f64);
505   }
506 
507   bool isVSrcF16() const {
508     return isVCSrcF16() || isLiteralImm(MVT::f16);
509   }
510 
511   bool isVSrcV2F16() const {
512     return isVSrcF16() || isLiteralImm(MVT::v2f16);
513   }
514 
515   bool isVISrcB32() const {
516     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
517   }
518 
519   bool isVISrcB16() const {
520     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
521   }
522 
523   bool isVISrcV2B16() const {
524     return isVISrcB16();
525   }
526 
527   bool isVISrcF32() const {
528     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
529   }
530 
531   bool isVISrcF16() const {
532     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
533   }
534 
535   bool isVISrcV2F16() const {
536     return isVISrcF16() || isVISrcB32();
537   }
538 
539   bool isAISrcB32() const {
540     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
541   }
542 
543   bool isAISrcB16() const {
544     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
545   }
546 
547   bool isAISrcV2B16() const {
548     return isAISrcB16();
549   }
550 
551   bool isAISrcF32() const {
552     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
553   }
554 
555   bool isAISrcF16() const {
556     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
557   }
558 
559   bool isAISrcV2F16() const {
560     return isAISrcF16() || isAISrcB32();
561   }
562 
563   bool isAISrc_128B32() const {
564     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
565   }
566 
567   bool isAISrc_128B16() const {
568     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
569   }
570 
571   bool isAISrc_128V2B16() const {
572     return isAISrc_128B16();
573   }
574 
575   bool isAISrc_128F32() const {
576     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
577   }
578 
579   bool isAISrc_128F16() const {
580     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
581   }
582 
583   bool isAISrc_128V2F16() const {
584     return isAISrc_128F16() || isAISrc_128B32();
585   }
586 
587   bool isAISrc_512B32() const {
588     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
589   }
590 
591   bool isAISrc_512B16() const {
592     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
593   }
594 
595   bool isAISrc_512V2B16() const {
596     return isAISrc_512B16();
597   }
598 
599   bool isAISrc_512F32() const {
600     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
601   }
602 
603   bool isAISrc_512F16() const {
604     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
605   }
606 
607   bool isAISrc_512V2F16() const {
608     return isAISrc_512F16() || isAISrc_512B32();
609   }
610 
611   bool isAISrc_1024B32() const {
612     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
613   }
614 
615   bool isAISrc_1024B16() const {
616     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
617   }
618 
619   bool isAISrc_1024V2B16() const {
620     return isAISrc_1024B16();
621   }
622 
623   bool isAISrc_1024F32() const {
624     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
625   }
626 
627   bool isAISrc_1024F16() const {
628     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
629   }
630 
631   bool isAISrc_1024V2F16() const {
632     return isAISrc_1024F16() || isAISrc_1024B32();
633   }
634 
635   bool isKImmFP32() const {
636     return isLiteralImm(MVT::f32);
637   }
638 
639   bool isKImmFP16() const {
640     return isLiteralImm(MVT::f16);
641   }
642 
643   bool isMem() const override {
644     return false;
645   }
646 
647   bool isExpr() const {
648     return Kind == Expression;
649   }
650 
651   bool isSoppBrTarget() const {
652     return isExpr() || isImm();
653   }
654 
655   bool isSWaitCnt() const;
656   bool isHwreg() const;
657   bool isSendMsg() const;
658   bool isSwizzle() const;
659   bool isSMRDOffset8() const;
660   bool isSMEMOffset() const;
661   bool isSMRDLiteralOffset() const;
662   bool isDPP8() const;
663   bool isDPPCtrl() const;
664   bool isBLGP() const;
665   bool isCBSZ() const;
666   bool isABID() const;
667   bool isGPRIdxMode() const;
668   bool isS16Imm() const;
669   bool isU16Imm() const;
670   bool isEndpgm() const;
671 
672   StringRef getExpressionAsToken() const {
673     assert(isExpr());
674     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
675     return S->getSymbol().getName();
676   }
677 
678   StringRef getToken() const {
679     assert(isToken());
680 
681     if (Kind == Expression)
682       return getExpressionAsToken();
683 
684     return StringRef(Tok.Data, Tok.Length);
685   }
686 
687   int64_t getImm() const {
688     assert(isImm());
689     return Imm.Val;
690   }
691 
692   ImmTy getImmTy() const {
693     assert(isImm());
694     return Imm.Type;
695   }
696 
697   unsigned getReg() const override {
698     assert(isRegKind());
699     return Reg.RegNo;
700   }
701 
702   SMLoc getStartLoc() const override {
703     return StartLoc;
704   }
705 
706   SMLoc getEndLoc() const override {
707     return EndLoc;
708   }
709 
710   SMRange getLocRange() const {
711     return SMRange(StartLoc, EndLoc);
712   }
713 
714   Modifiers getModifiers() const {
715     assert(isRegKind() || isImmTy(ImmTyNone));
716     return isRegKind() ? Reg.Mods : Imm.Mods;
717   }
718 
719   void setModifiers(Modifiers Mods) {
720     assert(isRegKind() || isImmTy(ImmTyNone));
721     if (isRegKind())
722       Reg.Mods = Mods;
723     else
724       Imm.Mods = Mods;
725   }
726 
727   bool hasModifiers() const {
728     return getModifiers().hasModifiers();
729   }
730 
731   bool hasFPModifiers() const {
732     return getModifiers().hasFPModifiers();
733   }
734 
735   bool hasIntModifiers() const {
736     return getModifiers().hasIntModifiers();
737   }
738 
739   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
740 
741   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
742 
743   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
744 
745   template <unsigned Bitwidth>
746   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
747 
748   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
749     addKImmFPOperands<16>(Inst, N);
750   }
751 
752   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
753     addKImmFPOperands<32>(Inst, N);
754   }
755 
756   void addRegOperands(MCInst &Inst, unsigned N) const;
757 
758   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
759     addRegOperands(Inst, N);
760   }
761 
762   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
763     if (isRegKind())
764       addRegOperands(Inst, N);
765     else if (isExpr())
766       Inst.addOperand(MCOperand::createExpr(Expr));
767     else
768       addImmOperands(Inst, N);
769   }
770 
771   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
772     Modifiers Mods = getModifiers();
773     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
774     if (isRegKind()) {
775       addRegOperands(Inst, N);
776     } else {
777       addImmOperands(Inst, N, false);
778     }
779   }
780 
781   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
782     assert(!hasIntModifiers());
783     addRegOrImmWithInputModsOperands(Inst, N);
784   }
785 
786   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
787     assert(!hasFPModifiers());
788     addRegOrImmWithInputModsOperands(Inst, N);
789   }
790 
791   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
792     Modifiers Mods = getModifiers();
793     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
794     assert(isRegKind());
795     addRegOperands(Inst, N);
796   }
797 
798   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
799     assert(!hasIntModifiers());
800     addRegWithInputModsOperands(Inst, N);
801   }
802 
803   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
804     assert(!hasFPModifiers());
805     addRegWithInputModsOperands(Inst, N);
806   }
807 
808   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
809     if (isImm())
810       addImmOperands(Inst, N);
811     else {
812       assert(isExpr());
813       Inst.addOperand(MCOperand::createExpr(Expr));
814     }
815   }
816 
817   static void printImmTy(raw_ostream& OS, ImmTy Type) {
818     switch (Type) {
819     case ImmTyNone: OS << "None"; break;
820     case ImmTyGDS: OS << "GDS"; break;
821     case ImmTyLDS: OS << "LDS"; break;
822     case ImmTyOffen: OS << "Offen"; break;
823     case ImmTyIdxen: OS << "Idxen"; break;
824     case ImmTyAddr64: OS << "Addr64"; break;
825     case ImmTyOffset: OS << "Offset"; break;
826     case ImmTyInstOffset: OS << "InstOffset"; break;
827     case ImmTyOffset0: OS << "Offset0"; break;
828     case ImmTyOffset1: OS << "Offset1"; break;
829     case ImmTyDLC: OS << "DLC"; break;
830     case ImmTyGLC: OS << "GLC"; break;
831     case ImmTySLC: OS << "SLC"; break;
832     case ImmTySWZ: OS << "SWZ"; break;
833     case ImmTyTFE: OS << "TFE"; break;
834     case ImmTyD16: OS << "D16"; break;
835     case ImmTyFORMAT: OS << "FORMAT"; break;
836     case ImmTyClampSI: OS << "ClampSI"; break;
837     case ImmTyOModSI: OS << "OModSI"; break;
838     case ImmTyDPP8: OS << "DPP8"; break;
839     case ImmTyDppCtrl: OS << "DppCtrl"; break;
840     case ImmTyDppRowMask: OS << "DppRowMask"; break;
841     case ImmTyDppBankMask: OS << "DppBankMask"; break;
842     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
843     case ImmTyDppFi: OS << "FI"; break;
844     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
845     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
846     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
847     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
848     case ImmTyDMask: OS << "DMask"; break;
849     case ImmTyDim: OS << "Dim"; break;
850     case ImmTyUNorm: OS << "UNorm"; break;
851     case ImmTyDA: OS << "DA"; break;
852     case ImmTyR128A16: OS << "R128A16"; break;
853     case ImmTyA16: OS << "A16"; break;
854     case ImmTyLWE: OS << "LWE"; break;
855     case ImmTyOff: OS << "Off"; break;
856     case ImmTyExpTgt: OS << "ExpTgt"; break;
857     case ImmTyExpCompr: OS << "ExpCompr"; break;
858     case ImmTyExpVM: OS << "ExpVM"; break;
859     case ImmTyHwreg: OS << "Hwreg"; break;
860     case ImmTySendMsg: OS << "SendMsg"; break;
861     case ImmTyInterpSlot: OS << "InterpSlot"; break;
862     case ImmTyInterpAttr: OS << "InterpAttr"; break;
863     case ImmTyAttrChan: OS << "AttrChan"; break;
864     case ImmTyOpSel: OS << "OpSel"; break;
865     case ImmTyOpSelHi: OS << "OpSelHi"; break;
866     case ImmTyNegLo: OS << "NegLo"; break;
867     case ImmTyNegHi: OS << "NegHi"; break;
868     case ImmTySwizzle: OS << "Swizzle"; break;
869     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
870     case ImmTyHigh: OS << "High"; break;
871     case ImmTyBLGP: OS << "BLGP"; break;
872     case ImmTyCBSZ: OS << "CBSZ"; break;
873     case ImmTyABID: OS << "ABID"; break;
874     case ImmTyEndpgm: OS << "Endpgm"; break;
875     }
876   }
877 
878   void print(raw_ostream &OS) const override {
879     switch (Kind) {
880     case Register:
881       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
882       break;
883     case Immediate:
884       OS << '<' << getImm();
885       if (getImmTy() != ImmTyNone) {
886         OS << " type: "; printImmTy(OS, getImmTy());
887       }
888       OS << " mods: " << Imm.Mods << '>';
889       break;
890     case Token:
891       OS << '\'' << getToken() << '\'';
892       break;
893     case Expression:
894       OS << "<expr " << *Expr << '>';
895       break;
896     }
897   }
898 
899   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
900                                       int64_t Val, SMLoc Loc,
901                                       ImmTy Type = ImmTyNone,
902                                       bool IsFPImm = false) {
903     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
904     Op->Imm.Val = Val;
905     Op->Imm.IsFPImm = IsFPImm;
906     Op->Imm.Type = Type;
907     Op->Imm.Mods = Modifiers();
908     Op->StartLoc = Loc;
909     Op->EndLoc = Loc;
910     return Op;
911   }
912 
913   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
914                                         StringRef Str, SMLoc Loc,
915                                         bool HasExplicitEncodingSize = true) {
916     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
917     Res->Tok.Data = Str.data();
918     Res->Tok.Length = Str.size();
919     Res->StartLoc = Loc;
920     Res->EndLoc = Loc;
921     return Res;
922   }
923 
924   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
925                                       unsigned RegNo, SMLoc S,
926                                       SMLoc E) {
927     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
928     Op->Reg.RegNo = RegNo;
929     Op->Reg.Mods = Modifiers();
930     Op->StartLoc = S;
931     Op->EndLoc = E;
932     return Op;
933   }
934 
935   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
936                                        const class MCExpr *Expr, SMLoc S) {
937     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
938     Op->Expr = Expr;
939     Op->StartLoc = S;
940     Op->EndLoc = S;
941     return Op;
942   }
943 };
944 
945 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
946   OS << "abs:" << Mods.Abs << " neg:" << Mods.Neg << " sext:" << Mods.Sext;
947   return OS;
948 }
949 
950 //===----------------------------------------------------------------------===//
951 // AsmParser
952 //===----------------------------------------------------------------------===//
953 
954 // Holds info related to the current kernel, e.g. the count of SGPRs used.
955 // Kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the next
956 // .amdgpu_hsa_kernel directive or at EOF.
957 class KernelScopeInfo {
958   int SgprIndexUnusedMin = -1;
959   int VgprIndexUnusedMin = -1;
960   MCContext *Ctx = nullptr;
961 
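  // Record that SGPR/VGPR index i is in use and publish the updated count via
  // the .kernel.sgpr_count / .kernel.vgpr_count symbols.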
962   void usesSgprAt(int i) {
963     if (i >= SgprIndexUnusedMin) {
964       SgprIndexUnusedMin = ++i;
965       if (Ctx) {
966         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
967         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
968       }
969     }
970   }
971 
972   void usesVgprAt(int i) {
973     if (i >= VgprIndexUnusedMin) {
974       VgprIndexUnusedMin = ++i;
975       if (Ctx) {
976         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
977         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
978       }
979     }
980   }
981 
982 public:
983   KernelScopeInfo() = default;
984 
985   void initialize(MCContext &Context) {
986     Ctx = &Context;
987     usesSgprAt(SgprIndexUnusedMin = -1);
988     usesVgprAt(VgprIndexUnusedMin = -1);
989   }
990 
991   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
992     switch (RegKind) {
993       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
994       case IS_AGPR: // fall through
995       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
996       default: break;
997     }
998   }
999 };
1000 
1001 class AMDGPUAsmParser : public MCTargetAsmParser {
1002   MCAsmParser &Parser;
1003 
1004   // Maximum number of extra operands parsed after the first optional operand.
1005   // This lookahead may be necessary to skip hardcoded mandatory operands.
1006   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1007 
1008   unsigned ForcedEncodingSize = 0;
1009   bool ForcedDPP = false;
1010   bool ForcedSDWA = false;
1011   KernelScopeInfo KernelScope;
1012 
1013   /// @name Auto-generated Match Functions
1014   /// {
1015 
1016 #define GET_ASSEMBLER_HEADER
1017 #include "AMDGPUGenAsmMatcher.inc"
1018 
1019   /// }
1020 
1021 private:
1022   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1023   bool OutOfRangeError(SMRange Range);
1024   /// Calculate VGPR/SGPR blocks required for given target, reserved
1025   /// registers, and user-specified NextFreeXGPR values.
1026   ///
1027   /// \param Features [in] Target features, used for bug corrections.
1028   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1029   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1030   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1031   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1032   /// descriptor field, if valid.
1033   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1034   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1035   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1036   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1037   /// \param VGPRBlocks [out] Result VGPR block count.
1038   /// \param SGPRBlocks [out] Result SGPR block count.
1039   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1040                           bool FlatScrUsed, bool XNACKUsed,
1041                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1042                           SMRange VGPRRange, unsigned NextFreeSGPR,
1043                           SMRange SGPRRange, unsigned &VGPRBlocks,
1044                           unsigned &SGPRBlocks);
1045   bool ParseDirectiveAMDGCNTarget();
1046   bool ParseDirectiveAMDHSAKernel();
1047   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1048   bool ParseDirectiveHSACodeObjectVersion();
1049   bool ParseDirectiveHSACodeObjectISA();
1050   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1051   bool ParseDirectiveAMDKernelCodeT();
1052   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1053   bool ParseDirectiveAMDGPUHsaKernel();
1054 
1055   bool ParseDirectiveISAVersion();
1056   bool ParseDirectiveHSAMetadata();
1057   bool ParseDirectivePALMetadataBegin();
1058   bool ParseDirectivePALMetadata();
1059   bool ParseDirectiveAMDGPULDS();
1060 
1061   /// Common code to parse out a block of text (typically YAML) between start and
1062   /// end directives.
1063   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1064                            const char *AssemblerDirectiveEnd,
1065                            std::string &CollectString);
1066 
1067   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1068                              RegisterKind RegKind, unsigned Reg1);
1069   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1070                            unsigned &RegNum, unsigned &RegWidth,
1071                            bool RestoreOnFailure = false);
1072   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1073                            unsigned &RegNum, unsigned &RegWidth,
1074                            SmallVectorImpl<AsmToken> &Tokens);
1075   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1076                            unsigned &RegWidth,
1077                            SmallVectorImpl<AsmToken> &Tokens);
1078   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1079                            unsigned &RegWidth,
1080                            SmallVectorImpl<AsmToken> &Tokens);
1081   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1082                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1083   bool ParseRegRange(unsigned& Num, unsigned& Width);
1084   unsigned getRegularReg(RegisterKind RegKind,
1085                          unsigned RegNum,
1086                          unsigned RegWidth);
1087 
1088   bool isRegister();
1089   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1090   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1091   void initializeGprCountSymbol(RegisterKind RegKind);
1092   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1093                              unsigned RegWidth);
1094   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1095                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1096   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1097                  bool IsGdsHardcoded);
1098 
1099 public:
1100   enum AMDGPUMatchResultTy {
1101     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1102   };
1103   enum OperandMode {
1104     OperandMode_Default,
1105     OperandMode_NSA,
1106   };
1107 
1108   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1109 
1110   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1111                const MCInstrInfo &MII,
1112                const MCTargetOptions &Options)
1113       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1114     MCAsmParserExtension::Initialize(Parser);
1115 
1116     if (getFeatureBits().none()) {
1117       // Set default features.
1118       copySTI().ToggleFeature("southern-islands");
1119     }
1120 
1121     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1122 
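    // Pre-define assembler symbols that expose the target ISA version so that
    // assembly source can query it (e.g. .amdgcn.gfx_generation_number or
    // .option.machine_version_major below).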
1123     {
1124       // TODO: make these pre-defined variables read-only.
1125       // Currently there is no suitable machinery in core llvm-mc for this.
1126       // MCSymbol::isRedefinable is intended for another purpose, and
1127       // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1128       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1129       MCContext &Ctx = getContext();
1130       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1131         MCSymbol *Sym =
1132             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1133         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1134         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1135         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1136         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1137         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1138       } else {
1139         MCSymbol *Sym =
1140             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1141         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1142         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1143         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1144         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1145         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1146       }
1147       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1148         initializeGprCountSymbol(IS_VGPR);
1149         initializeGprCountSymbol(IS_SGPR);
1150       } else
1151         KernelScope.initialize(getContext());
1152     }
1153   }
1154 
1155   bool hasXNACK() const {
1156     return AMDGPU::hasXNACK(getSTI());
1157   }
1158 
1159   bool hasMIMG_R128() const {
1160     return AMDGPU::hasMIMG_R128(getSTI());
1161   }
1162 
1163   bool hasPackedD16() const {
1164     return AMDGPU::hasPackedD16(getSTI());
1165   }
1166 
1167   bool hasGFX10A16() const {
1168     return AMDGPU::hasGFX10A16(getSTI());
1169   }
1170 
1171   bool isSI() const {
1172     return AMDGPU::isSI(getSTI());
1173   }
1174 
1175   bool isCI() const {
1176     return AMDGPU::isCI(getSTI());
1177   }
1178 
1179   bool isVI() const {
1180     return AMDGPU::isVI(getSTI());
1181   }
1182 
1183   bool isGFX9() const {
1184     return AMDGPU::isGFX9(getSTI());
1185   }
1186 
1187   bool isGFX10() const {
1188     return AMDGPU::isGFX10(getSTI());
1189   }
1190 
1191   bool isGFX10_BEncoding() const {
1192     return AMDGPU::isGFX10_BEncoding(getSTI());
1193   }
1194 
1195   bool hasInv2PiInlineImm() const {
1196     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1197   }
1198 
1199   bool hasFlatOffsets() const {
1200     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1201   }
1202 
1203   bool hasSGPR102_SGPR103() const {
1204     return !isVI() && !isGFX9();
1205   }
1206 
1207   bool hasSGPR104_SGPR105() const {
1208     return isGFX10();
1209   }
1210 
1211   bool hasIntClamp() const {
1212     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1213   }
1214 
1215   AMDGPUTargetStreamer &getTargetStreamer() {
1216     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1217     return static_cast<AMDGPUTargetStreamer &>(TS);
1218   }
1219 
1220   const MCRegisterInfo *getMRI() const {
1221     // We need this const_cast because for some reason getContext() is not const
1222     // in MCAsmParser.
1223     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1224   }
1225 
1226   const MCInstrInfo *getMII() const {
1227     return &MII;
1228   }
1229 
1230   const FeatureBitset &getFeatureBits() const {
1231     return getSTI().getFeatureBits();
1232   }
1233 
1234   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1235   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1236   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1237 
1238   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1239   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1240   bool isForcedDPP() const { return ForcedDPP; }
1241   bool isForcedSDWA() const { return ForcedSDWA; }
1242   ArrayRef<unsigned> getMatchedVariants() const;
1243 
1244   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1245   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1246                      bool RestoreOnFailure);
1247   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1248   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1249                                         SMLoc &EndLoc) override;
1250   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1251   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1252                                       unsigned Kind) override;
1253   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1254                                OperandVector &Operands, MCStreamer &Out,
1255                                uint64_t &ErrorInfo,
1256                                bool MatchingInlineAsm) override;
1257   bool ParseDirective(AsmToken DirectiveID) override;
1258   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1259                                     OperandMode Mode = OperandMode_Default);
1260   StringRef parseMnemonicSuffix(StringRef Name);
1261   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1262                         SMLoc NameLoc, OperandVector &Operands) override;
1263   //bool ProcessInstruction(MCInst &Inst);
1264 
1265   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1266 
1267   OperandMatchResultTy
1268   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1269                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1270                      bool (*ConvertResult)(int64_t &) = nullptr);
1271 
1272   OperandMatchResultTy
1273   parseOperandArrayWithPrefix(const char *Prefix,
1274                               OperandVector &Operands,
1275                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1276                               bool (*ConvertResult)(int64_t&) = nullptr);
1277 
1278   OperandMatchResultTy
1279   parseNamedBit(const char *Name, OperandVector &Operands,
1280                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1281   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1282                                              StringRef &Value);
1283 
1284   bool isModifier();
1285   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1286   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1287   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1288   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1289   bool parseSP3NegModifier();
1290   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1291   OperandMatchResultTy parseReg(OperandVector &Operands);
1292   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1293   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1294   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1295   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1296   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1297   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1298   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1299   OperandMatchResultTy parseUfmt(int64_t &Format);
1300   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1301   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1302 
1303   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1304   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1305   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1306   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1307 
1308   bool parseCnt(int64_t &IntVal);
1309   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1310   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1311 
1312 private:
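  // Holds one field parsed from a sendmsg or hwreg operand: its value, whether
  // it was spelled symbolically, and whether it was present at all.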
1313   struct OperandInfoTy {
1314     int64_t Id;
1315     bool IsSymbolic = false;
1316     bool IsDefined = false;
1317 
1318     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1319   };
1320 
1321   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1322   bool validateSendMsg(const OperandInfoTy &Msg,
1323                        const OperandInfoTy &Op,
1324                        const OperandInfoTy &Stream,
1325                        const SMLoc Loc);
1326 
1327   bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1328   bool validateHwreg(const OperandInfoTy &HwReg,
1329                      const int64_t Offset,
1330                      const int64_t Width,
1331                      const SMLoc Loc);
1332 
1333   void errorExpTgt();
1334   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1335   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1336   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1337 
1338   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1339   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1340   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1341   bool validateSOPLiteral(const MCInst &Inst) const;
1342   bool validateConstantBusLimitations(const MCInst &Inst);
1343   bool validateEarlyClobberLimitations(const MCInst &Inst);
1344   bool validateIntClampSupported(const MCInst &Inst);
1345   bool validateMIMGAtomicDMask(const MCInst &Inst);
1346   bool validateMIMGGatherDMask(const MCInst &Inst);
1347   bool validateMovrels(const MCInst &Inst);
1348   bool validateMIMGDataSize(const MCInst &Inst);
1349   bool validateMIMGAddrSize(const MCInst &Inst);
1350   bool validateMIMGD16(const MCInst &Inst);
1351   bool validateMIMGDim(const MCInst &Inst);
1352   bool validateLdsDirect(const MCInst &Inst);
1353   bool validateOpSel(const MCInst &Inst);
1354   bool validateVccOperand(unsigned Reg) const;
1355   bool validateVOP3Literal(const MCInst &Inst) const;
1356   bool validateMAIAccWrite(const MCInst &Inst);
1357   unsigned getConstantBusLimit(unsigned Opcode) const;
1358   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1359   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1360   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1361 
1362   bool isId(const StringRef Id) const;
1363   bool isId(const AsmToken &Token, const StringRef Id) const;
1364   bool isToken(const AsmToken::TokenKind Kind) const;
1365   bool trySkipId(const StringRef Id);
1366   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1367   bool trySkipToken(const AsmToken::TokenKind Kind);
1368   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1369   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1370   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1371   AsmToken::TokenKind getTokenKind() const;
1372   bool parseExpr(int64_t &Imm);
1373   bool parseExpr(OperandVector &Operands);
1374   StringRef getTokenStr() const;
1375   AsmToken peekToken();
1376   AsmToken getToken() const;
1377   SMLoc getLoc() const;
1378   void lex();
1379 
1380 public:
1381   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1382   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1383 
1384   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1385   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1386   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1387   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1388   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1389   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1390 
1391   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1392                             const unsigned MinVal,
1393                             const unsigned MaxVal,
1394                             const StringRef ErrMsg);
1395   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1396   bool parseSwizzleOffset(int64_t &Imm);
1397   bool parseSwizzleMacro(int64_t &Imm);
1398   bool parseSwizzleQuadPerm(int64_t &Imm);
1399   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1400   bool parseSwizzleBroadcast(int64_t &Imm);
1401   bool parseSwizzleSwap(int64_t &Imm);
1402   bool parseSwizzleReverse(int64_t &Imm);
1403 
1404   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1405   int64_t parseGPRIdxMacro();
1406 
1407   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1408   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1409   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1410   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1411   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1412 
1413   AMDGPUOperand::Ptr defaultDLC() const;
1414   AMDGPUOperand::Ptr defaultGLC() const;
1415   AMDGPUOperand::Ptr defaultSLC() const;
1416 
1417   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1418   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1419   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1420   AMDGPUOperand::Ptr defaultFlatOffset() const;
1421 
1422   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1423 
1424   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1425                OptionalImmIndexMap &OptionalIdx);
1426   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1427   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1428   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1429 
1430   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1431 
1432   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1433                bool IsAtomic = false);
1434   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1435 
1436   OperandMatchResultTy parseDim(OperandVector &Operands);
1437   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1438   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1439   AMDGPUOperand::Ptr defaultRowMask() const;
1440   AMDGPUOperand::Ptr defaultBankMask() const;
1441   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1442   AMDGPUOperand::Ptr defaultFI() const;
1443   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1444   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1445 
1446   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1447                                     AMDGPUOperand::ImmTy Type);
1448   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1449   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1450   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1451   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1452   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1453   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1454   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1455                uint64_t BasicInstType,
1456                bool SkipDstVcc = false,
1457                bool SkipSrcVcc = false);
1458 
1459   AMDGPUOperand::Ptr defaultBLGP() const;
1460   AMDGPUOperand::Ptr defaultCBSZ() const;
1461   AMDGPUOperand::Ptr defaultABID() const;
1462 
1463   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1464   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1465 };
1466 
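// Describes one optional (named) instruction operand: its assembly name, the
// ImmTy used to record it, whether it is a simple bit flag, and an optional
// callback used to convert the parsed value.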
1467 struct OptionalOperand {
1468   const char *Name;
1469   AMDGPUOperand::ImmTy Type;
1470   bool IsBit;
1471   bool (*ConvertResult)(int64_t&);
1472 };
1473 
1474 } // end anonymous namespace
1475 
1476 // May be called with an integer type of equivalent bitwidth.
1477 static const fltSemantics *getFltSemantics(unsigned Size) {
1478   switch (Size) {
1479   case 4:
1480     return &APFloat::IEEEsingle();
1481   case 8:
1482     return &APFloat::IEEEdouble();
1483   case 2:
1484     return &APFloat::IEEEhalf();
1485   default:
1486     llvm_unreachable("unsupported fp type");
1487   }
1488 }
1489 
1490 static const fltSemantics *getFltSemantics(MVT VT) {
1491   return getFltSemantics(VT.getSizeInBits() / 8);
1492 }
1493 
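// Map an AMDGPU operand type (OPERAND_REG_* from SIDefines.h) to the
// fltSemantics used when encoding its floating-point literals.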
1494 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1495   switch (OperandType) {
1496   case AMDGPU::OPERAND_REG_IMM_INT32:
1497   case AMDGPU::OPERAND_REG_IMM_FP32:
1498   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1499   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1500   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1501   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1502     return &APFloat::IEEEsingle();
1503   case AMDGPU::OPERAND_REG_IMM_INT64:
1504   case AMDGPU::OPERAND_REG_IMM_FP64:
1505   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1506   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1507     return &APFloat::IEEEdouble();
1508   case AMDGPU::OPERAND_REG_IMM_INT16:
1509   case AMDGPU::OPERAND_REG_IMM_FP16:
1510   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1511   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1512   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1513   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1514   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1515   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1516   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1517   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1518   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1519   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1520     return &APFloat::IEEEhalf();
1521   default:
1522     llvm_unreachable("unsupported fp type");
1523   }
1524 }
1525 
1526 //===----------------------------------------------------------------------===//
1527 // Operand
1528 //===----------------------------------------------------------------------===//
1529 
1530 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1531   bool Lost;
1532 
1533   // Convert the literal to the floating-point semantics of VT.
1534   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1535                                                APFloat::rmNearestTiesToEven,
1536                                                &Lost);
1537   // We allow precision loss but not overflow or underflow
1538   if (Status != APFloat::opOK &&
1539       Lost &&
1540       ((Status & APFloat::opOverflow)  != 0 ||
1541        (Status & APFloat::opUnderflow) != 0)) {
1542     return false;
1543   }
1544 
1545   return true;
1546 }
1547 
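     // Check whether truncating Val to Size bits is lossless, i.e. the value
     // fits in Size bits when interpreted as either an unsigned or a signed
     // integer.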
1548 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1549   return isUIntN(Size, Val) || isIntN(Size, Val);
1550 }
1551 
1552 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1553   if (VT.getScalarType() == MVT::i16) {
1554     // FP immediate values are broken.
1555     return isInlinableIntLiteral(Val);
1556   }
1557 
1558   // f16/v2f16 operands work correctly for all values.
1559   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1560 }
1561 
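     // Check whether this immediate can be encoded as an inline constant,
     // i.e. one of the values (such as small integers in [-16, 64] or a few
     // common fp constants like 0.5, 1.0 and 2.0) that the hardware encodes
     // directly in the source operand field instead of an extra literal dword.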
1562 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1563 
1564   // This is a hack to enable named inline values like
1565   // shared_base with both 32-bit and 64-bit operands.
1566   // Note that these values are defined as
1567   // 32-bit operands only.
1568   if (isInlineValue()) {
1569     return true;
1570   }
1571 
1572   if (!isImmTy(ImmTyNone)) {
1573     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1574     return false;
1575   }
1576   // TODO: We should avoid using host float here. It would be better to
1577   // check the float bit values which is what a few other places do.
1578   // We've had bot failures before due to weird NaN support on mips hosts.
1579 
1580   APInt Literal(64, Imm.Val);
1581 
1582   if (Imm.IsFPImm) { // We got fp literal token
1583     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1584       return AMDGPU::isInlinableLiteral64(Imm.Val,
1585                                           AsmParser->hasInv2PiInlineImm());
1586     }
1587 
1588     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1589     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1590       return false;
1591 
1592     if (type.getScalarSizeInBits() == 16) {
1593       return isInlineableLiteralOp16(
1594         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1595         type, AsmParser->hasInv2PiInlineImm());
1596     }
1597 
1598     // Check if single precision literal is inlinable
1599     return AMDGPU::isInlinableLiteral32(
1600       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1601       AsmParser->hasInv2PiInlineImm());
1602   }
1603 
1604   // We got int literal token.
1605   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1606     return AMDGPU::isInlinableLiteral64(Imm.Val,
1607                                         AsmParser->hasInv2PiInlineImm());
1608   }
1609 
1610   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1611     return false;
1612   }
1613 
1614   if (type.getScalarSizeInBits() == 16) {
1615     return isInlineableLiteralOp16(
1616       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1617       type, AsmParser->hasInv2PiInlineImm());
1618   }
1619 
1620   return AMDGPU::isInlinableLiteral32(
1621     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1622     AsmParser->hasInv2PiInlineImm());
1623 }
1624 
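     // Check whether this immediate can be encoded as a literal constant,
     // i.e. an extra 32-bit dword emitted after the instruction encoding for
     // values that are not inline constants.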
1625 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1626   // Check that this immediate can be added as literal
1627   if (!isImmTy(ImmTyNone)) {
1628     return false;
1629   }
1630 
1631   if (!Imm.IsFPImm) {
1632     // We got int literal token.
1633 
1634     if (type == MVT::f64 && hasFPModifiers()) {
1635       // Cannot apply fp modifiers to int literals preserving the same semantics
1636       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1637       // disable these cases.
1638       return false;
1639     }
1640 
1641     unsigned Size = type.getSizeInBits();
1642     if (Size == 64)
1643       Size = 32;
1644 
1645     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1646     // types.
1647     return isSafeTruncation(Imm.Val, Size);
1648   }
1649 
1650   // We got fp literal token
1651   if (type == MVT::f64) { // Expected 64-bit fp operand
1652     // The low 32 bits of the literal will be set to zero, but we accept such literals
1653     return true;
1654   }
1655 
1656   if (type == MVT::i64) { // Expected 64-bit int operand
1657     // We don't allow fp literals in 64-bit integer instructions. It is
1658     // unclear how we should encode them.
1659     return false;
1660   }
1661 
1662   // We allow fp literals with f16x2 operands assuming that the specified
1663   // literal goes into the lower half and the upper half is zero. We also
1664   // require that the literal may be losslessly converted to f16.
1665   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1666                      (type == MVT::v2i16)? MVT::i16 : type;
1667 
1668   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1669   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1670 }
1671 
1672 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1673   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1674 }
1675 
1676 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1677   if (AsmParser->isVI())
1678     return isVReg32();
1679   else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1680     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1681   else
1682     return false;
1683 }
1684 
1685 bool AMDGPUOperand::isSDWAFP16Operand() const {
1686   return isSDWAOperand(MVT::f16);
1687 }
1688 
1689 bool AMDGPUOperand::isSDWAFP32Operand() const {
1690   return isSDWAOperand(MVT::f32);
1691 }
1692 
1693 bool AMDGPUOperand::isSDWAInt16Operand() const {
1694   return isSDWAOperand(MVT::i16);
1695 }
1696 
1697 bool AMDGPUOperand::isSDWAInt32Operand() const {
1698   return isSDWAOperand(MVT::i32);
1699 }
1700 
1701 bool AMDGPUOperand::isBoolReg() const {
1702   return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1703          (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1704 }
1705 
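     // Apply 'abs'/'neg' input modifiers directly to the bit pattern of an fp
     // literal: 'abs' clears the sign bit and 'neg' flips it (e.g. the sign
     // mask for a 32-bit operand is 0x80000000).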
1706 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1707 {
1708   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1709   assert(Size == 2 || Size == 4 || Size == 8);
1710 
1711   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1712 
1713   if (Imm.Mods.Abs) {
1714     Val &= ~FpSignMask;
1715   }
1716   if (Imm.Mods.Neg) {
1717     Val ^= FpSignMask;
1718   }
1719 
1720   return Val;
1721 }
1722 
1723 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1724   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1725                              Inst.getNumOperands())) {
1726     addLiteralImmOperand(Inst, Imm.Val,
1727                          ApplyModifiers &&
1728                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1729   } else {
1730     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1731     Inst.addOperand(MCOperand::createImm(Imm.Val));
1732   }
1733 }
1734 
1735 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1736   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1737   auto OpNum = Inst.getNumOperands();
1738   // Check that this operand accepts literals
1739   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1740 
1741   if (ApplyModifiers) {
1742     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1743     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1744     Val = applyInputFPModifiers(Val, Size);
1745   }
1746 
1747   APInt Literal(64, Val);
1748   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1749 
1750   if (Imm.IsFPImm) { // We got fp literal token
1751     switch (OpTy) {
1752     case AMDGPU::OPERAND_REG_IMM_INT64:
1753     case AMDGPU::OPERAND_REG_IMM_FP64:
1754     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1755     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1756       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1757                                        AsmParser->hasInv2PiInlineImm())) {
1758         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1759         return;
1760       }
1761 
1762       // Non-inlineable
1763       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1764         // For fp operands we check if low 32 bits are zeros
1765         if (Literal.getLoBits(32) != 0) {
1766           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1767           "Can't encode literal as exact 64-bit floating-point operand. "
1768           "Low 32-bits will be set to zero");
1769         }
1770 
1771         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1772         return;
1773       }
1774 
1775       // We don't allow fp literals in 64-bit integer instructions. It is
1776       // unclear how we should encode them. This case should be checked earlier
1777       // in predicate methods (isLiteralImm())
1778       llvm_unreachable("fp literal in 64-bit integer instruction.");
1779 
1780     case AMDGPU::OPERAND_REG_IMM_INT32:
1781     case AMDGPU::OPERAND_REG_IMM_FP32:
1782     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1783     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1784     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1785     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1786     case AMDGPU::OPERAND_REG_IMM_INT16:
1787     case AMDGPU::OPERAND_REG_IMM_FP16:
1788     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1789     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1790     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1791     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1792     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1793     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1794     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1795     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1796     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1797     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1798       bool lost;
1799       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1800       // Convert the literal to the operand's floating-point semantics
1801       FPLiteral.convert(*getOpFltSemantics(OpTy),
1802                         APFloat::rmNearestTiesToEven, &lost);
1803       // We allow precision loss but not overflow or underflow. This should be
1804       // checked earlier in isLiteralImm()
1805 
1806       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1807       Inst.addOperand(MCOperand::createImm(ImmVal));
1808       return;
1809     }
1810     default:
1811       llvm_unreachable("invalid operand size");
1812     }
1813 
1814     return;
1815   }
1816 
1817   // We got int literal token.
1818   // Only sign extend inline immediates.
1819   switch (OpTy) {
1820   case AMDGPU::OPERAND_REG_IMM_INT32:
1821   case AMDGPU::OPERAND_REG_IMM_FP32:
1822   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1823   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1824   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1825   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1826   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1827   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1828     if (isSafeTruncation(Val, 32) &&
1829         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1830                                      AsmParser->hasInv2PiInlineImm())) {
1831       Inst.addOperand(MCOperand::createImm(Val));
1832       return;
1833     }
1834 
1835     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1836     return;
1837 
1838   case AMDGPU::OPERAND_REG_IMM_INT64:
1839   case AMDGPU::OPERAND_REG_IMM_FP64:
1840   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1841   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1842     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1843       Inst.addOperand(MCOperand::createImm(Val));
1844       return;
1845     }
1846 
1847     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1848     return;
1849 
1850   case AMDGPU::OPERAND_REG_IMM_INT16:
1851   case AMDGPU::OPERAND_REG_IMM_FP16:
1852   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1853   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1854   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1855   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1856     if (isSafeTruncation(Val, 16) &&
1857         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1858                                      AsmParser->hasInv2PiInlineImm())) {
1859       Inst.addOperand(MCOperand::createImm(Val));
1860       return;
1861     }
1862 
1863     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1864     return;
1865 
1866   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1867   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1868   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1869   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
1870     assert(isSafeTruncation(Val, 16));
1871     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1872                                         AsmParser->hasInv2PiInlineImm()));
1873 
1874     Inst.addOperand(MCOperand::createImm(Val));
1875     return;
1876   }
1877   default:
1878     llvm_unreachable("invalid operand size");
1879   }
1880 }
1881 
1882 template <unsigned Bitwidth>
1883 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1884   APInt Literal(64, Imm.Val);
1885 
1886   if (!Imm.IsFPImm) {
1887     // We got int literal token.
1888     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1889     return;
1890   }
1891 
1892   bool Lost;
1893   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1894   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1895                     APFloat::rmNearestTiesToEven, &Lost);
1896   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1897 }
1898 
1899 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1900   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1901 }
1902 
1903 static bool isInlineValue(unsigned Reg) {
1904   switch (Reg) {
1905   case AMDGPU::SRC_SHARED_BASE:
1906   case AMDGPU::SRC_SHARED_LIMIT:
1907   case AMDGPU::SRC_PRIVATE_BASE:
1908   case AMDGPU::SRC_PRIVATE_LIMIT:
1909   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1910     return true;
1911   case AMDGPU::SRC_VCCZ:
1912   case AMDGPU::SRC_EXECZ:
1913   case AMDGPU::SRC_SCC:
1914     return true;
1915   case AMDGPU::SGPR_NULL:
1916     return true;
1917   default:
1918     return false;
1919   }
1920 }
1921 
1922 bool AMDGPUOperand::isInlineValue() const {
1923   return isRegKind() && ::isInlineValue(getReg());
1924 }
1925 
1926 //===----------------------------------------------------------------------===//
1927 // AsmParser
1928 //===----------------------------------------------------------------------===//
1929 
1930 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1931   if (Is == IS_VGPR) {
1932     switch (RegWidth) {
1933       default: return -1;
1934       case 1: return AMDGPU::VGPR_32RegClassID;
1935       case 2: return AMDGPU::VReg_64RegClassID;
1936       case 3: return AMDGPU::VReg_96RegClassID;
1937       case 4: return AMDGPU::VReg_128RegClassID;
1938       case 5: return AMDGPU::VReg_160RegClassID;
1939       case 6: return AMDGPU::VReg_192RegClassID;
1940       case 8: return AMDGPU::VReg_256RegClassID;
1941       case 16: return AMDGPU::VReg_512RegClassID;
1942       case 32: return AMDGPU::VReg_1024RegClassID;
1943     }
1944   } else if (Is == IS_TTMP) {
1945     switch (RegWidth) {
1946       default: return -1;
1947       case 1: return AMDGPU::TTMP_32RegClassID;
1948       case 2: return AMDGPU::TTMP_64RegClassID;
1949       case 4: return AMDGPU::TTMP_128RegClassID;
1950       case 8: return AMDGPU::TTMP_256RegClassID;
1951       case 16: return AMDGPU::TTMP_512RegClassID;
1952     }
1953   } else if (Is == IS_SGPR) {
1954     switch (RegWidth) {
1955       default: return -1;
1956       case 1: return AMDGPU::SGPR_32RegClassID;
1957       case 2: return AMDGPU::SGPR_64RegClassID;
1958       case 3: return AMDGPU::SGPR_96RegClassID;
1959       case 4: return AMDGPU::SGPR_128RegClassID;
1960       case 5: return AMDGPU::SGPR_160RegClassID;
1961       case 6: return AMDGPU::SGPR_192RegClassID;
1962       case 8: return AMDGPU::SGPR_256RegClassID;
1963       case 16: return AMDGPU::SGPR_512RegClassID;
1964     }
1965   } else if (Is == IS_AGPR) {
1966     switch (RegWidth) {
1967       default: return -1;
1968       case 1: return AMDGPU::AGPR_32RegClassID;
1969       case 2: return AMDGPU::AReg_64RegClassID;
1970       case 3: return AMDGPU::AReg_96RegClassID;
1971       case 4: return AMDGPU::AReg_128RegClassID;
1972       case 5: return AMDGPU::AReg_160RegClassID;
1973       case 6: return AMDGPU::AReg_192RegClassID;
1974       case 8: return AMDGPU::AReg_256RegClassID;
1975       case 16: return AMDGPU::AReg_512RegClassID;
1976       case 32: return AMDGPU::AReg_1024RegClassID;
1977     }
1978   }
1979   return -1;
1980 }
1981 
1982 static unsigned getSpecialRegForName(StringRef RegName) {
1983   return StringSwitch<unsigned>(RegName)
1984     .Case("exec", AMDGPU::EXEC)
1985     .Case("vcc", AMDGPU::VCC)
1986     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1987     .Case("xnack_mask", AMDGPU::XNACK_MASK)
1988     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
1989     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
1990     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1991     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1992     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
1993     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
1994     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1995     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1996     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1997     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1998     .Case("lds_direct", AMDGPU::LDS_DIRECT)
1999     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2000     .Case("m0", AMDGPU::M0)
2001     .Case("vccz", AMDGPU::SRC_VCCZ)
2002     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2003     .Case("execz", AMDGPU::SRC_EXECZ)
2004     .Case("src_execz", AMDGPU::SRC_EXECZ)
2005     .Case("scc", AMDGPU::SRC_SCC)
2006     .Case("src_scc", AMDGPU::SRC_SCC)
2007     .Case("tba", AMDGPU::TBA)
2008     .Case("tma", AMDGPU::TMA)
2009     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2010     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2011     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2012     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2013     .Case("vcc_lo", AMDGPU::VCC_LO)
2014     .Case("vcc_hi", AMDGPU::VCC_HI)
2015     .Case("exec_lo", AMDGPU::EXEC_LO)
2016     .Case("exec_hi", AMDGPU::EXEC_HI)
2017     .Case("tma_lo", AMDGPU::TMA_LO)
2018     .Case("tma_hi", AMDGPU::TMA_HI)
2019     .Case("tba_lo", AMDGPU::TBA_LO)
2020     .Case("tba_hi", AMDGPU::TBA_HI)
2021     .Case("pc", AMDGPU::PC_REG)
2022     .Case("null", AMDGPU::SGPR_NULL)
2023     .Default(AMDGPU::NoRegister);
2024 }
2025 
2026 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2027                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2028   auto R = parseRegister();
2029   if (!R) return true;
2030   assert(R->isReg());
2031   RegNo = R->getReg();
2032   StartLoc = R->getStartLoc();
2033   EndLoc = R->getEndLoc();
2034   return false;
2035 }
2036 
2037 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2038                                     SMLoc &EndLoc) {
2039   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2040 }
2041 
2042 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2043                                                        SMLoc &StartLoc,
2044                                                        SMLoc &EndLoc) {
2045   bool Result =
2046       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2047   bool PendingErrors = getParser().hasPendingError();
2048   getParser().clearPendingErrors();
2049   if (PendingErrors)
2050     return MatchOperand_ParseFail;
2051   if (Result)
2052     return MatchOperand_NoMatch;
2053   return MatchOperand_Success;
2054 }
2055 
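     // Add register Reg1 to the register or register range described by
     // Reg/RegWidth when parsing a register list such as [s0,s1,s2,s3].
     // Special registers are only combined from known lo/hi pairs (e.g.
     // [exec_lo,exec_hi] -> exec); regular registers must be consecutive,
     // e.g. appending s2 to s[0:1] grows RegWidth to 3.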
2056 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2057                                             RegisterKind RegKind, unsigned Reg1) {
2058   switch (RegKind) {
2059   case IS_SPECIAL:
2060     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2061       Reg = AMDGPU::EXEC;
2062       RegWidth = 2;
2063       return true;
2064     }
2065     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2066       Reg = AMDGPU::FLAT_SCR;
2067       RegWidth = 2;
2068       return true;
2069     }
2070     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2071       Reg = AMDGPU::XNACK_MASK;
2072       RegWidth = 2;
2073       return true;
2074     }
2075     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2076       Reg = AMDGPU::VCC;
2077       RegWidth = 2;
2078       return true;
2079     }
2080     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2081       Reg = AMDGPU::TBA;
2082       RegWidth = 2;
2083       return true;
2084     }
2085     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2086       Reg = AMDGPU::TMA;
2087       RegWidth = 2;
2088       return true;
2089     }
2090     return false;
2091   case IS_VGPR:
2092   case IS_SGPR:
2093   case IS_AGPR:
2094   case IS_TTMP:
2095     if (Reg1 != Reg + RegWidth) {
2096       return false;
2097     }
2098     RegWidth++;
2099     return true;
2100   default:
2101     llvm_unreachable("unexpected register kind");
2102   }
2103 }
2104 
2105 struct RegInfo {
2106   StringLiteral Name;
2107   RegisterKind Kind;
2108 };
2109 
2110 static constexpr RegInfo RegularRegisters[] = {
2111   {{"v"},    IS_VGPR},
2112   {{"s"},    IS_SGPR},
2113   {{"ttmp"}, IS_TTMP},
2114   {{"acc"},  IS_AGPR},
2115   {{"a"},    IS_AGPR},
2116 };
2117 
2118 static bool isRegularReg(RegisterKind Kind) {
2119   return Kind == IS_VGPR ||
2120          Kind == IS_SGPR ||
2121          Kind == IS_TTMP ||
2122          Kind == IS_AGPR;
2123 }
2124 
2125 static const RegInfo* getRegularRegInfo(StringRef Str) {
2126   for (const RegInfo &Reg : RegularRegisters)
2127     if (Str.startswith(Reg.Name))
2128       return &Reg;
2129   return nullptr;
2130 }
2131 
2132 static bool getRegNum(StringRef Str, unsigned& Num) {
2133   return !Str.getAsInteger(10, Num);
2134 }
2135 
2136 bool
2137 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2138                             const AsmToken &NextToken) const {
2139 
2140   // A list of consecutive registers: [s0,s1,s2,s3]
2141   if (Token.is(AsmToken::LBrac))
2142     return true;
2143 
2144   if (!Token.is(AsmToken::Identifier))
2145     return false;
2146 
2147   // A single register like s0 or a range of registers like s[0:1]
2148 
2149   StringRef Str = Token.getString();
2150   const RegInfo *Reg = getRegularRegInfo(Str);
2151   if (Reg) {
2152     StringRef RegName = Reg->Name;
2153     StringRef RegSuffix = Str.substr(RegName.size());
2154     if (!RegSuffix.empty()) {
2155       unsigned Num;
2156       // A single register with an index: rXX
2157       if (getRegNum(RegSuffix, Num))
2158         return true;
2159     } else {
2160       // A range of registers: r[XX:YY].
2161       if (NextToken.is(AsmToken::LBrac))
2162         return true;
2163     }
2164   }
2165 
2166   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2167 }
2168 
2169 bool
2170 AMDGPUAsmParser::isRegister()
2171 {
2172   return isRegister(getToken(), peekToken());
2173 }
2174 
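     // Map a register kind, first register index and width to a physical
     // register. For example, RegKind = IS_SGPR, RegNum = 4, RegWidth = 4
     // requires 4-dword alignment, giving RegIdx = 1 and (assuming the usual
     // ordering of the SGPR_128 class) the register covering s[4:7].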
2175 unsigned
2176 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2177                                unsigned RegNum,
2178                                unsigned RegWidth) {
2179 
2180   assert(isRegularReg(RegKind));
2181 
2182   unsigned AlignSize = 1;
2183   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2184     // SGPR and TTMP registers must be aligned.
2185     // Max required alignment is 4 dwords.
2186     AlignSize = std::min(RegWidth, 4u);
2187   }
2188 
2189   if (RegNum % AlignSize != 0)
2190     return AMDGPU::NoRegister;
2191 
2192   unsigned RegIdx = RegNum / AlignSize;
2193   int RCID = getRegClass(RegKind, RegWidth);
2194   if (RCID == -1)
2195     return AMDGPU::NoRegister;
2196 
2197   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2198   const MCRegisterClass RC = TRI->getRegClass(RCID);
2199   if (RegIdx >= RC.getNumRegs())
2200     return AMDGPU::NoRegister;
2201 
2202   return RC.getRegister(RegIdx);
2203 }
2204 
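     // Parse a bracketed register index or range, e.g. "[0]" or "[0:3]".
     // On success Num holds the first index and Width the number of
     // registers, so "[0:3]" yields Num = 0 and Width = 4.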
2205 bool
2206 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2207   int64_t RegLo, RegHi;
2208   if (!trySkipToken(AsmToken::LBrac))
2209     return false;
2210 
2211   if (!parseExpr(RegLo))
2212     return false;
2213 
2214   if (trySkipToken(AsmToken::Colon)) {
2215     if (!parseExpr(RegHi))
2216       return false;
2217   } else {
2218     RegHi = RegLo;
2219   }
2220 
2221   if (!trySkipToken(AsmToken::RBrac))
2222     return false;
2223 
2224   if (!isUInt<32>(RegLo) || !isUInt<32>(RegHi) || RegLo > RegHi)
2225     return false;
2226 
2227   Num = static_cast<unsigned>(RegLo);
2228   Width = (RegHi - RegLo) + 1;
2229   return true;
2230 }
2231 
2232 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2233                                           unsigned &RegNum, unsigned &RegWidth,
2234                                           SmallVectorImpl<AsmToken> &Tokens) {
2235   assert(isToken(AsmToken::Identifier));
2236   unsigned Reg = getSpecialRegForName(getTokenStr());
2237   if (Reg) {
2238     RegNum = 0;
2239     RegWidth = 1;
2240     RegKind = IS_SPECIAL;
2241     Tokens.push_back(getToken());
2242     lex(); // skip register name
2243   }
2244   return Reg;
2245 }
2246 
2247 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2248                                           unsigned &RegNum, unsigned &RegWidth,
2249                                           SmallVectorImpl<AsmToken> &Tokens) {
2250   assert(isToken(AsmToken::Identifier));
2251   StringRef RegName = getTokenStr();
2252 
2253   const RegInfo *RI = getRegularRegInfo(RegName);
2254   if (!RI)
2255     return AMDGPU::NoRegister;
2256   Tokens.push_back(getToken());
2257   lex(); // skip register name
2258 
2259   RegKind = RI->Kind;
2260   StringRef RegSuffix = RegName.substr(RI->Name.size());
2261   if (!RegSuffix.empty()) {
2262     // Single 32-bit register: vXX.
2263     if (!getRegNum(RegSuffix, RegNum))
2264       return AMDGPU::NoRegister;
2265     RegWidth = 1;
2266   } else {
2267     // Range of registers: v[XX:YY]. ":YY" is optional.
2268     if (!ParseRegRange(RegNum, RegWidth))
2269       return AMDGPU::NoRegister;
2270   }
2271 
2272   return getRegularReg(RegKind, RegNum, RegWidth);
2273 }
2274 
2275 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2276                                        unsigned &RegWidth,
2277                                        SmallVectorImpl<AsmToken> &Tokens) {
2278   unsigned Reg = AMDGPU::NoRegister;
2279 
2280   if (!trySkipToken(AsmToken::LBrac))
2281     return AMDGPU::NoRegister;
2282 
2283   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2284 
2285   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2286     return AMDGPU::NoRegister;
2287   if (RegWidth != 1)
2288     return AMDGPU::NoRegister;
2289 
2290   for (; trySkipToken(AsmToken::Comma); ) {
2291     RegisterKind NextRegKind;
2292     unsigned NextReg, NextRegNum, NextRegWidth;
2293 
2294     if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth,
2295                              Tokens))
2296       return AMDGPU::NoRegister;
2297     if (NextRegWidth != 1)
2298       return AMDGPU::NoRegister;
2299     if (NextRegKind != RegKind)
2300       return AMDGPU::NoRegister;
2301     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg))
2302       return AMDGPU::NoRegister;
2303   }
2304 
2305   if (!trySkipToken(AsmToken::RBrac))
2306     return AMDGPU::NoRegister;
2307 
2308   if (isRegularReg(RegKind))
2309     Reg = getRegularReg(RegKind, RegNum, RegWidth);
2310 
2311   return Reg;
2312 }
2313 
2314 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2315                                           unsigned &RegNum, unsigned &RegWidth,
2316                                           SmallVectorImpl<AsmToken> &Tokens) {
2317   Reg = AMDGPU::NoRegister;
2318 
2319   if (isToken(AsmToken::Identifier)) {
2320     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2321     if (Reg == AMDGPU::NoRegister)
2322       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2323   } else {
2324     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2325   }
2326 
2327   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2328   return Reg != AMDGPU::NoRegister && subtargetHasRegister(*TRI, Reg);
2329 }
2330 
2331 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2332                                           unsigned &RegNum, unsigned &RegWidth,
2333                                           bool RestoreOnFailure) {
2334   Reg = AMDGPU::NoRegister;
2335 
2336   SmallVector<AsmToken, 1> Tokens;
2337   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2338     if (RestoreOnFailure) {
2339       while (!Tokens.empty()) {
2340         getLexer().UnLex(Tokens.pop_back_val());
2341       }
2342     }
2343     return true;
2344   }
2345   return false;
2346 }
2347 
2348 Optional<StringRef>
2349 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2350   switch (RegKind) {
2351   case IS_VGPR:
2352     return StringRef(".amdgcn.next_free_vgpr");
2353   case IS_SGPR:
2354     return StringRef(".amdgcn.next_free_sgpr");
2355   default:
2356     return None;
2357   }
2358 }
2359 
2360 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2361   auto SymbolName = getGprCountSymbolName(RegKind);
2362   assert(SymbolName && "initializing invalid register kind");
2363   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2364   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2365 }
2366 
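     // Raise the .amdgcn.next_free_{v,s}gpr symbol to cover the highest
     // register index used so far; e.g. after parsing v[8:11] the value of
     // .amdgcn.next_free_vgpr becomes at least 12.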
2367 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2368                                             unsigned DwordRegIndex,
2369                                             unsigned RegWidth) {
2370   // Symbols are only defined for GCN targets
2371   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2372     return true;
2373 
2374   auto SymbolName = getGprCountSymbolName(RegKind);
2375   if (!SymbolName)
2376     return true;
2377   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2378 
2379   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2380   int64_t OldCount;
2381 
2382   if (!Sym->isVariable())
2383     return !Error(getParser().getTok().getLoc(),
2384                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2385   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2386     return !Error(
2387         getParser().getTok().getLoc(),
2388         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2389 
2390   if (OldCount <= NewMax)
2391     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2392 
2393   return true;
2394 }
2395 
2396 std::unique_ptr<AMDGPUOperand>
2397 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2398   const auto &Tok = Parser.getTok();
2399   SMLoc StartLoc = Tok.getLoc();
2400   SMLoc EndLoc = Tok.getEndLoc();
2401   RegisterKind RegKind;
2402   unsigned Reg, RegNum, RegWidth;
2403 
2404   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2405     // FIXME: Improve error messages (bug 41303).
2406     Error(StartLoc, "not a valid operand.");
2407     return nullptr;
2408   }
2409   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2410     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2411       return nullptr;
2412   } else
2413     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2414   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2415 }
2416 
2417 OperandMatchResultTy
2418 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2419   // TODO: add syntactic sugar for 1/(2*PI)
2420 
2421   assert(!isRegister());
2422   assert(!isModifier());
2423 
2424   const auto& Tok = getToken();
2425   const auto& NextTok = peekToken();
2426   bool IsReal = Tok.is(AsmToken::Real);
2427   SMLoc S = getLoc();
2428   bool Negate = false;
2429 
2430   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2431     lex();
2432     IsReal = true;
2433     Negate = true;
2434   }
2435 
2436   if (IsReal) {
2437     // Floating-point expressions are not supported.
2438     // Can only allow floating-point literals with an
2439     // optional sign.
2440 
2441     StringRef Num = getTokenStr();
2442     lex();
2443 
2444     APFloat RealVal(APFloat::IEEEdouble());
2445     auto roundMode = APFloat::rmNearestTiesToEven;
2446     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2447       return MatchOperand_ParseFail;
2448     }
2449     if (Negate)
2450       RealVal.changeSign();
2451 
2452     Operands.push_back(
2453       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2454                                AMDGPUOperand::ImmTyNone, true));
2455 
2456     return MatchOperand_Success;
2457 
2458   } else {
2459     int64_t IntVal;
2460     const MCExpr *Expr;
2461     SMLoc S = getLoc();
2462 
2463     if (HasSP3AbsModifier) {
2464       // This is a workaround for handling expressions
2465       // as arguments of SP3 'abs' modifier, for example:
2466       //     |1.0|
2467       //     |-1|
2468       //     |1+x|
2469       // This syntax is not compatible with syntax of standard
2470       // MC expressions (due to the trailing '|').
2471       SMLoc EndLoc;
2472       if (getParser().parsePrimaryExpr(Expr, EndLoc))
2473         return MatchOperand_ParseFail;
2474     } else {
2475       if (Parser.parseExpression(Expr))
2476         return MatchOperand_ParseFail;
2477     }
2478 
2479     if (Expr->evaluateAsAbsolute(IntVal)) {
2480       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2481     } else {
2482       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2483     }
2484 
2485     return MatchOperand_Success;
2486   }
2487 
2488   return MatchOperand_NoMatch;
2489 }
2490 
2491 OperandMatchResultTy
2492 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2493   if (!isRegister())
2494     return MatchOperand_NoMatch;
2495 
2496   if (auto R = parseRegister()) {
2497     assert(R->isReg());
2498     Operands.push_back(std::move(R));
2499     return MatchOperand_Success;
2500   }
2501   return MatchOperand_ParseFail;
2502 }
2503 
2504 OperandMatchResultTy
2505 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2506   auto res = parseReg(Operands);
2507   if (res != MatchOperand_NoMatch) {
2508     return res;
2509   } else if (isModifier()) {
2510     return MatchOperand_NoMatch;
2511   } else {
2512     return parseImm(Operands, HasSP3AbsMod);
2513   }
2514 }
2515 
2516 bool
2517 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2518   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2519     const auto &str = Token.getString();
2520     return str == "abs" || str == "neg" || str == "sext";
2521   }
2522   return false;
2523 }
2524 
2525 bool
2526 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2527   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2528 }
2529 
2530 bool
2531 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2532   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2533 }
2534 
2535 bool
2536 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2537   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2538 }
2539 
2540 // Check if this is an operand modifier or an opcode modifier
2541 // which may look like an expression but is not. We should
2542 // avoid parsing these modifiers as expressions. Currently
2543 // recognized sequences are:
2544 //   |...|
2545 //   abs(...)
2546 //   neg(...)
2547 //   sext(...)
2548 //   -reg
2549 //   -|...|
2550 //   -abs(...)
2551 //   name:...
2552 // Note that simple opcode modifiers like 'gds' may be parsed as
2553 // expressions; this is a special case. See getExpressionAsToken.
2554 //
2555 bool
2556 AMDGPUAsmParser::isModifier() {
2557 
2558   AsmToken Tok = getToken();
2559   AsmToken NextToken[2];
2560   peekTokens(NextToken);
2561 
2562   return isOperandModifier(Tok, NextToken[0]) ||
2563          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2564          isOpcodeModifierWithVal(Tok, NextToken[0]);
2565 }
2566 
2567 // Check if the current token is an SP3 'neg' modifier.
2568 // Currently this modifier is allowed in the following context:
2569 //
2570 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2571 // 2. Before an 'abs' modifier: -abs(...)
2572 // 3. Before an SP3 'abs' modifier: -|...|
2573 //
2574 // In all other cases "-" is handled as a part
2575 // of an expression that follows the sign.
2576 //
2577 // Note: When "-" is followed by an integer literal,
2578 // this is interpreted as integer negation rather
2579 // than a floating-point NEG modifier applied to the literal.
2580 // Besides being counter-intuitive, such use of a floating-point
2581 // NEG modifier would result in a different meaning
2582 // of integer literals used with VOP1/2/C and VOP3,
2583 // for example:
2584 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2585 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2586 // Negative fp literals with a preceding "-" are
2587 // handled likewise for uniformity.
2588 //
2589 bool
2590 AMDGPUAsmParser::parseSP3NegModifier() {
2591 
2592   AsmToken NextToken[2];
2593   peekTokens(NextToken);
2594 
2595   if (isToken(AsmToken::Minus) &&
2596       (isRegister(NextToken[0], NextToken[1]) ||
2597        NextToken[0].is(AsmToken::Pipe) ||
2598        isId(NextToken[0], "abs"))) {
2599     lex();
2600     return true;
2601   }
2602 
2603   return false;
2604 }
2605 
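     // Parse an operand with optional fp input modifiers, accepting both the
     // named and the SP3 forms, e.g. "neg(abs(v0))", "-|v0|" or "-v[0:1]".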
2606 OperandMatchResultTy
2607 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2608                                               bool AllowImm) {
2609   bool Neg, SP3Neg;
2610   bool Abs, SP3Abs;
2611   SMLoc Loc;
2612 
2613   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2614   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2615     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2616     return MatchOperand_ParseFail;
2617   }
2618 
2619   SP3Neg = parseSP3NegModifier();
2620 
2621   Loc = getLoc();
2622   Neg = trySkipId("neg");
2623   if (Neg && SP3Neg) {
2624     Error(Loc, "expected register or immediate");
2625     return MatchOperand_ParseFail;
2626   }
2627   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2628     return MatchOperand_ParseFail;
2629 
2630   Abs = trySkipId("abs");
2631   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2632     return MatchOperand_ParseFail;
2633 
2634   Loc = getLoc();
2635   SP3Abs = trySkipToken(AsmToken::Pipe);
2636   if (Abs && SP3Abs) {
2637     Error(Loc, "expected register or immediate");
2638     return MatchOperand_ParseFail;
2639   }
2640 
2641   OperandMatchResultTy Res;
2642   if (AllowImm) {
2643     Res = parseRegOrImm(Operands, SP3Abs);
2644   } else {
2645     Res = parseReg(Operands);
2646   }
2647   if (Res != MatchOperand_Success) {
2648     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2649   }
2650 
2651   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2652     return MatchOperand_ParseFail;
2653   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2654     return MatchOperand_ParseFail;
2655   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2656     return MatchOperand_ParseFail;
2657 
2658   AMDGPUOperand::Modifiers Mods;
2659   Mods.Abs = Abs || SP3Abs;
2660   Mods.Neg = Neg || SP3Neg;
2661 
2662   if (Mods.hasFPModifiers()) {
2663     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2664     if (Op.isExpr()) {
2665       Error(Op.getStartLoc(), "expected an absolute expression");
2666       return MatchOperand_ParseFail;
2667     }
2668     Op.setModifiers(Mods);
2669   }
2670   return MatchOperand_Success;
2671 }
2672 
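     // Parse an operand with an optional integer 'sext' modifier,
     // e.g. "sext(v0)".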
2673 OperandMatchResultTy
2674 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2675                                                bool AllowImm) {
2676   bool Sext = trySkipId("sext");
2677   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2678     return MatchOperand_ParseFail;
2679 
2680   OperandMatchResultTy Res;
2681   if (AllowImm) {
2682     Res = parseRegOrImm(Operands);
2683   } else {
2684     Res = parseReg(Operands);
2685   }
2686   if (Res != MatchOperand_Success) {
2687     return Sext? MatchOperand_ParseFail : Res;
2688   }
2689 
2690   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2691     return MatchOperand_ParseFail;
2692 
2693   AMDGPUOperand::Modifiers Mods;
2694   Mods.Sext = Sext;
2695 
2696   if (Mods.hasIntModifiers()) {
2697     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2698     if (Op.isExpr()) {
2699       Error(Op.getStartLoc(), "expected an absolute expression");
2700       return MatchOperand_ParseFail;
2701     }
2702     Op.setModifiers(Mods);
2703   }
2704 
2705   return MatchOperand_Success;
2706 }
2707 
2708 OperandMatchResultTy
2709 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2710   return parseRegOrImmWithFPInputMods(Operands, false);
2711 }
2712 
2713 OperandMatchResultTy
2714 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2715   return parseRegOrImmWithIntInputMods(Operands, false);
2716 }
2717 
2718 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2719   auto Loc = getLoc();
2720   if (trySkipId("off")) {
2721     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2722                                                 AMDGPUOperand::ImmTyOff, false));
2723     return MatchOperand_Success;
2724   }
2725 
2726   if (!isRegister())
2727     return MatchOperand_NoMatch;
2728 
2729   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2730   if (Reg) {
2731     Operands.push_back(std::move(Reg));
2732     return MatchOperand_Success;
2733   }
2734 
2735   return MatchOperand_ParseFail;
2736 
2737 }
2738 
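     // Reject matches that contradict a forced encoding, e.g. a VOP3-only
     // instruction when a 32-bit encoding was requested, or a non-SDWA
     // instruction when an SDWA encoding was forced.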
2739 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2740   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2741 
2742   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2743       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2744       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2745       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2746     return Match_InvalidOperand;
2747 
2748   if ((TSFlags & SIInstrFlags::VOP3) &&
2749       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2750       getForcedEncodingSize() != 64)
2751     return Match_PreferE32;
2752 
2753   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2754       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2755     // v_mac_f32/16 allow only dst_sel == DWORD;
2756     auto OpNum =
2757         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2758     const auto &Op = Inst.getOperand(OpNum);
2759     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2760       return Match_InvalidOperand;
2761     }
2762   }
2763 
2764   return Match_Success;
2765 }
2766 
2767 // What asm variants we should check
2768 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2769   if (getForcedEncodingSize() == 32) {
2770     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2771     return makeArrayRef(Variants);
2772   }
2773 
2774   if (isForcedVOP3()) {
2775     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2776     return makeArrayRef(Variants);
2777   }
2778 
2779   if (isForcedSDWA()) {
2780     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2781                                         AMDGPUAsmVariants::SDWA9};
2782     return makeArrayRef(Variants);
2783   }
2784 
2785   if (isForcedDPP()) {
2786     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2787     return makeArrayRef(Variants);
2788   }
2789 
2790   static const unsigned Variants[] = {
2791     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2792     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2793   };
2794 
2795   return makeArrayRef(Variants);
2796 }
2797 
2798 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2799   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2800   const unsigned Num = Desc.getNumImplicitUses();
2801   for (unsigned i = 0; i < Num; ++i) {
2802     unsigned Reg = Desc.ImplicitUses[i];
2803     switch (Reg) {
2804     case AMDGPU::FLAT_SCR:
2805     case AMDGPU::VCC:
2806     case AMDGPU::VCC_LO:
2807     case AMDGPU::VCC_HI:
2808     case AMDGPU::M0:
2809       return Reg;
2810     default:
2811       break;
2812     }
2813   }
2814   return AMDGPU::NoRegister;
2815 }
2816 
2817 // NB: This code is correct only when used to check constant
2818 // bus limitations because GFX7 does not support f16 inline constants.
2819 // Note that there are no cases when a GFX7 opcode violates
2820 // constant bus limitations due to the use of an f16 constant.
2821 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2822                                        unsigned OpIdx) const {
2823   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2824 
2825   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2826     return false;
2827   }
2828 
2829   const MCOperand &MO = Inst.getOperand(OpIdx);
2830 
2831   int64_t Val = MO.getImm();
2832   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2833 
2834   switch (OpSize) { // expected operand size
2835   case 8:
2836     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2837   case 4:
2838     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2839   case 2: {
2840     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2841     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
2842         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
2843         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
2844       return AMDGPU::isInlinableIntLiteral(Val);
2845 
2846     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2847         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2848         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
2849       return AMDGPU::isInlinableIntLiteralV216(Val);
2850 
2851     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2852         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2853         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
2854       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2855 
2856     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2857   }
2858   default:
2859     llvm_unreachable("invalid operand size");
2860   }
2861 }
2862 
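     // On GFX10 most instructions may use two scalar operands on the constant
     // bus; 64-bit shifts (and all instructions on earlier targets) are
     // limited to one.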
2863 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
2864   if (!isGFX10())
2865     return 1;
2866 
2867   switch (Opcode) {
2868   // 64-bit shift instructions can use only one scalar value input
2869   case AMDGPU::V_LSHLREV_B64:
2870   case AMDGPU::V_LSHLREV_B64_gfx10:
2871   case AMDGPU::V_LSHL_B64:
2872   case AMDGPU::V_LSHRREV_B64:
2873   case AMDGPU::V_LSHRREV_B64_gfx10:
2874   case AMDGPU::V_LSHR_B64:
2875   case AMDGPU::V_ASHRREV_I64:
2876   case AMDGPU::V_ASHRREV_I64_gfx10:
2877   case AMDGPU::V_ASHR_I64:
2878     return 1;
2879   default:
2880     return 2;
2881   }
2882 }
2883 
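     // An operand occupies a constant bus slot if it is an SGPR other than
     // 'null', a literal that is not an inline constant, or an expression.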
2884 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2885   const MCOperand &MO = Inst.getOperand(OpIdx);
2886   if (MO.isImm()) {
2887     return !isInlineConstant(Inst, OpIdx);
2888   } else if (MO.isReg()) {
2889     auto Reg = MO.getReg();
2890     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2891     return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL;
2892   } else {
2893     return true;
2894   }
2895 }
2896 
2897 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2898   const unsigned Opcode = Inst.getOpcode();
2899   const MCInstrDesc &Desc = MII.get(Opcode);
2900   unsigned ConstantBusUseCount = 0;
2901   unsigned NumLiterals = 0;
2902   unsigned LiteralSize;
2903 
2904   if (Desc.TSFlags &
2905       (SIInstrFlags::VOPC |
2906        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2907        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2908        SIInstrFlags::SDWA)) {
2909     // Check special imm operands (used by madmk, etc)
2910     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2911       ++ConstantBusUseCount;
2912     }
2913 
2914     SmallDenseSet<unsigned> SGPRsUsed;
2915     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2916     if (SGPRUsed != AMDGPU::NoRegister) {
2917       SGPRsUsed.insert(SGPRUsed);
2918       ++ConstantBusUseCount;
2919     }
2920 
2921     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2922     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2923     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2924 
2925     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2926 
2927     for (int OpIdx : OpIndices) {
2928       if (OpIdx == -1) break;
2929 
2930       const MCOperand &MO = Inst.getOperand(OpIdx);
2931       if (usesConstantBus(Inst, OpIdx)) {
2932         if (MO.isReg()) {
2933           const unsigned Reg = mc2PseudoReg(MO.getReg());
2934           // Pairs of registers with a partial intersection like these
2935           //   s0, s[0:1]
2936           //   flat_scratch_lo, flat_scratch
2937           //   flat_scratch_lo, flat_scratch_hi
2938           // are theoretically valid but they are disabled anyway.
2939           // Note that this code mimics SIInstrInfo::verifyInstruction
2940           if (!SGPRsUsed.count(Reg)) {
2941             SGPRsUsed.insert(Reg);
2942             ++ConstantBusUseCount;
2943           }
2944         } else { // Expression or a literal
2945 
2946           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
2947             continue; // special operand like VINTERP attr_chan
2948 
2949           // An instruction may use only one literal.
2950           // This has been validated on the previous step.
2951           // See validateVOP3Literal.
2952           // This literal may be used as more than one operand.
2953           // If all these operands are of the same size,
2954           // this literal counts as one scalar value.
2955           // Otherwise it counts as 2 scalar values.
2956           // See "GFX10 Shader Programming", section 3.6.2.3.
2957 
2958           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
2959           if (Size < 4) Size = 4;
2960 
2961           if (NumLiterals == 0) {
2962             NumLiterals = 1;
2963             LiteralSize = Size;
2964           } else if (LiteralSize != Size) {
2965             NumLiterals = 2;
2966           }
2967         }
2968       }
2969     }
2970   }
2971   ConstantBusUseCount += NumLiterals;
2972 
2973   return ConstantBusUseCount <= getConstantBusLimit(Opcode);
2974 }
2975 
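     // For instructions whose destination carries an EARLY_CLOBBER
     // constraint, reject source registers that overlap the destination,
     // e.g. a 64-bit vdst v[0:1] used together with a source v[1:2].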
2976 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2977   const unsigned Opcode = Inst.getOpcode();
2978   const MCInstrDesc &Desc = MII.get(Opcode);
2979 
2980   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2981   if (DstIdx == -1 ||
2982       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2983     return true;
2984   }
2985 
2986   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2987 
2988   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2989   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2990   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2991 
2992   assert(DstIdx != -1);
2993   const MCOperand &Dst = Inst.getOperand(DstIdx);
2994   assert(Dst.isReg());
2995   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2996 
2997   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2998 
2999   for (int SrcIdx : SrcIndices) {
3000     if (SrcIdx == -1) break;
3001     const MCOperand &Src = Inst.getOperand(SrcIdx);
3002     if (Src.isReg()) {
3003       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3004       if (isRegIntersect(DstReg, SrcReg, TRI)) {
3005         return false;
3006       }
3007     }
3008   }
3009 
3010   return true;
3011 }
3012 
3013 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3014 
3015   const unsigned Opc = Inst.getOpcode();
3016   const MCInstrDesc &Desc = MII.get(Opc);
3017 
3018   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3019     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3020     assert(ClampIdx != -1);
3021     return Inst.getOperand(ClampIdx).getImm() == 0;
3022   }
3023 
3024   return true;
3025 }
3026 
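     // Check that the vdata width of a MIMG instruction matches the number
     // of enabled dmask channels plus one extra dword when tfe is set;
     // e.g. dmask = 0x7 with tfe requires a 4-dword vdata such as v[0:3].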
3027 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3028 
3029   const unsigned Opc = Inst.getOpcode();
3030   const MCInstrDesc &Desc = MII.get(Opc);
3031 
3032   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3033     return true;
3034 
3035   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3036   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3037   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3038 
3039   assert(VDataIdx != -1);
3040   assert(DMaskIdx != -1);
3041   assert(TFEIdx != -1);
3042 
3043   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3044   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
3045   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3046   if (DMask == 0)
3047     DMask = 1;
3048 
3049   unsigned DataSize =
3050     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3051   if (hasPackedD16()) {
3052     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3053     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3054       DataSize = (DataSize + 1) / 2;
3055   }
3056 
3057   return (VDataSize / 4) == DataSize + TFESize;
3058 }
3059 
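     // On GFX10, check that the address operand width of a MIMG instruction
     // matches the number of address components implied by the opcode and dim
     // (coordinates, gradients, lod/clamp/mip); non-NSA encodings round
     // counts above 4 up to 8 or 16 dwords.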
3060 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3061   const unsigned Opc = Inst.getOpcode();
3062   const MCInstrDesc &Desc = MII.get(Opc);
3063 
3064   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
3065     return true;
3066 
3067   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3068   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3069       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3070   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3071   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3072   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3073 
3074   assert(VAddr0Idx != -1);
3075   assert(SrsrcIdx != -1);
3076   assert(DimIdx != -1);
3077   assert(SrsrcIdx > VAddr0Idx);
3078 
3079   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3080   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3081   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3082   unsigned VAddrSize =
3083       IsNSA ? SrsrcIdx - VAddr0Idx
3084             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3085 
3086   unsigned AddrSize = BaseOpcode->NumExtraArgs +
3087                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
3088                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
3089                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
3090   if (!IsNSA) {
3091     if (AddrSize > 8)
3092       AddrSize = 16;
3093     else if (AddrSize > 4)
3094       AddrSize = 8;
3095   }
3096 
3097   return VAddrSize == AddrSize;
3098 }
3099 
3100 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3101 
3102   const unsigned Opc = Inst.getOpcode();
3103   const MCInstrDesc &Desc = MII.get(Opc);
3104 
3105   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3106     return true;
3107   if (!Desc.mayLoad() || !Desc.mayStore())
3108     return true; // Not atomic
3109 
3110   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3111   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3112 
3113   // This is an incomplete check because image_atomic_cmpswap
3114   // may only use 0x3 and 0xf while other atomic operations
3115   // may use 0x1 and 0x3. However these limitations are
3116   // verified when we check that dmask matches dst size.
3117   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3118 }
3119 
3120 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3121 
3122   const unsigned Opc = Inst.getOpcode();
3123   const MCInstrDesc &Desc = MII.get(Opc);
3124 
3125   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3126     return true;
3127 
3128   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3129   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3130 
3131   // GATHER4 instructions use dmask in a different fashion compared to
3132   // other MIMG instructions. The only useful DMASK values are
3133   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3134   // (red,red,red,red) etc.) The ISA document doesn't mention
3135   // this.
3136   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3137 }
3138 
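// SDWA forms of v_movrels/v_movrelsd/v_movrelsd_2 on GFX10.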
3139 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3140 {
3141   switch (Opcode) {
3142   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3143   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3144   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3145     return true;
3146   default:
3147     return false;
3148   }
3149 }
3150 
3151 // movrels* opcodes should only allow VGPRs as src0.
3152 // This is specified in the .td descriptions for vop1/vop3,
3153 // but sdwa is handled differently. See isSDWAOperand.
3154 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) {
3155 
3156   const unsigned Opc = Inst.getOpcode();
3157   const MCInstrDesc &Desc = MII.get(Opc);
3158 
3159   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3160     return true;
3161 
3162   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3163   assert(Src0Idx != -1);
3164 
3165   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3166   if (!Src0.isReg())
3167     return false;
3168 
3169   auto Reg = Src0.getReg();
3170   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3171   return !isSGPR(mc2PseudoReg(Reg), TRI);
3172 }
3173 
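// v_accvgpr_write accepts a VGPR or an inline constant as src0, but not an
// SGPR.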
3174 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst) {
3175 
3176   const unsigned Opc = Inst.getOpcode();
3177 
3178   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3179     return true;
3180 
3181   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3182   assert(Src0Idx != -1);
3183 
3184   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3185   if (!Src0.isReg())
3186     return true;
3187 
3188   auto Reg = Src0.getReg();
3189   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3190   if (isSGPR(mc2PseudoReg(Reg), TRI)) {
3191     Error(getLoc(), "source operand must be either a VGPR or an inline constant");
3192     return false;
3193   }
3194 
3195   return true;
3196 }
3197 
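// The d16 image modifier is not supported on SI/CI.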
3198 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3199 
3200   const unsigned Opc = Inst.getOpcode();
3201   const MCInstrDesc &Desc = MII.get(Opc);
3202 
3203   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3204     return true;
3205 
3206   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3207   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3208     if (isCI() || isSI())
3209       return false;
3210   }
3211 
3212   return true;
3213 }
3214 
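// When a dim operand is present, it must encode one of the eight valid
// dimension values.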
3215 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3216   const unsigned Opc = Inst.getOpcode();
3217   const MCInstrDesc &Desc = MII.get(Opc);
3218 
3219   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3220     return true;
3221 
3222   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3223   if (DimIdx < 0)
3224     return true;
3225 
3226   long Imm = Inst.getOperand(DimIdx).getImm();
3227   if (Imm < 0 || Imm >= 8)
3228     return false;
3229 
3230   return true;
3231 }
3232 
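// Opcodes whose source operands are swapped relative to the non-rev form.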
3233 static bool IsRevOpcode(const unsigned Opcode)
3234 {
3235   switch (Opcode) {
3236   case AMDGPU::V_SUBREV_F32_e32:
3237   case AMDGPU::V_SUBREV_F32_e64:
3238   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3239   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3240   case AMDGPU::V_SUBREV_F32_e32_vi:
3241   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3242   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3243   case AMDGPU::V_SUBREV_F32_e64_vi:
3244 
3245   case AMDGPU::V_SUBREV_CO_U32_e32:
3246   case AMDGPU::V_SUBREV_CO_U32_e64:
3247   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3248   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3249 
3250   case AMDGPU::V_SUBBREV_U32_e32:
3251   case AMDGPU::V_SUBBREV_U32_e64:
3252   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3253   case AMDGPU::V_SUBBREV_U32_e32_vi:
3254   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3255   case AMDGPU::V_SUBBREV_U32_e64_vi:
3256 
3257   case AMDGPU::V_SUBREV_U32_e32:
3258   case AMDGPU::V_SUBREV_U32_e64:
3259   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3260   case AMDGPU::V_SUBREV_U32_e32_vi:
3261   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3262   case AMDGPU::V_SUBREV_U32_e64_vi:
3263 
3264   case AMDGPU::V_SUBREV_F16_e32:
3265   case AMDGPU::V_SUBREV_F16_e64:
3266   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3267   case AMDGPU::V_SUBREV_F16_e32_vi:
3268   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3269   case AMDGPU::V_SUBREV_F16_e64_vi:
3270 
3271   case AMDGPU::V_SUBREV_U16_e32:
3272   case AMDGPU::V_SUBREV_U16_e64:
3273   case AMDGPU::V_SUBREV_U16_e32_vi:
3274   case AMDGPU::V_SUBREV_U16_e64_vi:
3275 
3276   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3277   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3278   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3279 
3280   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3281   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3282 
3283   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3284   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3285 
3286   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3287   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3288 
3289   case AMDGPU::V_LSHRREV_B32_e32:
3290   case AMDGPU::V_LSHRREV_B32_e64:
3291   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3292   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3293   case AMDGPU::V_LSHRREV_B32_e32_vi:
3294   case AMDGPU::V_LSHRREV_B32_e64_vi:
3295   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3296   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3297 
3298   case AMDGPU::V_ASHRREV_I32_e32:
3299   case AMDGPU::V_ASHRREV_I32_e64:
3300   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3301   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3302   case AMDGPU::V_ASHRREV_I32_e32_vi:
3303   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3304   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3305   case AMDGPU::V_ASHRREV_I32_e64_vi:
3306 
3307   case AMDGPU::V_LSHLREV_B32_e32:
3308   case AMDGPU::V_LSHLREV_B32_e64:
3309   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3310   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3311   case AMDGPU::V_LSHLREV_B32_e32_vi:
3312   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3313   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3314   case AMDGPU::V_LSHLREV_B32_e64_vi:
3315 
3316   case AMDGPU::V_LSHLREV_B16_e32:
3317   case AMDGPU::V_LSHLREV_B16_e64:
3318   case AMDGPU::V_LSHLREV_B16_e32_vi:
3319   case AMDGPU::V_LSHLREV_B16_e64_vi:
3320   case AMDGPU::V_LSHLREV_B16_gfx10:
3321 
3322   case AMDGPU::V_LSHRREV_B16_e32:
3323   case AMDGPU::V_LSHRREV_B16_e64:
3324   case AMDGPU::V_LSHRREV_B16_e32_vi:
3325   case AMDGPU::V_LSHRREV_B16_e64_vi:
3326   case AMDGPU::V_LSHRREV_B16_gfx10:
3327 
3328   case AMDGPU::V_ASHRREV_I16_e32:
3329   case AMDGPU::V_ASHRREV_I16_e64:
3330   case AMDGPU::V_ASHRREV_I16_e32_vi:
3331   case AMDGPU::V_ASHRREV_I16_e64_vi:
3332   case AMDGPU::V_ASHRREV_I16_gfx10:
3333 
3334   case AMDGPU::V_LSHLREV_B64:
3335   case AMDGPU::V_LSHLREV_B64_gfx10:
3336   case AMDGPU::V_LSHLREV_B64_vi:
3337 
3338   case AMDGPU::V_LSHRREV_B64:
3339   case AMDGPU::V_LSHRREV_B64_gfx10:
3340   case AMDGPU::V_LSHRREV_B64_vi:
3341 
3342   case AMDGPU::V_ASHRREV_I64:
3343   case AMDGPU::V_ASHRREV_I64_gfx10:
3344   case AMDGPU::V_ASHRREV_I64_vi:
3345 
3346   case AMDGPU::V_PK_LSHLREV_B16:
3347   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3348   case AMDGPU::V_PK_LSHLREV_B16_vi:
3349 
3350   case AMDGPU::V_PK_LSHRREV_B16:
3351   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3352   case AMDGPU::V_PK_LSHRREV_B16_vi:
3353   case AMDGPU::V_PK_ASHRREV_I16:
3354   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3355   case AMDGPU::V_PK_ASHRREV_I16_vi:
3356     return true;
3357   default:
3358     return false;
3359   }
3360 }
3361 
3362 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3363 
3364   using namespace SIInstrFlags;
3365   const unsigned Opcode = Inst.getOpcode();
3366   const MCInstrDesc &Desc = MII.get(Opcode);
3367 
3368   // The lds_direct register is defined so that it can be used
3369   // with 9-bit source operands only. Ignore encodings which do not accept these.
3370   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3371     return true;
3372 
3373   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3374   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3375   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3376 
3377   const int SrcIndices[] = { Src1Idx, Src2Idx };
3378 
3379   // lds_direct cannot be specified as either src1 or src2.
3380   for (int SrcIdx : SrcIndices) {
3381     if (SrcIdx == -1) break;
3382     const MCOperand &Src = Inst.getOperand(SrcIdx);
3383     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3384       return false;
3385     }
3386   }
3387 
3388   if (Src0Idx == -1)
3389     return true;
3390 
3391   const MCOperand &Src = Inst.getOperand(Src0Idx);
3392   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3393     return true;
3394 
3395   // lds_direct is specified as src0. Check additional limitations.
3396   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3397 }
3398 
3399 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3400   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3401     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3402     if (Op.isFlatOffset())
3403       return Op.getStartLoc();
3404   }
3405   return getLoc();
3406 }
3407 
3408 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3409                                          const OperandVector &Operands) {
3410   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3411   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3412     return true;
3413 
3414   auto Opcode = Inst.getOpcode();
3415   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3416   assert(OpNum != -1);
3417 
3418   const auto &Op = Inst.getOperand(OpNum);
3419   if (!hasFlatOffsets() && Op.getImm() != 0) {
3420     Error(getFlatOffsetLoc(Operands),
3421           "flat offset modifier is not supported on this GPU");
3422     return false;
3423   }
3424 
3425   // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
3426   // For FLAT segment the offset must be positive;
3427   // MSB is ignored and forced to zero.
3428   unsigned OffsetSize = isGFX9() ? 13 : 12;
3429   if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
3430     if (!isIntN(OffsetSize, Op.getImm())) {
3431       Error(getFlatOffsetLoc(Operands),
3432             isGFX9() ? "expected a 13-bit signed offset" :
3433                        "expected a 12-bit signed offset");
3434       return false;
3435     }
3436   } else {
3437     if (!isUIntN(OffsetSize - 1, Op.getImm())) {
3438       Error(getFlatOffsetLoc(Operands),
3439             isGFX9() ? "expected a 12-bit unsigned offset" :
3440                        "expected an 11-bit unsigned offset");
3441       return false;
3442     }
3443   }
3444 
3445   return true;
3446 }
3447 
3448 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3449   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3450     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3451     if (Op.isSMEMOffset())
3452       return Op.getStartLoc();
3453   }
3454   return getLoc();
3455 }
3456 
3457 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3458                                          const OperandVector &Operands) {
3459   if (isCI() || isSI())
3460     return true;
3461 
3462   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3463   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3464     return true;
3465 
3466   auto Opcode = Inst.getOpcode();
3467   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3468   if (OpNum == -1)
3469     return true;
3470 
3471   const auto &Op = Inst.getOperand(OpNum);
3472   if (!Op.isImm())
3473     return true;
3474 
3475   uint64_t Offset = Op.getImm();
3476   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3477   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3478       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3479     return true;
3480 
3481   Error(getSMEMOffsetLoc(Operands),
3482         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3483                                "expected a 21-bit signed offset");
3484 
3485   return false;
3486 }
3487 
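// SOP2/SOPC instructions may use at most one literal constant or relocatable
// expression across their source operands.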
3488 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3489   unsigned Opcode = Inst.getOpcode();
3490   const MCInstrDesc &Desc = MII.get(Opcode);
3491   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3492     return true;
3493 
3494   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3495   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3496 
3497   const int OpIndices[] = { Src0Idx, Src1Idx };
3498 
3499   unsigned NumExprs = 0;
3500   unsigned NumLiterals = 0;
3501   uint32_t LiteralValue;
3502 
3503   for (int OpIdx : OpIndices) {
3504     if (OpIdx == -1) break;
3505 
3506     const MCOperand &MO = Inst.getOperand(OpIdx);
3507     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3508     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3509       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3510         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3511         if (NumLiterals == 0 || LiteralValue != Value) {
3512           LiteralValue = Value;
3513           ++NumLiterals;
3514         }
3515       } else if (MO.isExpr()) {
3516         ++NumExprs;
3517       }
3518     }
3519   }
3520 
3521   return NumLiterals + NumExprs <= 1;
3522 }
3523 
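// For v_permlane16/v_permlanex16 only the low two op_sel bits may be set.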
3524 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3525   const unsigned Opc = Inst.getOpcode();
3526   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3527       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3528     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3529     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3530 
3531     if (OpSel & ~3)
3532       return false;
3533   }
3534   return true;
3535 }
3536 
3537 // Check if VCC register matches wavefront size
3538 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3539   auto FB = getFeatureBits();
3540   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3541     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3542 }
3543 
3544 // A VOP3 literal is only allowed on GFX10+, and at most one can be used.
3545 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3546   unsigned Opcode = Inst.getOpcode();
3547   const MCInstrDesc &Desc = MII.get(Opcode);
3548   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3549     return true;
3550 
3551   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3552   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3553   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3554 
3555   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3556 
3557   unsigned NumExprs = 0;
3558   unsigned NumLiterals = 0;
3559   uint32_t LiteralValue;
3560 
3561   for (int OpIdx : OpIndices) {
3562     if (OpIdx == -1) break;
3563 
3564     const MCOperand &MO = Inst.getOperand(OpIdx);
3565     if (!MO.isImm() && !MO.isExpr())
3566       continue;
3567     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3568       continue;
3569 
3570     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3571         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug])
3572       return false;
3573 
3574     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3575       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3576       if (NumLiterals == 0 || LiteralValue != Value) {
3577         LiteralValue = Value;
3578         ++NumLiterals;
3579       }
3580     } else if (MO.isExpr()) {
3581       ++NumExprs;
3582     }
3583   }
3584   NumLiterals += NumExprs;
3585 
3586   return !NumLiterals ||
3587          (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3588 }
3589 
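// Run target-specific checks on a successfully matched instruction and report
// any failure as an error at IDLoc.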
3590 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3591                                           const SMLoc &IDLoc,
3592                                           const OperandVector &Operands) {
3593   if (!validateLdsDirect(Inst)) {
3594     Error(IDLoc,
3595       "invalid use of lds_direct");
3596     return false;
3597   }
3598   if (!validateSOPLiteral(Inst)) {
3599     Error(IDLoc,
3600       "only one literal operand is allowed");
3601     return false;
3602   }
3603   if (!validateVOP3Literal(Inst)) {
3604     Error(IDLoc,
3605       "invalid literal operand");
3606     return false;
3607   }
3608   if (!validateConstantBusLimitations(Inst)) {
3609     Error(IDLoc,
3610       "invalid operand (violates constant bus restrictions)");
3611     return false;
3612   }
3613   if (!validateEarlyClobberLimitations(Inst)) {
3614     Error(IDLoc,
3615       "destination must be different than all sources");
3616     return false;
3617   }
3618   if (!validateIntClampSupported(Inst)) {
3619     Error(IDLoc,
3620       "integer clamping is not supported on this GPU");
3621     return false;
3622   }
3623   if (!validateOpSel(Inst)) {
3624     Error(IDLoc,
3625       "invalid op_sel operand");
3626     return false;
3627   }
3628   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
3629   if (!validateMIMGD16(Inst)) {
3630     Error(IDLoc,
3631       "d16 modifier is not supported on this GPU");
3632     return false;
3633   }
3634   if (!validateMIMGDim(Inst)) {
3635     Error(IDLoc, "dim modifier is required on this GPU");
3636     return false;
3637   }
3638   if (!validateMIMGDataSize(Inst)) {
3639     Error(IDLoc,
3640       "image data size does not match dmask and tfe");
3641     return false;
3642   }
3643   if (!validateMIMGAddrSize(Inst)) {
3644     Error(IDLoc,
3645       "image address size does not match dim and a16");
3646     return false;
3647   }
3648   if (!validateMIMGAtomicDMask(Inst)) {
3649     Error(IDLoc,
3650       "invalid atomic image dmask");
3651     return false;
3652   }
3653   if (!validateMIMGGatherDMask(Inst)) {
3654     Error(IDLoc,
3655       "invalid image_gather dmask: only one bit must be set");
3656     return false;
3657   }
3658   if (!validateMovrels(Inst)) {
3659     Error(IDLoc, "source operand must be a VGPR");
3660     return false;
3661   }
3662   if (!validateFlatOffset(Inst, Operands)) {
3663     return false;
3664   }
3665   if (!validateSMEMOffset(Inst, Operands)) {
3666     return false;
3667   }
3668   if (!validateMAIAccWrite(Inst)) {
3669     return false;
3670   }
3671 
3672   return true;
3673 }
3674 
3675 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3676                                             const FeatureBitset &FBS,
3677                                             unsigned VariantID = 0);
3678 
3679 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3680                                               OperandVector &Operands,
3681                                               MCStreamer &Out,
3682                                               uint64_t &ErrorInfo,
3683                                               bool MatchingInlineAsm) {
3684   MCInst Inst;
3685   unsigned Result = Match_Success;
3686   for (auto Variant : getMatchedVariants()) {
3687     uint64_t EI;
3688     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3689                                   Variant);
3690     // We order match statuses from least to most specific and use the most
3691     // specific status as the result:
3692     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3693     if ((R == Match_Success) ||
3694         (R == Match_PreferE32) ||
3695         (R == Match_MissingFeature && Result != Match_PreferE32) ||
3696         (R == Match_InvalidOperand && Result != Match_MissingFeature
3697                                    && Result != Match_PreferE32) ||
3698         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
3699                                    && Result != Match_MissingFeature
3700                                    && Result != Match_PreferE32)) {
3701       Result = R;
3702       ErrorInfo = EI;
3703     }
3704     if (R == Match_Success)
3705       break;
3706   }
3707 
3708   switch (Result) {
3709   default: break;
3710   case Match_Success:
3711     if (!validateInstruction(Inst, IDLoc, Operands)) {
3712       return true;
3713     }
3714     Inst.setLoc(IDLoc);
3715     Out.emitInstruction(Inst, getSTI());
3716     return false;
3717 
3718   case Match_MissingFeature:
3719     return Error(IDLoc, "instruction not supported on this GPU");
3720 
3721   case Match_MnemonicFail: {
3722     FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3723     std::string Suggestion = AMDGPUMnemonicSpellCheck(
3724         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
3725     return Error(IDLoc, "invalid instruction" + Suggestion,
3726                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
3727   }
3728 
3729   case Match_InvalidOperand: {
3730     SMLoc ErrorLoc = IDLoc;
3731     if (ErrorInfo != ~0ULL) {
3732       if (ErrorInfo >= Operands.size()) {
3733         return Error(IDLoc, "too few operands for instruction");
3734       }
3735       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3736       if (ErrorLoc == SMLoc())
3737         ErrorLoc = IDLoc;
3738     }
3739     return Error(ErrorLoc, "invalid operand for instruction");
3740   }
3741 
3742   case Match_PreferE32:
3743     return Error(IDLoc, "internal error: instruction without _e64 suffix "
3744                         "should be encoded as e32");
3745   }
3746   llvm_unreachable("Implement any new match types added!");
3747 }
3748 
3749 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3750   int64_t Tmp = -1;
3751   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3752     return true;
3753   }
3754   if (getParser().parseAbsoluteExpression(Tmp)) {
3755     return true;
3756   }
3757   Ret = static_cast<uint32_t>(Tmp);
3758   return false;
3759 }
3760 
3761 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3762                                                uint32_t &Minor) {
3763   if (ParseAsAbsoluteExpression(Major))
3764     return TokError("invalid major version");
3765 
3766   if (getLexer().isNot(AsmToken::Comma))
3767     return TokError("minor version number required, comma expected");
3768   Lex();
3769 
3770   if (ParseAsAbsoluteExpression(Minor))
3771     return TokError("invalid minor version");
3772 
3773   return false;
3774 }
3775 
3776 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3777   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3778     return TokError("directive only supported for amdgcn architecture");
3779 
3780   std::string Target;
3781 
3782   SMLoc TargetStart = getTok().getLoc();
3783   if (getParser().parseEscapedString(Target))
3784     return true;
3785   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3786 
3787   std::string ExpectedTarget;
3788   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3789   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3790 
3791   if (Target != ExpectedTargetOS.str())
3792     return getParser().Error(TargetRange.Start, "target must match options",
3793                              TargetRange);
3794 
3795   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3796   return false;
3797 }
3798 
3799 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3800   return getParser().Error(Range.Start, "value out of range", Range);
3801 }
3802 
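// Convert the next free VGPR/SGPR numbers from .amdhsa_next_free_* into the
// granulated block counts stored in compute_pgm_rsrc1, adding the extra SGPRs
// reserved for VCC, flat scratch and XNACK where applicable.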
3803 bool AMDGPUAsmParser::calculateGPRBlocks(
3804     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3805     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
3806     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
3807     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
3808   // TODO(scott.linder): These calculations are duplicated from
3809   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3810   IsaVersion Version = getIsaVersion(getSTI().getCPU());
3811 
3812   unsigned NumVGPRs = NextFreeVGPR;
3813   unsigned NumSGPRs = NextFreeSGPR;
3814 
3815   if (Version.Major >= 10)
3816     NumSGPRs = 0;
3817   else {
3818     unsigned MaxAddressableNumSGPRs =
3819         IsaInfo::getAddressableNumSGPRs(&getSTI());
3820 
3821     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3822         NumSGPRs > MaxAddressableNumSGPRs)
3823       return OutOfRangeError(SGPRRange);
3824 
3825     NumSGPRs +=
3826         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3827 
3828     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3829         NumSGPRs > MaxAddressableNumSGPRs)
3830       return OutOfRangeError(SGPRRange);
3831 
3832     if (Features.test(FeatureSGPRInitBug))
3833       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3834   }
3835 
3836   VGPRBlocks =
3837       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
3838   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3839 
3840   return false;
3841 }
3842 
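// Parse an .amdhsa_kernel ... .end_amdhsa_kernel block, validate its
// directives and emit the resulting kernel descriptor.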
3843 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3844   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3845     return TokError("directive only supported for amdgcn architecture");
3846 
3847   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3848     return TokError("directive only supported for amdhsa OS");
3849 
3850   StringRef KernelName;
3851   if (getParser().parseIdentifier(KernelName))
3852     return true;
3853 
3854   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3855 
3856   StringSet<> Seen;
3857 
3858   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3859 
3860   SMRange VGPRRange;
3861   uint64_t NextFreeVGPR = 0;
3862   SMRange SGPRRange;
3863   uint64_t NextFreeSGPR = 0;
3864   unsigned UserSGPRCount = 0;
3865   bool ReserveVCC = true;
3866   bool ReserveFlatScr = true;
3867   bool ReserveXNACK = hasXNACK();
3868   Optional<bool> EnableWavefrontSize32;
3869 
3870   while (true) {
3871     while (getLexer().is(AsmToken::EndOfStatement))
3872       Lex();
3873 
3874     if (getLexer().isNot(AsmToken::Identifier))
3875       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3876 
3877     StringRef ID = getTok().getIdentifier();
3878     SMRange IDRange = getTok().getLocRange();
3879     Lex();
3880 
3881     if (ID == ".end_amdhsa_kernel")
3882       break;
3883 
3884     if (Seen.find(ID) != Seen.end())
3885       return TokError(".amdhsa_ directives cannot be repeated");
3886     Seen.insert(ID);
3887 
3888     SMLoc ValStart = getTok().getLoc();
3889     int64_t IVal;
3890     if (getParser().parseAbsoluteExpression(IVal))
3891       return true;
3892     SMLoc ValEnd = getTok().getLoc();
3893     SMRange ValRange = SMRange(ValStart, ValEnd);
3894 
3895     if (IVal < 0)
3896       return OutOfRangeError(ValRange);
3897 
3898     uint64_t Val = IVal;
3899 
3900 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
3901   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
3902     return OutOfRangeError(RANGE);                                             \
3903   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3904 
3905     if (ID == ".amdhsa_group_segment_fixed_size") {
3906       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3907         return OutOfRangeError(ValRange);
3908       KD.group_segment_fixed_size = Val;
3909     } else if (ID == ".amdhsa_private_segment_fixed_size") {
3910       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3911         return OutOfRangeError(ValRange);
3912       KD.private_segment_fixed_size = Val;
3913     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3914       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3915                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3916                        Val, ValRange);
3917       if (Val)
3918         UserSGPRCount += 4;
3919     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3920       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3921                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3922                        ValRange);
3923       if (Val)
3924         UserSGPRCount += 2;
3925     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3926       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3927                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3928                        ValRange);
3929       if (Val)
3930         UserSGPRCount += 2;
3931     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3932       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3933                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3934                        Val, ValRange);
3935       if (Val)
3936         UserSGPRCount += 2;
3937     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3938       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3939                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3940                        ValRange);
3941       if (Val)
3942         UserSGPRCount += 2;
3943     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3944       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3945                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3946                        ValRange);
3947       if (Val)
3948         UserSGPRCount += 2;
3949     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3950       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3951                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3952                        Val, ValRange);
3953       if (Val)
3954         UserSGPRCount += 1;
3955     } else if (ID == ".amdhsa_wavefront_size32") {
3956       if (IVersion.Major < 10)
3957         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3958                                  IDRange);
3959       EnableWavefrontSize32 = Val;
3960       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3961                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
3962                        Val, ValRange);
3963     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3964       PARSE_BITS_ENTRY(
3965           KD.compute_pgm_rsrc2,
3966           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3967           ValRange);
3968     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3969       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3970                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3971                        ValRange);
3972     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3973       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3974                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3975                        ValRange);
3976     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3977       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3978                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3979                        ValRange);
3980     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3981       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3982                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3983                        ValRange);
3984     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3985       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3986                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3987                        ValRange);
3988     } else if (ID == ".amdhsa_next_free_vgpr") {
3989       VGPRRange = ValRange;
3990       NextFreeVGPR = Val;
3991     } else if (ID == ".amdhsa_next_free_sgpr") {
3992       SGPRRange = ValRange;
3993       NextFreeSGPR = Val;
3994     } else if (ID == ".amdhsa_reserve_vcc") {
3995       if (!isUInt<1>(Val))
3996         return OutOfRangeError(ValRange);
3997       ReserveVCC = Val;
3998     } else if (ID == ".amdhsa_reserve_flat_scratch") {
3999       if (IVersion.Major < 7)
4000         return getParser().Error(IDRange.Start, "directive requires gfx7+",
4001                                  IDRange);
4002       if (!isUInt<1>(Val))
4003         return OutOfRangeError(ValRange);
4004       ReserveFlatScr = Val;
4005     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4006       if (IVersion.Major < 8)
4007         return getParser().Error(IDRange.Start, "directive requires gfx8+",
4008                                  IDRange);
4009       if (!isUInt<1>(Val))
4010         return OutOfRangeError(ValRange);
4011       ReserveXNACK = Val;
4012     } else if (ID == ".amdhsa_float_round_mode_32") {
4013       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4014                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4015     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4016       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4017                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4018     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4019       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4020                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4021     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4022       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4023                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4024                        ValRange);
4025     } else if (ID == ".amdhsa_dx10_clamp") {
4026       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4027                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4028     } else if (ID == ".amdhsa_ieee_mode") {
4029       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4030                        Val, ValRange);
4031     } else if (ID == ".amdhsa_fp16_overflow") {
4032       if (IVersion.Major < 9)
4033         return getParser().Error(IDRange.Start, "directive requires gfx9+",
4034                                  IDRange);
4035       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4036                        ValRange);
4037     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4038       if (IVersion.Major < 10)
4039         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4040                                  IDRange);
4041       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4042                        ValRange);
4043     } else if (ID == ".amdhsa_memory_ordered") {
4044       if (IVersion.Major < 10)
4045         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4046                                  IDRange);
4047       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4048                        ValRange);
4049     } else if (ID == ".amdhsa_forward_progress") {
4050       if (IVersion.Major < 10)
4051         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4052                                  IDRange);
4053       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4054                        ValRange);
4055     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4056       PARSE_BITS_ENTRY(
4057           KD.compute_pgm_rsrc2,
4058           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4059           ValRange);
4060     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4061       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4062                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4063                        Val, ValRange);
4064     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4065       PARSE_BITS_ENTRY(
4066           KD.compute_pgm_rsrc2,
4067           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4068           ValRange);
4069     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4070       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4071                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4072                        Val, ValRange);
4073     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4074       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4075                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4076                        Val, ValRange);
4077     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4078       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4079                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4080                        Val, ValRange);
4081     } else if (ID == ".amdhsa_exception_int_div_zero") {
4082       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4083                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4084                        Val, ValRange);
4085     } else {
4086       return getParser().Error(IDRange.Start,
4087                                "unknown .amdhsa_kernel directive", IDRange);
4088     }
4089 
4090 #undef PARSE_BITS_ENTRY
4091   }
4092 
4093   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4094     return TokError(".amdhsa_next_free_vgpr directive is required");
4095 
4096   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4097     return TokError(".amdhsa_next_free_sgpr directive is required");
4098 
4099   unsigned VGPRBlocks;
4100   unsigned SGPRBlocks;
4101   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4102                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
4103                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4104                          SGPRBlocks))
4105     return true;
4106 
4107   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4108           VGPRBlocks))
4109     return OutOfRangeError(VGPRRange);
4110   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4111                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4112 
4113   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4114           SGPRBlocks))
4115     return OutOfRangeError(SGPRRange);
4116   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4117                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4118                   SGPRBlocks);
4119 
4120   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4121     return TokError("too many user SGPRs enabled");
4122   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4123                   UserSGPRCount);
4124 
4125   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4126       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4127       ReserveFlatScr, ReserveXNACK);
4128   return false;
4129 }
4130 
4131 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4132   uint32_t Major;
4133   uint32_t Minor;
4134 
4135   if (ParseDirectiveMajorMinor(Major, Minor))
4136     return true;
4137 
4138   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4139   return false;
4140 }
4141 
4142 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4143   uint32_t Major;
4144   uint32_t Minor;
4145   uint32_t Stepping;
4146   StringRef VendorName;
4147   StringRef ArchName;
4148 
4149   // If this directive has no arguments, then use the ISA version for the
4150   // targeted GPU.
4151   if (getLexer().is(AsmToken::EndOfStatement)) {
4152     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4153     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
4154                                                       ISA.Stepping,
4155                                                       "AMD", "AMDGPU");
4156     return false;
4157   }
4158 
4159   if (ParseDirectiveMajorMinor(Major, Minor))
4160     return true;
4161 
4162   if (getLexer().isNot(AsmToken::Comma))
4163     return TokError("stepping version number required, comma expected");
4164   Lex();
4165 
4166   if (ParseAsAbsoluteExpression(Stepping))
4167     return TokError("invalid stepping version");
4168 
4169   if (getLexer().isNot(AsmToken::Comma))
4170     return TokError("vendor name required, comma expected");
4171   Lex();
4172 
4173   if (getLexer().isNot(AsmToken::String))
4174     return TokError("invalid vendor name");
4175 
4176   VendorName = getLexer().getTok().getStringContents();
4177   Lex();
4178 
4179   if (getLexer().isNot(AsmToken::Comma))
4180     return TokError("arch name required, comma expected");
4181   Lex();
4182 
4183   if (getLexer().isNot(AsmToken::String))
4184     return TokError("invalid arch name");
4185 
4186   ArchName = getLexer().getTok().getStringContents();
4187   Lex();
4188 
4189   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
4190                                                     VendorName, ArchName);
4191   return false;
4192 }
4193 
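// Parse a single key/value entry of an amd_kernel_code_t block and perform
// subtarget-specific checks on wavefront size and the GFX10 mode bits.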
4194 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4195                                                amd_kernel_code_t &Header) {
4196   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4197   // assembly for backwards compatibility.
4198   if (ID == "max_scratch_backing_memory_byte_size") {
4199     Parser.eatToEndOfStatement();
4200     return false;
4201   }
4202 
4203   SmallString<40> ErrStr;
4204   raw_svector_ostream Err(ErrStr);
4205   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4206     return TokError(Err.str());
4207   }
4208   Lex();
4209 
4210   if (ID == "enable_wavefront_size32") {
4211     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4212       if (!isGFX10())
4213         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4214       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4215         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4216     } else {
4217       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4218         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4219     }
4220   }
4221 
4222   if (ID == "wavefront_size") {
4223     if (Header.wavefront_size == 5) {
4224       if (!isGFX10())
4225         return TokError("wavefront_size=5 is only allowed on GFX10+");
4226       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4227         return TokError("wavefront_size=5 requires +WavefrontSize32");
4228     } else if (Header.wavefront_size == 6) {
4229       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4230         return TokError("wavefront_size=6 requires +WavefrontSize64");
4231     }
4232   }
4233 
4234   if (ID == "enable_wgp_mode") {
4235     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
4236       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4237   }
4238 
4239   if (ID == "enable_mem_ordered") {
4240     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
4241       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4242   }
4243 
4244   if (ID == "enable_fwd_progress") {
4245     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
4246       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4247   }
4248 
4249   return false;
4250 }
4251 
4252 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4253   amd_kernel_code_t Header;
4254   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4255 
4256   while (true) {
4257     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4258     // will set the current token to EndOfStatement.
4259     while(getLexer().is(AsmToken::EndOfStatement))
4260       Lex();
4261 
4262     if (getLexer().isNot(AsmToken::Identifier))
4263       return TokError("expected value identifier or .end_amd_kernel_code_t");
4264 
4265     StringRef ID = getLexer().getTok().getIdentifier();
4266     Lex();
4267 
4268     if (ID == ".end_amd_kernel_code_t")
4269       break;
4270 
4271     if (ParseAMDKernelCodeTValue(ID, Header))
4272       return true;
4273   }
4274 
4275   getTargetStreamer().EmitAMDKernelCodeT(Header);
4276 
4277   return false;
4278 }
4279 
4280 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4281   if (getLexer().isNot(AsmToken::Identifier))
4282     return TokError("expected symbol name");
4283 
4284   StringRef KernelName = Parser.getTok().getString();
4285 
4286   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4287                                            ELF::STT_AMDGPU_HSA_KERNEL);
4288   Lex();
4289   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
4290     KernelScope.initialize(getContext());
4291   return false;
4292 }
4293 
4294 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4295   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4296     return Error(getParser().getTok().getLoc(),
4297                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4298                  "architectures");
4299   }
4300 
4301   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
4302 
4303   std::string ISAVersionStringFromSTI;
4304   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4305   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4306 
4307   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4308     return Error(getParser().getTok().getLoc(),
4309                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4310                  "arguments specified through the command line");
4311   }
4312 
4313   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4314   Lex();
4315 
4316   return false;
4317 }
4318 
4319 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4320   const char *AssemblerDirectiveBegin;
4321   const char *AssemblerDirectiveEnd;
4322   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4323       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
4324           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4325                             HSAMD::V3::AssemblerDirectiveEnd)
4326           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4327                             HSAMD::AssemblerDirectiveEnd);
4328 
4329   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4330     return Error(getParser().getTok().getLoc(),
4331                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4332                  "not available on non-amdhsa OSes")).str());
4333   }
4334 
4335   std::string HSAMetadataString;
4336   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4337                           HSAMetadataString))
4338     return true;
4339 
4340   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
4341     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4342       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4343   } else {
4344     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4345       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4346   }
4347 
4348   return false;
4349 }
4350 
4351 /// Common code to parse out a block of text (typically YAML) between start and
4352 /// end directives.
4353 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4354                                           const char *AssemblerDirectiveEnd,
4355                                           std::string &CollectString) {
4356 
4357   raw_string_ostream CollectStream(CollectString);
4358 
4359   getLexer().setSkipSpace(false);
4360 
4361   bool FoundEnd = false;
4362   while (!getLexer().is(AsmToken::Eof)) {
4363     while (getLexer().is(AsmToken::Space)) {
4364       CollectStream << getLexer().getTok().getString();
4365       Lex();
4366     }
4367 
4368     if (getLexer().is(AsmToken::Identifier)) {
4369       StringRef ID = getLexer().getTok().getIdentifier();
4370       if (ID == AssemblerDirectiveEnd) {
4371         Lex();
4372         FoundEnd = true;
4373         break;
4374       }
4375     }
4376 
4377     CollectStream << Parser.parseStringToEndOfStatement()
4378                   << getContext().getAsmInfo()->getSeparatorString();
4379 
4380     Parser.eatToEndOfStatement();
4381   }
4382 
4383   getLexer().setSkipSpace(true);
4384 
4385   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
4386     return TokError(Twine("expected directive ") +
4387                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4388   }
4389 
4390   CollectStream.flush();
4391   return false;
4392 }
4393 
4394 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4395 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4396   std::string String;
4397   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4398                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4399     return true;
4400 
4401   auto PALMetadata = getTargetStreamer().getPALMetadata();
4402   if (!PALMetadata->setFromString(String))
4403     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4404   return false;
4405 }
4406 
4407 /// Parse the assembler directive for old linear-format PAL metadata.
4408 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4409   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4410     return Error(getParser().getTok().getLoc(),
4411                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4412                  "not available on non-amdpal OSes")).str());
4413   }
4414 
4415   auto PALMetadata = getTargetStreamer().getPALMetadata();
4416   PALMetadata->setLegacy();
4417   for (;;) {
4418     uint32_t Key, Value;
4419     if (ParseAsAbsoluteExpression(Key)) {
4420       return TokError(Twine("invalid value in ") +
4421                       Twine(PALMD::AssemblerDirective));
4422     }
4423     if (getLexer().isNot(AsmToken::Comma)) {
4424       return TokError(Twine("expected an even number of values in ") +
4425                       Twine(PALMD::AssemblerDirective));
4426     }
4427     Lex();
4428     if (ParseAsAbsoluteExpression(Value)) {
4429       return TokError(Twine("invalid value in ") +
4430                       Twine(PALMD::AssemblerDirective));
4431     }
4432     PALMetadata->setRegister(Key, Value);
4433     if (getLexer().isNot(AsmToken::Comma))
4434       break;
4435     Lex();
4436   }
4437   return false;
4438 }
4439 
4440 /// ParseDirectiveAMDGPULDS
4441 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
4442 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4443   if (getParser().checkForValidSection())
4444     return true;
4445 
4446   StringRef Name;
4447   SMLoc NameLoc = getLexer().getLoc();
4448   if (getParser().parseIdentifier(Name))
4449     return TokError("expected identifier in directive");
4450 
4451   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4452   if (parseToken(AsmToken::Comma, "expected ','"))
4453     return true;
4454 
4455   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4456 
4457   int64_t Size;
4458   SMLoc SizeLoc = getLexer().getLoc();
4459   if (getParser().parseAbsoluteExpression(Size))
4460     return true;
4461   if (Size < 0)
4462     return Error(SizeLoc, "size must be non-negative");
4463   if (Size > LocalMemorySize)
4464     return Error(SizeLoc, "size is too large");
4465 
4466   int64_t Alignment = 4;
4467   if (getLexer().is(AsmToken::Comma)) {
4468     Lex();
4469     SMLoc AlignLoc = getLexer().getLoc();
4470     if (getParser().parseAbsoluteExpression(Alignment))
4471       return true;
4472     if (Alignment < 0 || !isPowerOf2_64(Alignment))
4473       return Error(AlignLoc, "alignment must be a power of two");
4474 
4475     // Alignment larger than the size of LDS is possible in theory, as long
4476     // as the linker manages to place the symbol at address 0, but we do want
4477     // to make sure the alignment fits nicely into a 32-bit integer.
4478     if (Alignment >= 1u << 31)
4479       return Error(AlignLoc, "alignment is too large");
4480   }
4481 
4482   if (parseToken(AsmToken::EndOfStatement,
4483                  "unexpected token in '.amdgpu_lds' directive"))
4484     return true;
4485 
4486   Symbol->redefineIfPossible();
4487   if (!Symbol->isUndefined())
4488     return Error(NameLoc, "invalid symbol redefinition");
4489 
4490   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
4491   return false;
4492 }
4493 
4494 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4495   StringRef IDVal = DirectiveID.getString();
4496 
4497   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
4498     if (IDVal == ".amdgcn_target")
4499       return ParseDirectiveAMDGCNTarget();
4500 
4501     if (IDVal == ".amdhsa_kernel")
4502       return ParseDirectiveAMDHSAKernel();
4503 
4504     // TODO: Restructure/combine with PAL metadata directive.
4505     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4506       return ParseDirectiveHSAMetadata();
4507   } else {
4508     if (IDVal == ".hsa_code_object_version")
4509       return ParseDirectiveHSACodeObjectVersion();
4510 
4511     if (IDVal == ".hsa_code_object_isa")
4512       return ParseDirectiveHSACodeObjectISA();
4513 
4514     if (IDVal == ".amd_kernel_code_t")
4515       return ParseDirectiveAMDKernelCodeT();
4516 
4517     if (IDVal == ".amdgpu_hsa_kernel")
4518       return ParseDirectiveAMDGPUHsaKernel();
4519 
4520     if (IDVal == ".amd_amdgpu_isa")
4521       return ParseDirectiveISAVersion();
4522 
4523     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4524       return ParseDirectiveHSAMetadata();
4525   }
4526 
4527   if (IDVal == ".amdgpu_lds")
4528     return ParseDirectiveAMDGPULDS();
4529 
4530   if (IDVal == PALMD::AssemblerDirectiveBegin)
4531     return ParseDirectivePALMetadataBegin();
4532 
4533   if (IDVal == PALMD::AssemblerDirective)
4534     return ParseDirectivePALMetadata();
4535 
4536   return true;
4537 }
4538 
4539 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4540                                            unsigned RegNo) const {
4541 
4542   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4543        R.isValid(); ++R) {
4544     if (*R == RegNo)
4545       return isGFX9() || isGFX10();
4546   }
4547 
4548   // GFX10 has 2 more SGPRs: 104 and 105.
4549   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4550        R.isValid(); ++R) {
4551     if (*R == RegNo)
4552       return hasSGPR104_SGPR105();
4553   }
4554 
4555   switch (RegNo) {
4556   case AMDGPU::SRC_SHARED_BASE:
4557   case AMDGPU::SRC_SHARED_LIMIT:
4558   case AMDGPU::SRC_PRIVATE_BASE:
4559   case AMDGPU::SRC_PRIVATE_LIMIT:
4560   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4561     return !isCI() && !isSI() && !isVI();
4562   case AMDGPU::TBA:
4563   case AMDGPU::TBA_LO:
4564   case AMDGPU::TBA_HI:
4565   case AMDGPU::TMA:
4566   case AMDGPU::TMA_LO:
4567   case AMDGPU::TMA_HI:
4568     return !isGFX9() && !isGFX10();
4569   case AMDGPU::XNACK_MASK:
4570   case AMDGPU::XNACK_MASK_LO:
4571   case AMDGPU::XNACK_MASK_HI:
4572     return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4573   case AMDGPU::SGPR_NULL:
4574     return isGFX10();
4575   default:
4576     break;
4577   }
4578 
4579   if (isCI())
4580     return true;
4581 
4582   if (isSI() || isGFX10()) {
4583     // No flat_scr on SI.
4584     // On GFX10 flat scratch is not a valid register operand and can only be
4585     // accessed with s_setreg/s_getreg.
4586     switch (RegNo) {
4587     case AMDGPU::FLAT_SCR:
4588     case AMDGPU::FLAT_SCR_LO:
4589     case AMDGPU::FLAT_SCR_HI:
4590       return false;
4591     default:
4592       return true;
4593     }
4594   }
4595 
4596   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4597   // SI/CI have.
4598   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4599        R.isValid(); ++R) {
4600     if (*R == RegNo)
4601       return hasSGPR102_SGPR103();
4602   }
4603 
4604   return true;
4605 }
4606 
4607 OperandMatchResultTy
4608 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4609                               OperandMode Mode) {
4610   // Try to parse with a custom parser
4611   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4612 
4613   // If we successfully parsed the operand or if there was an error parsing,
4614   // we are done.
4615   //
4616   // If we are parsing after we reach EndOfStatement then this means we
4617   // are appending default values to the Operands list.  This is only done
4618   // by the custom parser, so we shouldn't continue on to the generic parsing.
4619   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4620       getLexer().is(AsmToken::EndOfStatement))
4621     return ResTy;
4622 
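  // In NSA (non-sequential address) form, GFX10 MIMG instructions take their
  // address operands as a bracketed register list, e.g. [v4, v9, v16]
  // (illustrative registers). Parse that list here and wrap it in explicit
  // '[' and ']' tokens when it contains more than one register.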
4623   if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4624     unsigned Prefix = Operands.size();
4625     SMLoc LBraceLoc = getTok().getLoc();
4626     Parser.Lex(); // eat the '['
4627 
4628     for (;;) {
4629       ResTy = parseReg(Operands);
4630       if (ResTy != MatchOperand_Success)
4631         return ResTy;
4632 
4633       if (getLexer().is(AsmToken::RBrac))
4634         break;
4635 
4636       if (getLexer().isNot(AsmToken::Comma))
4637         return MatchOperand_ParseFail;
4638       Parser.Lex();
4639     }
4640 
4641     if (Operands.size() - Prefix > 1) {
4642       Operands.insert(Operands.begin() + Prefix,
4643                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4644       Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4645                                                     getTok().getLoc()));
4646     }
4647 
4648     Parser.Lex(); // eat the ']'
4649     return MatchOperand_Success;
4650   }
4651 
4652   return parseRegOrImm(Operands);
4653 }
4654 
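// Strip a trailing encoding-selection suffix from the mnemonic and remember it
// as a forced encoding. For example (illustrative mnemonic), "v_add_f32_e64"
// forces the 64-bit (VOP3) encoding and is reduced to "v_add_f32".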
4655 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4656   // Clear any forced encodings from the previous instruction.
4657   setForcedEncodingSize(0);
4658   setForcedDPP(false);
4659   setForcedSDWA(false);
4660 
4661   if (Name.endswith("_e64")) {
4662     setForcedEncodingSize(64);
4663     return Name.substr(0, Name.size() - 4);
4664   } else if (Name.endswith("_e32")) {
4665     setForcedEncodingSize(32);
4666     return Name.substr(0, Name.size() - 4);
4667   } else if (Name.endswith("_dpp")) {
4668     setForcedDPP(true);
4669     return Name.substr(0, Name.size() - 4);
4670   } else if (Name.endswith("_sdwa")) {
4671     setForcedSDWA(true);
4672     return Name.substr(0, Name.size() - 5);
4673   }
4674   return Name;
4675 }
4676 
4677 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4678                                        StringRef Name,
4679                                        SMLoc NameLoc, OperandVector &Operands) {
4680   // Add the instruction mnemonic
4681   Name = parseMnemonicSuffix(Name);
4682   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4683 
4684   bool IsMIMG = Name.startswith("image_");
4685 
4686   while (!getLexer().is(AsmToken::EndOfStatement)) {
4687     OperandMode Mode = OperandMode_Default;
4688     if (IsMIMG && isGFX10() && Operands.size() == 2)
4689       Mode = OperandMode_NSA;
4690     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4691 
4692     // Eat the comma or space if there is one.
4693     if (getLexer().is(AsmToken::Comma))
4694       Parser.Lex();
4695 
4696     switch (Res) {
4697       case MatchOperand_Success: break;
4698       case MatchOperand_ParseFail:
4699         // FIXME: use real operand location rather than the current location.
4700         Error(getLexer().getLoc(), "failed parsing operand.");
4701         while (!getLexer().is(AsmToken::EndOfStatement)) {
4702           Parser.Lex();
4703         }
4704         return true;
4705       case MatchOperand_NoMatch:
4706         // FIXME: use real operand location rather than the current location.
4707         Error(getLexer().getLoc(), "not a valid operand.");
4708         while (!getLexer().is(AsmToken::EndOfStatement)) {
4709           Parser.Lex();
4710         }
4711         return true;
4712     }
4713   }
4714 
4715   return false;
4716 }
4717 
4718 //===----------------------------------------------------------------------===//
4719 // Utility functions
4720 //===----------------------------------------------------------------------===//
4721 
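// Parse an integer operand written as "<Prefix>:<expr>", e.g. "offset:4095"
// (illustrative value). Returns NoMatch if the prefix is absent.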
4722 OperandMatchResultTy
4723 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4724 
4725   if (!trySkipId(Prefix, AsmToken::Colon))
4726     return MatchOperand_NoMatch;
4727 
4728   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4729 }
4730 
4731 OperandMatchResultTy
4732 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4733                                     AMDGPUOperand::ImmTy ImmTy,
4734                                     bool (*ConvertResult)(int64_t&)) {
4735   SMLoc S = getLoc();
4736   int64_t Value = 0;
4737 
4738   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4739   if (Res != MatchOperand_Success)
4740     return Res;
4741 
4742   if (ConvertResult && !ConvertResult(Value)) {
4743     Error(S, "invalid " + StringRef(Prefix) + " value.");
4744   }
4745 
4746   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4747   return MatchOperand_Success;
4748 }
4749 
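// Parse an operand written as "<Prefix>:[e0,e1,...]", where each element must
// be 0 or 1 and at most 4 elements are accepted; the bits are packed into a
// single immediate. A modifier such as op_sel:[0,0,1] (illustrative) takes
// this form.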
4750 OperandMatchResultTy
4751 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4752                                              OperandVector &Operands,
4753                                              AMDGPUOperand::ImmTy ImmTy,
4754                                              bool (*ConvertResult)(int64_t&)) {
4755   SMLoc S = getLoc();
4756   if (!trySkipId(Prefix, AsmToken::Colon))
4757     return MatchOperand_NoMatch;
4758 
4759   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4760     return MatchOperand_ParseFail;
4761 
4762   unsigned Val = 0;
4763   const unsigned MaxSize = 4;
4764 
4765   // FIXME: How to verify the number of elements matches the number of src
4766   // operands?
4767   for (int I = 0; ; ++I) {
4768     int64_t Op;
4769     SMLoc Loc = getLoc();
4770     if (!parseExpr(Op))
4771       return MatchOperand_ParseFail;
4772 
4773     if (Op != 0 && Op != 1) {
4774       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4775       return MatchOperand_ParseFail;
4776     }
4777 
4778     Val |= (Op << I);
4779 
4780     if (trySkipToken(AsmToken::RBrac))
4781       break;
4782 
4783     if (I + 1 == MaxSize) {
4784       Error(getLoc(), "expected a closing square bracket");
4785       return MatchOperand_ParseFail;
4786     }
4787 
4788     if (!skipToken(AsmToken::Comma, "expected a comma"))
4789       return MatchOperand_ParseFail;
4790   }
4791 
4792   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4793   return MatchOperand_Success;
4794 }
4795 
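// Parse a named single-bit modifier. The bare name (e.g. "glc") sets the bit,
// a "no"-prefixed spelling (e.g. "noglc") explicitly clears it, and if the
// statement ends before the modifier, the default value of 0 is used.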
4796 OperandMatchResultTy
4797 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4798                                AMDGPUOperand::ImmTy ImmTy) {
4799   int64_t Bit = 0;
4800   SMLoc S = Parser.getTok().getLoc();
4801 
4802   // If we are at the end of the statement, this is a default argument, so
4803   // use the default value.
4804   if (getLexer().isNot(AsmToken::EndOfStatement)) {
4805     switch(getLexer().getKind()) {
4806       case AsmToken::Identifier: {
4807         StringRef Tok = Parser.getTok().getString();
4808         if (Tok == Name) {
4809           if (Tok == "r128" && !hasMIMG_R128())
4810             Error(S, "r128 modifier is not supported on this GPU");
4811           if (Tok == "a16" && !isGFX9() && !hasGFX10A16())
4812             Error(S, "a16 modifier is not supported on this GPU");
4813           Bit = 1;
4814           Parser.Lex();
4815         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4816           Bit = 0;
4817           Parser.Lex();
4818         } else {
4819           return MatchOperand_NoMatch;
4820         }
4821         break;
4822       }
4823       default:
4824         return MatchOperand_NoMatch;
4825     }
4826   }
4827 
4828   if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4829     return MatchOperand_ParseFail;
4830 
4831   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
4832     ImmTy = AMDGPUOperand::ImmTyR128A16;
4833 
4834   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4835   return MatchOperand_Success;
4836 }
4837 
4838 static void addOptionalImmOperand(
4839   MCInst& Inst, const OperandVector& Operands,
4840   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4841   AMDGPUOperand::ImmTy ImmT,
4842   int64_t Default = 0) {
4843   auto i = OptionalIdx.find(ImmT);
4844   if (i != OptionalIdx.end()) {
4845     unsigned Idx = i->second;
4846     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4847   } else {
4848     Inst.addOperand(MCOperand::createImm(Default));
4849   }
4850 }
4851 
4852 OperandMatchResultTy
4853 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4854   if (getLexer().isNot(AsmToken::Identifier)) {
4855     return MatchOperand_NoMatch;
4856   }
4857   StringRef Tok = Parser.getTok().getString();
4858   if (Tok != Prefix) {
4859     return MatchOperand_NoMatch;
4860   }
4861 
4862   Parser.Lex();
4863   if (getLexer().isNot(AsmToken::Colon)) {
4864     return MatchOperand_ParseFail;
4865   }
4866 
4867   Parser.Lex();
4868   if (getLexer().isNot(AsmToken::Identifier)) {
4869     return MatchOperand_ParseFail;
4870   }
4871 
4872   Value = Parser.getTok().getString();
4873   return MatchOperand_Success;
4874 }
4875 
4876 //===----------------------------------------------------------------------===//
4877 // MTBUF format
4878 //===----------------------------------------------------------------------===//
4879 
4880 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
4881                                   int64_t MaxVal,
4882                                   int64_t &Fmt) {
4883   int64_t Val;
4884   SMLoc Loc = getLoc();
4885 
4886   auto Res = parseIntWithPrefix(Pref, Val);
4887   if (Res == MatchOperand_ParseFail)
4888     return false;
4889   if (Res == MatchOperand_NoMatch)
4890     return true;
4891 
4892   if (Val < 0 || Val > MaxVal) {
4893     Error(Loc, Twine("out of range ", StringRef(Pref)));
4894     return false;
4895   }
4896 
4897   Fmt = Val;
4898   return true;
4899 }
4900 
4901 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
4902 // values to live in a joint format operand in the MCInst encoding.
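// Either or both may be present and may appear in any order, e.g.
// "dfmt:4, nfmt:7" or "nfmt:7, dfmt:4" (illustrative values); absent fields
// take their default values.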
4903 OperandMatchResultTy
4904 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
4905   using namespace llvm::AMDGPU::MTBUFFormat;
4906 
4907   int64_t Dfmt = DFMT_UNDEF;
4908   int64_t Nfmt = NFMT_UNDEF;
4909 
4910   // dfmt and nfmt can appear in either order, and each is optional.
4911   for (int I = 0; I < 2; ++I) {
4912     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
4913       return MatchOperand_ParseFail;
4914 
4915     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
4916       return MatchOperand_ParseFail;
4917     }
4918     // Skip optional comma between dfmt/nfmt
4919     // but guard against 2 commas following each other.
4920     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
4921         !peekToken().is(AsmToken::Comma)) {
4922       trySkipToken(AsmToken::Comma);
4923     }
4924   }
4925 
4926   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
4927     return MatchOperand_NoMatch;
4928 
4929   Dfmt = (Dfmt == DFMT_UNDEF)? DFMT_DEFAULT : Dfmt;
4930   Nfmt = (Nfmt == NFMT_UNDEF)? NFMT_DEFAULT : Nfmt;
4931 
4932   Format = encodeDfmtNfmt(Dfmt, Nfmt);
4933   return MatchOperand_Success;
4934 }
4935 
4936 OperandMatchResultTy
4937 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
4938   using namespace llvm::AMDGPU::MTBUFFormat;
4939 
4940   int64_t Fmt = UFMT_UNDEF;
4941 
4942   if (!tryParseFmt("format", UFMT_MAX, Fmt))
4943     return MatchOperand_ParseFail;
4944 
4945   if (Fmt == UFMT_UNDEF)
4946     return MatchOperand_NoMatch;
4947 
4948   Format = Fmt;
4949   return MatchOperand_Success;
4950 }
4951 
4952 OperandMatchResultTy
4953 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
4954   using namespace llvm::AMDGPU::MTBUFFormat;
4955 
4956   int64_t Format = isGFX10() ? UFMT_DEFAULT : DFMT_NFMT_DEFAULT;
4957   OperandMatchResultTy Res;
4958   SMLoc Loc = getLoc();
4959 
4960   Res = isGFX10() ? parseUfmt(Format) : parseDfmtNfmt(Format);
4961   if (Res == MatchOperand_ParseFail)
4962     return Res;
4963 
4964   Operands.push_back(
4965     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
4966   return MatchOperand_Success;
4967 }
4968 
4969 //===----------------------------------------------------------------------===//
4970 // ds
4971 //===----------------------------------------------------------------------===//
4972 
4973 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
4974                                     const OperandVector &Operands) {
4975   OptionalImmIndexMap OptionalIdx;
4976 
4977   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4978     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4979 
4980     // Add the register arguments
4981     if (Op.isReg()) {
4982       Op.addRegOperands(Inst, 1);
4983       continue;
4984     }
4985 
4986     // Handle optional arguments
4987     OptionalIdx[Op.getImmTy()] = i;
4988   }
4989 
4990   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
4991   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
4992   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4993 
4994   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4995 }
4996 
4997 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
4998                                 bool IsGdsHardcoded) {
4999   OptionalImmIndexMap OptionalIdx;
5000 
5001   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5002     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5003 
5004     // Add the register arguments
5005     if (Op.isReg()) {
5006       Op.addRegOperands(Inst, 1);
5007       continue;
5008     }
5009 
5010     if (Op.isToken() && Op.getToken() == "gds") {
5011       IsGdsHardcoded = true;
5012       continue;
5013     }
5014 
5015     // Handle optional arguments
5016     OptionalIdx[Op.getImmTy()] = i;
5017   }
5018 
5019   AMDGPUOperand::ImmTy OffsetType =
5020     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5021      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5022      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5023                                                       AMDGPUOperand::ImmTyOffset;
5024 
5025   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5026 
5027   if (!IsGdsHardcoded) {
5028     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5029   }
5030   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5031 }
5032 
5033 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
5034   OptionalImmIndexMap OptionalIdx;
5035 
5036   unsigned OperandIdx[4];
5037   unsigned EnMask = 0;
5038   int SrcIdx = 0;
5039 
5040   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5041     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5042 
5043     // Add the register arguments
5044     if (Op.isReg()) {
5045       assert(SrcIdx < 4);
5046       OperandIdx[SrcIdx] = Inst.size();
5047       Op.addRegOperands(Inst, 1);
5048       ++SrcIdx;
5049       continue;
5050     }
5051 
5052     if (Op.isOff()) {
5053       assert(SrcIdx < 4);
5054       OperandIdx[SrcIdx] = Inst.size();
5055       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
5056       ++SrcIdx;
5057       continue;
5058     }
5059 
5060     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
5061       Op.addImmOperands(Inst, 1);
5062       continue;
5063     }
5064 
5065     if (Op.isToken() && Op.getToken() == "done")
5066       continue;
5067 
5068     // Handle optional arguments
5069     OptionalIdx[Op.getImmTy()] = i;
5070   }
5071 
5072   assert(SrcIdx == 4);
5073 
5074   bool Compr = false;
5075   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
5076     Compr = true;
5077     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
5078     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
5079     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
5080   }
5081 
5082   for (auto i = 0; i < SrcIdx; ++i) {
5083     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
5084       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
5085     }
5086   }
5087 
5088   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
5089   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
5090 
5091   Inst.addOperand(MCOperand::createImm(EnMask));
5092 }
5093 
5094 //===----------------------------------------------------------------------===//
5095 // s_waitcnt
5096 //===----------------------------------------------------------------------===//
5097 
5098 static bool
5099 encodeCnt(
5100   const AMDGPU::IsaVersion ISA,
5101   int64_t &IntVal,
5102   int64_t CntVal,
5103   bool Saturate,
5104   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
5105   unsigned (*decode)(const IsaVersion &Version, unsigned))
5106 {
5107   bool Failed = false;
5108 
5109   IntVal = encode(ISA, IntVal, CntVal);
5110   if (CntVal != decode(ISA, IntVal)) {
5111     if (Saturate) {
5112       IntVal = encode(ISA, IntVal, -1);
5113     } else {
5114       Failed = true;
5115     }
5116   }
5117   return Failed;
5118 }
5119 
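// Parse a single counter specification of the form "<name>(<value>)", where
// the name is vmcnt, expcnt or lgkmcnt (or a *_sat variant that saturates an
// out-of-range value). A full operand chains several of these, separated by
// optional '&' or ',', e.g. "vmcnt(0) expcnt(0) lgkmcnt(0)" (illustrative).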
5120 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
5121 
5122   SMLoc CntLoc = getLoc();
5123   StringRef CntName = getTokenStr();
5124 
5125   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
5126       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
5127     return false;
5128 
5129   int64_t CntVal;
5130   SMLoc ValLoc = getLoc();
5131   if (!parseExpr(CntVal))
5132     return false;
5133 
5134   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5135 
5136   bool Failed = true;
5137   bool Sat = CntName.endswith("_sat");
5138 
5139   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
5140     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
5141   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
5142     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
5143   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
5144     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
5145   } else {
5146     Error(CntLoc, "invalid counter name " + CntName);
5147     return false;
5148   }
5149 
5150   if (Failed) {
5151     Error(ValLoc, "too large value for " + CntName);
5152     return false;
5153   }
5154 
5155   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
5156     return false;
5157 
5158   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
5159     if (isToken(AsmToken::EndOfStatement)) {
5160       Error(getLoc(), "expected a counter name");
5161       return false;
5162     }
5163   }
5164 
5165   return true;
5166 }
5167 
5168 OperandMatchResultTy
5169 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
5170   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5171   int64_t Waitcnt = getWaitcntBitMask(ISA);
5172   SMLoc S = getLoc();
5173 
5174   // If parse failed, do not return error code
5175   // to avoid excessive error messages.
5176   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
5177     while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
5178   } else {
5179     parseExpr(Waitcnt);
5180   }
5181 
5182   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
5183   return MatchOperand_Success;
5184 }
5185 
5186 bool
5187 AMDGPUOperand::isSWaitCnt() const {
5188   return isImm();
5189 }
5190 
5191 //===----------------------------------------------------------------------===//
5192 // hwreg
5193 //===----------------------------------------------------------------------===//
5194 
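// Parse the body of a hwreg operand, i.e. what follows the opening parenthesis
// of "hwreg(<name or id>[, <offset>, <width>])". A full symbolic form might
// look like hwreg(HW_REG_MODE, 0, 1) (illustrative); a raw numeric register id
// is accepted as well.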
5195 bool
5196 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
5197                                 int64_t &Offset,
5198                                 int64_t &Width) {
5199   using namespace llvm::AMDGPU::Hwreg;
5200 
5201   // The register may be specified by name or using a numeric code
5202   if (isToken(AsmToken::Identifier) &&
5203       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
5204     HwReg.IsSymbolic = true;
5205     lex(); // skip register name
5206   } else if (!parseExpr(HwReg.Id)) {
5207     return false;
5208   }
5209 
5210   if (trySkipToken(AsmToken::RParen))
5211     return true;
5212 
5213   // parse optional params
5214   return
5215     skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
5216     parseExpr(Offset) &&
5217     skipToken(AsmToken::Comma, "expected a comma") &&
5218     parseExpr(Width) &&
5219     skipToken(AsmToken::RParen, "expected a closing parenthesis");
5220 }
5221 
5222 bool
5223 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
5224                                const int64_t Offset,
5225                                const int64_t Width,
5226                                const SMLoc Loc) {
5227 
5228   using namespace llvm::AMDGPU::Hwreg;
5229 
5230   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
5231     Error(Loc, "specified hardware register is not supported on this GPU");
5232     return false;
5233   } else if (!isValidHwreg(HwReg.Id)) {
5234     Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
5235     return false;
5236   } else if (!isValidHwregOffset(Offset)) {
5237     Error(Loc, "invalid bit offset: only 5-bit values are legal");
5238     return false;
5239   } else if (!isValidHwregWidth(Width)) {
5240     Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
5241     return false;
5242   }
5243   return true;
5244 }
5245 
5246 OperandMatchResultTy
5247 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
5248   using namespace llvm::AMDGPU::Hwreg;
5249 
5250   int64_t ImmVal = 0;
5251   SMLoc Loc = getLoc();
5252 
5253   // If parse failed, do not return error code
5254   // to avoid excessive error messages.
5255   if (trySkipId("hwreg", AsmToken::LParen)) {
5256     OperandInfoTy HwReg(ID_UNKNOWN_);
5257     int64_t Offset = OFFSET_DEFAULT_;
5258     int64_t Width = WIDTH_DEFAULT_;
5259     if (parseHwregBody(HwReg, Offset, Width) &&
5260         validateHwreg(HwReg, Offset, Width, Loc)) {
5261       ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
5262     }
5263   } else if (parseExpr(ImmVal)) {
5264     if (ImmVal < 0 || !isUInt<16>(ImmVal))
5265       Error(Loc, "invalid immediate: only 16-bit values are legal");
5266   }
5267 
5268   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
5269   return MatchOperand_Success;
5270 }
5271 
5272 bool AMDGPUOperand::isHwreg() const {
5273   return isImmTy(ImmTyHwreg);
5274 }
5275 
5276 //===----------------------------------------------------------------------===//
5277 // sendmsg
5278 //===----------------------------------------------------------------------===//
5279 
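// Parse the body of a sendmsg operand, i.e. what follows the opening
// parenthesis of "sendmsg(<msg>[, <operation>[, <stream>]])". A full symbolic
// form might look like sendmsg(MSG_GS, GS_OP_EMIT, 0) (illustrative); numeric
// ids are accepted as well.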
5280 bool
5281 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
5282                                   OperandInfoTy &Op,
5283                                   OperandInfoTy &Stream) {
5284   using namespace llvm::AMDGPU::SendMsg;
5285 
5286   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
5287     Msg.IsSymbolic = true;
5288     lex(); // skip message name
5289   } else if (!parseExpr(Msg.Id)) {
5290     return false;
5291   }
5292 
5293   if (trySkipToken(AsmToken::Comma)) {
5294     Op.IsDefined = true;
5295     if (isToken(AsmToken::Identifier) &&
5296         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
5297       lex(); // skip operation name
5298     } else if (!parseExpr(Op.Id)) {
5299       return false;
5300     }
5301 
5302     if (trySkipToken(AsmToken::Comma)) {
5303       Stream.IsDefined = true;
5304       if (!parseExpr(Stream.Id))
5305         return false;
5306     }
5307   }
5308 
5309   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
5310 }
5311 
5312 bool
5313 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
5314                                  const OperandInfoTy &Op,
5315                                  const OperandInfoTy &Stream,
5316                                  const SMLoc S) {
5317   using namespace llvm::AMDGPU::SendMsg;
5318 
5319   // Validation strictness depends on whether the message is specified
5320   // in a symbolic or in a numeric form. In the latter case
5321   // we only check that the value can be encoded.
5322   bool Strict = Msg.IsSymbolic;
5323 
5324   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
5325     Error(S, "invalid message id");
5326     return false;
5327   } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
5328     Error(S, Op.IsDefined ?
5329              "message does not support operations" :
5330              "missing message operation");
5331     return false;
5332   } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
5333     Error(S, "invalid operation id");
5334     return false;
5335   } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
5336     Error(S, "message operation does not support streams");
5337     return false;
5338   } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
5339     Error(S, "invalid message stream id");
5340     return false;
5341   }
5342   return true;
5343 }
5344 
5345 OperandMatchResultTy
5346 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
5347   using namespace llvm::AMDGPU::SendMsg;
5348 
5349   int64_t ImmVal = 0;
5350   SMLoc Loc = getLoc();
5351 
5352   // If parse failed, do not return error code
5353   // to avoid excessive error messages.
5354   if (trySkipId("sendmsg", AsmToken::LParen)) {
5355     OperandInfoTy Msg(ID_UNKNOWN_);
5356     OperandInfoTy Op(OP_NONE_);
5357     OperandInfoTy Stream(STREAM_ID_NONE_);
5358     if (parseSendMsgBody(Msg, Op, Stream) &&
5359         validateSendMsg(Msg, Op, Stream, Loc)) {
5360       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5361     }
5362   } else if (parseExpr(ImmVal)) {
5363     if (ImmVal < 0 || !isUInt<16>(ImmVal))
5364       Error(Loc, "invalid immediate: only 16-bit values are legal");
5365   }
5366 
5367   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5368   return MatchOperand_Success;
5369 }
5370 
5371 bool AMDGPUOperand::isSendMsg() const {
5372   return isImmTy(ImmTySendMsg);
5373 }
5374 
5375 //===----------------------------------------------------------------------===//
5376 // v_interp
5377 //===----------------------------------------------------------------------===//
5378 
5379 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5380   if (getLexer().getKind() != AsmToken::Identifier)
5381     return MatchOperand_NoMatch;
5382 
5383   StringRef Str = Parser.getTok().getString();
5384   int Slot = StringSwitch<int>(Str)
5385     .Case("p10", 0)
5386     .Case("p20", 1)
5387     .Case("p0", 2)
5388     .Default(-1);
5389 
5390   SMLoc S = Parser.getTok().getLoc();
5391   if (Slot == -1)
5392     return MatchOperand_ParseFail;
5393 
5394   Parser.Lex();
5395   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5396                                               AMDGPUOperand::ImmTyInterpSlot));
5397   return MatchOperand_Success;
5398 }
5399 
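// Parse an interpolation attribute operand of the form "attr<N>.<chan>", where
// N is 0..63 and the channel is one of .x, .y, .z or .w, e.g. "attr0.x"
// (illustrative). Two immediates are produced: the attribute index and the
// channel.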
5400 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5401   if (getLexer().getKind() != AsmToken::Identifier)
5402     return MatchOperand_NoMatch;
5403 
5404   StringRef Str = Parser.getTok().getString();
5405   if (!Str.startswith("attr"))
5406     return MatchOperand_NoMatch;
5407 
5408   StringRef Chan = Str.take_back(2);
5409   int AttrChan = StringSwitch<int>(Chan)
5410     .Case(".x", 0)
5411     .Case(".y", 1)
5412     .Case(".z", 2)
5413     .Case(".w", 3)
5414     .Default(-1);
5415   if (AttrChan == -1)
5416     return MatchOperand_ParseFail;
5417 
5418   Str = Str.drop_back(2).drop_front(4);
5419 
5420   uint8_t Attr;
5421   if (Str.getAsInteger(10, Attr))
5422     return MatchOperand_ParseFail;
5423 
5424   SMLoc S = Parser.getTok().getLoc();
5425   Parser.Lex();
5426   if (Attr > 63) {
5427     Error(S, "out of bounds attr");
5428     return MatchOperand_Success;
5429   }
5430 
5431   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5432 
5433   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5434                                               AMDGPUOperand::ImmTyInterpAttr));
5435   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5436                                               AMDGPUOperand::ImmTyAttrChan));
5437   return MatchOperand_Success;
5438 }
5439 
5440 //===----------------------------------------------------------------------===//
5441 // exp
5442 //===----------------------------------------------------------------------===//
5443 
5444 void AMDGPUAsmParser::errorExpTgt() {
5445   Error(Parser.getTok().getLoc(), "invalid exp target");
5446 }
5447 
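// Map an export target name to its encoded value, as implemented below:
// mrt0..mrt7 -> 0..7, mrtz -> 8, null -> 9, pos0..pos3 -> 12..15 (pos4 is
// additionally allowed on GFX10), prim -> 20 (GFX10 only), and
// param0..param31 -> 32..63.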
5448 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5449                                                       uint8_t &Val) {
5450   if (Str == "null") {
5451     Val = 9;
5452     return MatchOperand_Success;
5453   }
5454 
5455   if (Str.startswith("mrt")) {
5456     Str = Str.drop_front(3);
5457     if (Str == "z") { // == mrtz
5458       Val = 8;
5459       return MatchOperand_Success;
5460     }
5461 
5462     if (Str.getAsInteger(10, Val))
5463       return MatchOperand_ParseFail;
5464 
5465     if (Val > 7)
5466       errorExpTgt();
5467 
5468     return MatchOperand_Success;
5469   }
5470 
5471   if (Str.startswith("pos")) {
5472     Str = Str.drop_front(3);
5473     if (Str.getAsInteger(10, Val))
5474       return MatchOperand_ParseFail;
5475 
5476     if (Val > 4 || (Val == 4 && !isGFX10()))
5477       errorExpTgt();
5478 
5479     Val += 12;
5480     return MatchOperand_Success;
5481   }
5482 
5483   if (isGFX10() && Str == "prim") {
5484     Val = 20;
5485     return MatchOperand_Success;
5486   }
5487 
5488   if (Str.startswith("param")) {
5489     Str = Str.drop_front(5);
5490     if (Str.getAsInteger(10, Val))
5491       return MatchOperand_ParseFail;
5492 
5493     if (Val >= 32)
5494       errorExpTgt();
5495 
5496     Val += 32;
5497     return MatchOperand_Success;
5498   }
5499 
5500   if (Str.startswith("invalid_target_")) {
5501     Str = Str.drop_front(15);
5502     if (Str.getAsInteger(10, Val))
5503       return MatchOperand_ParseFail;
5504 
5505     errorExpTgt();
5506     return MatchOperand_Success;
5507   }
5508 
5509   return MatchOperand_NoMatch;
5510 }
5511 
5512 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5513   uint8_t Val;
5514   StringRef Str = Parser.getTok().getString();
5515 
5516   auto Res = parseExpTgtImpl(Str, Val);
5517   if (Res != MatchOperand_Success)
5518     return Res;
5519 
5520   SMLoc S = Parser.getTok().getLoc();
5521   Parser.Lex();
5522 
5523   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5524                                               AMDGPUOperand::ImmTyExpTgt));
5525   return MatchOperand_Success;
5526 }
5527 
5528 //===----------------------------------------------------------------------===//
5529 // parser helpers
5530 //===----------------------------------------------------------------------===//
5531 
5532 bool
5533 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5534   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5535 }
5536 
5537 bool
5538 AMDGPUAsmParser::isId(const StringRef Id) const {
5539   return isId(getToken(), Id);
5540 }
5541 
5542 bool
5543 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5544   return getTokenKind() == Kind;
5545 }
5546 
5547 bool
5548 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5549   if (isId(Id)) {
5550     lex();
5551     return true;
5552   }
5553   return false;
5554 }
5555 
5556 bool
5557 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5558   if (isId(Id) && peekToken().is(Kind)) {
5559     lex();
5560     lex();
5561     return true;
5562   }
5563   return false;
5564 }
5565 
5566 bool
5567 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5568   if (isToken(Kind)) {
5569     lex();
5570     return true;
5571   }
5572   return false;
5573 }
5574 
5575 bool
5576 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5577                            const StringRef ErrMsg) {
5578   if (!trySkipToken(Kind)) {
5579     Error(getLoc(), ErrMsg);
5580     return false;
5581   }
5582   return true;
5583 }
5584 
5585 bool
5586 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5587   return !getParser().parseAbsoluteExpression(Imm);
5588 }
5589 
5590 bool
5591 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
5592   SMLoc S = getLoc();
5593 
5594   const MCExpr *Expr;
5595   if (Parser.parseExpression(Expr))
5596     return false;
5597 
5598   int64_t IntVal;
5599   if (Expr->evaluateAsAbsolute(IntVal)) {
5600     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
5601   } else {
5602     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
5603   }
5604   return true;
5605 }
5606 
5607 bool
5608 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5609   if (isToken(AsmToken::String)) {
5610     Val = getToken().getStringContents();
5611     lex();
5612     return true;
5613   } else {
5614     Error(getLoc(), ErrMsg);
5615     return false;
5616   }
5617 }
5618 
5619 AsmToken
5620 AMDGPUAsmParser::getToken() const {
5621   return Parser.getTok();
5622 }
5623 
5624 AsmToken
5625 AMDGPUAsmParser::peekToken() {
5626   return getLexer().peekTok();
5627 }
5628 
5629 void
5630 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
5631   auto TokCount = getLexer().peekTokens(Tokens);
5632 
5633   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
5634     Tokens[Idx] = AsmToken(AsmToken::Error, "");
5635 }
5636 
5637 AsmToken::TokenKind
5638 AMDGPUAsmParser::getTokenKind() const {
5639   return getLexer().getKind();
5640 }
5641 
5642 SMLoc
5643 AMDGPUAsmParser::getLoc() const {
5644   return getToken().getLoc();
5645 }
5646 
5647 StringRef
5648 AMDGPUAsmParser::getTokenStr() const {
5649   return getToken().getString();
5650 }
5651 
5652 void
5653 AMDGPUAsmParser::lex() {
5654   Parser.Lex();
5655 }
5656 
5657 //===----------------------------------------------------------------------===//
5658 // swizzle
5659 //===----------------------------------------------------------------------===//
5660 
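// Pack the three bitmask-permute fields into a ds_swizzle offset encoding.
// Conceptually, each thread then reads from the lane computed roughly as
// ((lane_id & AndMask) | OrMask) ^ XorMask; see the ISA documentation for the
// authoritative definition.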
5661 LLVM_READNONE
5662 static unsigned
5663 encodeBitmaskPerm(const unsigned AndMask,
5664                   const unsigned OrMask,
5665                   const unsigned XorMask) {
5666   using namespace llvm::AMDGPU::Swizzle;
5667 
5668   return BITMASK_PERM_ENC |
5669          (AndMask << BITMASK_AND_SHIFT) |
5670          (OrMask  << BITMASK_OR_SHIFT)  |
5671          (XorMask << BITMASK_XOR_SHIFT);
5672 }
5673 
5674 bool
5675 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5676                                       const unsigned MinVal,
5677                                       const unsigned MaxVal,
5678                                       const StringRef ErrMsg) {
5679   for (unsigned i = 0; i < OpNum; ++i) {
5680     if (!skipToken(AsmToken::Comma, "expected a comma")){
5681       return false;
5682     }
5683     SMLoc ExprLoc = Parser.getTok().getLoc();
5684     if (!parseExpr(Op[i])) {
5685       return false;
5686     }
5687     if (Op[i] < MinVal || Op[i] > MaxVal) {
5688       Error(ExprLoc, ErrMsg);
5689       return false;
5690     }
5691   }
5692 
5693   return true;
5694 }
5695 
5696 bool
5697 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5698   using namespace llvm::AMDGPU::Swizzle;
5699 
5700   int64_t Lane[LANE_NUM];
5701   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5702                            "expected a 2-bit lane id")) {
5703     Imm = QUAD_PERM_ENC;
5704     for (unsigned I = 0; I < LANE_NUM; ++I) {
5705       Imm |= Lane[I] << (LANE_SHIFT * I);
5706     }
5707     return true;
5708   }
5709   return false;
5710 }
5711 
5712 bool
5713 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5714   using namespace llvm::AMDGPU::Swizzle;
5715 
5716   SMLoc S = Parser.getTok().getLoc();
5717   int64_t GroupSize;
5718   int64_t LaneIdx;
5719 
5720   if (!parseSwizzleOperands(1, &GroupSize,
5721                             2, 32,
5722                             "group size must be in the interval [2,32]")) {
5723     return false;
5724   }
5725   if (!isPowerOf2_64(GroupSize)) {
5726     Error(S, "group size must be a power of two");
5727     return false;
5728   }
5729   if (parseSwizzleOperands(1, &LaneIdx,
5730                            0, GroupSize - 1,
5731                            "lane id must be in the interval [0,group size - 1]")) {
5732     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5733     return true;
5734   }
5735   return false;
5736 }
5737 
5738 bool
5739 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
5740   using namespace llvm::AMDGPU::Swizzle;
5741 
5742   SMLoc S = Parser.getTok().getLoc();
5743   int64_t GroupSize;
5744 
5745   if (!parseSwizzleOperands(1, &GroupSize,
5746       2, 32, "group size must be in the interval [2,32]")) {
5747     return false;
5748   }
5749   if (!isPowerOf2_64(GroupSize)) {
5750     Error(S, "group size must be a power of two");
5751     return false;
5752   }
5753 
5754   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
5755   return true;
5756 }
5757 
5758 bool
5759 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5760   using namespace llvm::AMDGPU::Swizzle;
5761 
5762   SMLoc S = Parser.getTok().getLoc();
5763   int64_t GroupSize;
5764 
5765   if (!parseSwizzleOperands(1, &GroupSize,
5766       1, 16, "group size must be in the interval [1,16]")) {
5767     return false;
5768   }
5769   if (!isPowerOf2_64(GroupSize)) {
5770     Error(S, "group size must be a power of two");
5771     return false;
5772   }
5773 
5774   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5775   return true;
5776 }
5777 
5778 bool
5779 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5780   using namespace llvm::AMDGPU::Swizzle;
5781 
5782   if (!skipToken(AsmToken::Comma, "expected a comma")) {
5783     return false;
5784   }
5785 
5786   StringRef Ctl;
5787   SMLoc StrLoc = Parser.getTok().getLoc();
5788   if (!parseString(Ctl)) {
5789     return false;
5790   }
5791   if (Ctl.size() != BITMASK_WIDTH) {
5792     Error(StrLoc, "expected a 5-character mask");
5793     return false;
5794   }
5795 
5796   unsigned AndMask = 0;
5797   unsigned OrMask = 0;
5798   unsigned XorMask = 0;
5799 
5800   for (size_t i = 0; i < Ctl.size(); ++i) {
5801     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5802     switch(Ctl[i]) {
5803     default:
5804       Error(StrLoc, "invalid mask");
5805       return false;
5806     case '0':
5807       break;
5808     case '1':
5809       OrMask |= Mask;
5810       break;
5811     case 'p':
5812       AndMask |= Mask;
5813       break;
5814     case 'i':
5815       AndMask |= Mask;
5816       XorMask |= Mask;
5817       break;
5818     }
5819   }
5820 
5821   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
5822   return true;
5823 }
5824 
5825 bool
5826 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5827 
5828   SMLoc OffsetLoc = Parser.getTok().getLoc();
5829 
5830   if (!parseExpr(Imm)) {
5831     return false;
5832   }
5833   if (!isUInt<16>(Imm)) {
5834     Error(OffsetLoc, "expected a 16-bit offset");
5835     return false;
5836   }
5837   return true;
5838 }
5839 
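// Parse the argument list of a symbolic swizzle macro; the "swizzle" keyword
// itself has already been consumed by the caller. Full operands look like
// "offset:swizzle(QUAD_PERM, 0, 1, 2, 3)" or
// "offset:swizzle(BITMASK_PERM, "00p11")" (illustrative arguments).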
5840 bool
5841 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
5842   using namespace llvm::AMDGPU::Swizzle;
5843 
5844   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
5845 
5846     SMLoc ModeLoc = Parser.getTok().getLoc();
5847     bool Ok = false;
5848 
5849     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
5850       Ok = parseSwizzleQuadPerm(Imm);
5851     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
5852       Ok = parseSwizzleBitmaskPerm(Imm);
5853     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
5854       Ok = parseSwizzleBroadcast(Imm);
5855     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
5856       Ok = parseSwizzleSwap(Imm);
5857     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
5858       Ok = parseSwizzleReverse(Imm);
5859     } else {
5860       Error(ModeLoc, "expected a swizzle mode");
5861     }
5862 
5863     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
5864   }
5865 
5866   return false;
5867 }
5868 
5869 OperandMatchResultTy
5870 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
5871   SMLoc S = Parser.getTok().getLoc();
5872   int64_t Imm = 0;
5873 
5874   if (trySkipId("offset")) {
5875 
5876     bool Ok = false;
5877     if (skipToken(AsmToken::Colon, "expected a colon")) {
5878       if (trySkipId("swizzle")) {
5879         Ok = parseSwizzleMacro(Imm);
5880       } else {
5881         Ok = parseSwizzleOffset(Imm);
5882       }
5883     }
5884 
5885     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
5886 
5887     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
5888   } else {
5889     // The swizzle "offset" operand is optional.
5890     // If it is omitted, try parsing other optional operands.
5891     return parseOptionalOpr(Operands);
5892   }
5893 }
5894 
5895 bool
5896 AMDGPUOperand::isSwizzle() const {
5897   return isImmTy(ImmTySwizzle);
5898 }
5899 
5900 //===----------------------------------------------------------------------===//
5901 // VGPR Index Mode
5902 //===----------------------------------------------------------------------===//
5903 
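// Parse the body of a gpr_idx macro, i.e. what follows the opening parenthesis
// of "gpr_idx(<mode>[, <mode>...])", where each mode is one of the symbolic
// VGPR index modes, e.g. gpr_idx(SRC0, DST) (illustrative). An empty argument
// list yields OFF; otherwise the combined mode mask is returned.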
5904 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
5905 
5906   using namespace llvm::AMDGPU::VGPRIndexMode;
5907 
5908   if (trySkipToken(AsmToken::RParen)) {
5909     return OFF;
5910   }
5911 
5912   int64_t Imm = 0;
5913 
5914   while (true) {
5915     unsigned Mode = 0;
5916     SMLoc S = Parser.getTok().getLoc();
5917 
5918     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
5919       if (trySkipId(IdSymbolic[ModeId])) {
5920         Mode = 1 << ModeId;
5921         break;
5922       }
5923     }
5924 
5925     if (Mode == 0) {
5926       Error(S, (Imm == 0)?
5927                "expected a VGPR index mode or a closing parenthesis" :
5928                "expected a VGPR index mode");
5929       break;
5930     }
5931 
5932     if (Imm & Mode) {
5933       Error(S, "duplicate VGPR index mode");
5934       break;
5935     }
5936     Imm |= Mode;
5937 
5938     if (trySkipToken(AsmToken::RParen))
5939       break;
5940     if (!skipToken(AsmToken::Comma,
5941                    "expected a comma or a closing parenthesis"))
5942       break;
5943   }
5944 
5945   return Imm;
5946 }
5947 
5948 OperandMatchResultTy
5949 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
5950 
5951   int64_t Imm = 0;
5952   SMLoc S = Parser.getTok().getLoc();
5953 
5954   if (getLexer().getKind() == AsmToken::Identifier &&
5955       Parser.getTok().getString() == "gpr_idx" &&
5956       getLexer().peekTok().is(AsmToken::LParen)) {
5957 
5958     Parser.Lex();
5959     Parser.Lex();
5960 
5961     // If parse failed, trigger an error but do not return error code
5962     // to avoid excessive error messages.
5963     Imm = parseGPRIdxMacro();
5964 
5965   } else {
5966     if (getParser().parseAbsoluteExpression(Imm))
5967       return MatchOperand_NoMatch;
5968     if (Imm < 0 || !isUInt<4>(Imm)) {
5969       Error(S, "invalid immediate: only 4-bit values are legal");
5970     }
5971   }
5972 
5973   Operands.push_back(
5974       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
5975   return MatchOperand_Success;
5976 }
5977 
5978 bool AMDGPUOperand::isGPRIdxMode() const {
5979   return isImmTy(ImmTyGprIdxMode);
5980 }
5981 
5982 //===----------------------------------------------------------------------===//
5983 // sopp branch targets
5984 //===----------------------------------------------------------------------===//
5985 
5986 OperandMatchResultTy
5987 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
5988 
5989   // Make sure we are not parsing something
5990   // that looks like a label or an expression but is not.
5991   // This will improve error messages.
5992   if (isRegister() || isModifier())
5993     return MatchOperand_NoMatch;
5994 
5995   if (parseExpr(Operands)) {
5996 
5997     AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
5998     assert(Opr.isImm() || Opr.isExpr());
5999     SMLoc Loc = Opr.getStartLoc();
6000 
6001     // Currently we do not support arbitrary expressions as branch targets.
6002     // Only labels and absolute expressions are accepted.
6003     if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
6004       Error(Loc, "expected an absolute expression or a label");
6005     } else if (Opr.isImm() && !Opr.isS16Imm()) {
6006       Error(Loc, "expected a 16-bit signed jump offset");
6007     }
6008   }
6009 
6010   return MatchOperand_Success; // avoid excessive error messages
6011 }
6012 
6013 //===----------------------------------------------------------------------===//
6014 // Boolean holding registers
6015 //===----------------------------------------------------------------------===//
6016 
6017 OperandMatchResultTy
6018 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
6019   return parseReg(Operands);
6020 }
6021 
6022 //===----------------------------------------------------------------------===//
6023 // mubuf
6024 //===----------------------------------------------------------------------===//
6025 
6026 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
6027   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
6028 }
6029 
6030 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
6031   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
6032 }
6033 
6034 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
6035   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
6036 }
6037 
6038 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
6039                                const OperandVector &Operands,
6040                                bool IsAtomic,
6041                                bool IsAtomicReturn,
6042                                bool IsLds) {
6043   bool IsLdsOpcode = IsLds;
6044   bool HasLdsModifier = false;
6045   OptionalImmIndexMap OptionalIdx;
6046   assert(IsAtomicReturn ? IsAtomic : true);
6047   unsigned FirstOperandIdx = 1;
6048 
6049   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
6050     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6051 
6052     // Add the register arguments
6053     if (Op.isReg()) {
6054       Op.addRegOperands(Inst, 1);
6055       // Insert a tied src for atomic return dst.
6056       // This cannot be postponed as subsequent calls to
6057       // addImmOperands rely on the correct number of MC operands.
6058       if (IsAtomicReturn && i == FirstOperandIdx)
6059         Op.addRegOperands(Inst, 1);
6060       continue;
6061     }
6062 
6063     // Handle the case where soffset is an immediate
6064     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6065       Op.addImmOperands(Inst, 1);
6066       continue;
6067     }
6068 
6069     HasLdsModifier |= Op.isLDS();
6070 
6071     // Handle tokens like 'offen' which are sometimes hard-coded into the
6072     // asm string.  There are no MCInst operands for these.
6073     if (Op.isToken()) {
6074       continue;
6075     }
6076     assert(Op.isImm());
6077 
6078     // Handle optional arguments
6079     OptionalIdx[Op.getImmTy()] = i;
6080   }
6081 
6082   // This is a workaround for an llvm quirk which may result in an
6083   // incorrect instruction selection. Lds and non-lds versions of
6084   // MUBUF instructions are identical except that lds versions
6085   // have a mandatory 'lds' modifier. However this modifier follows
6086   // optional modifiers, and the llvm asm matcher regards the 'lds'
6087   // modifier as an optional one. As a result, the lds version of an
6088   // opcode may be selected even if it has no 'lds' modifier.
6089   if (IsLdsOpcode && !HasLdsModifier) {
6090     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
6091     if (NoLdsOpcode != -1) { // Got lds version - correct it.
6092       Inst.setOpcode(NoLdsOpcode);
6093       IsLdsOpcode = false;
6094     }
6095   }
6096 
6097   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
6098   if (!IsAtomic) { // glc is hard-coded.
6099     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6100   }
6101   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6102 
6103   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
6104     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6105   }
6106 
6107   if (isGFX10())
6108     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6109 }
6110 
6111 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
6112   OptionalImmIndexMap OptionalIdx;
6113 
6114   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6115     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6116 
6117     // Add the register arguments
6118     if (Op.isReg()) {
6119       Op.addRegOperands(Inst, 1);
6120       continue;
6121     }
6122 
6123     // Handle the case where soffset is an immediate
6124     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6125       Op.addImmOperands(Inst, 1);
6126       continue;
6127     }
6128 
6129     // Handle tokens like 'offen' which are sometimes hard-coded into the
6130     // asm string.  There are no MCInst operands for these.
6131     if (Op.isToken()) {
6132       continue;
6133     }
6134     assert(Op.isImm());
6135 
6136     // Handle optional arguments
6137     OptionalIdx[Op.getImmTy()] = i;
6138   }
6139 
6140   addOptionalImmOperand(Inst, Operands, OptionalIdx,
6141                         AMDGPUOperand::ImmTyOffset);
6142   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
6143   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6144   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6145   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6146 
6147   if (isGFX10())
6148     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6149 }
6150 
6151 //===----------------------------------------------------------------------===//
6152 // mimg
6153 //===----------------------------------------------------------------------===//
6154 
6155 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
6156                               bool IsAtomic) {
6157   unsigned I = 1;
6158   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6159   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6160     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6161   }
6162 
6163   if (IsAtomic) {
6164     // Add src, same as dst
6165     assert(Desc.getNumDefs() == 1);
6166     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
6167   }
6168 
6169   OptionalImmIndexMap OptionalIdx;
6170 
6171   for (unsigned E = Operands.size(); I != E; ++I) {
6172     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6173 
6174     // Add the register arguments
6175     if (Op.isReg()) {
6176       Op.addRegOperands(Inst, 1);
6177     } else if (Op.isImmModifier()) {
6178       OptionalIdx[Op.getImmTy()] = I;
6179     } else if (!Op.isToken()) {
6180       llvm_unreachable("unexpected operand type");
6181     }
6182   }
6183 
6184   bool IsGFX10 = isGFX10();
6185 
6186   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
6187   if (IsGFX10)
6188     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
6189   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
6190   if (IsGFX10)
6191     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6192   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6193   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6194   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
6195   if (IsGFX10)
6196     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
6197   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6198   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
6199   if (!IsGFX10)
6200     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
6201   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
6202 }
6203 
6204 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
6205   cvtMIMG(Inst, Operands, true);
6206 }
6207 
6208 //===----------------------------------------------------------------------===//
6209 // smrd
6210 //===----------------------------------------------------------------------===//
6211 
6212 bool AMDGPUOperand::isSMRDOffset8() const {
6213   return isImm() && isUInt<8>(getImm());
6214 }
6215 
6216 bool AMDGPUOperand::isSMEMOffset() const {
6217   return isImm(); // Offset range is checked later by validator.
6218 }
6219 
6220 bool AMDGPUOperand::isSMRDLiteralOffset() const {
6221   // 32-bit literals are only supported on CI, and we only want to use them
6222   // when the offset does not fit in 8 bits.
6223   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
6224 }
6225 
6226 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
6227   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6228 }
6229 
6230 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
6231   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6232 }
6233 
6234 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
6235   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6236 }
6237 
6238 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
6239   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6240 }
6241 
6242 //===----------------------------------------------------------------------===//
6243 // vop3
6244 //===----------------------------------------------------------------------===//
6245 
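     // Map an omod "mul" value (1, 2 or 4) to its encoded form (0, 1 or 2);
     // any other value is rejected.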
6246 static bool ConvertOmodMul(int64_t &Mul) {
6247   if (Mul != 1 && Mul != 2 && Mul != 4)
6248     return false;
6249 
6250   Mul >>= 1;
6251   return true;
6252 }
6253 
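     // Map an omod "div" value to its encoded form: div:1 encodes as 0 and
     // div:2 as 3; any other value is rejected.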
6254 static bool ConvertOmodDiv(int64_t &Div) {
6255   if (Div == 1) {
6256     Div = 0;
6257     return true;
6258   }
6259 
6260   if (Div == 2) {
6261     Div = 3;
6262     return true;
6263   }
6264 
6265   return false;
6266 }
6267 
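     // Map a dpp bound_ctrl value to its encoded form: bound_ctrl:0 encodes as 1
     // and bound_ctrl:-1 as 0; any other value is rejected.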
6268 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
6269   if (BoundCtrl == 0) {
6270     BoundCtrl = 1;
6271     return true;
6272   }
6273 
6274   if (BoundCtrl == -1) {
6275     BoundCtrl = 0;
6276     return true;
6277   }
6278 
6279   return false;
6280 }
6281 
6282 // Note: the order in this table matches the order of operands in AsmString.
6283 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
6284   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
6285   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
6286   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
6287   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
6288   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
6289   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
6290   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
6291   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
6292   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
6293   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
6294   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
6295   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
6296   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
6297   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
6298   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
6299   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
6300   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
6301   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
6302   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
6303   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
6304   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
6305   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
6306   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
6307   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
6308   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
6309   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
6310   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
6311   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
6312   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
6313   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
6314   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
6315   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
6316   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
6317   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
6318   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
6319   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
6320   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
6321   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
6322   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
6323   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
6324   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
6325   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
6326   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
6327 };
6328 
6329 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
6330 
6331   OperandMatchResultTy res = parseOptionalOpr(Operands);
6332 
6333   // This is a hack to enable hardcoded mandatory operands which follow
6334   // optional operands.
6335   //
6336   // The current design assumes that all operands after the first optional
6337   // operand are also optional. However, the implementations of some
6338   // instructions violate this rule (see e.g. flat/global atomics, which have
6339   // a hardcoded 'glc' operand).
6340   //
6341   // To work around this, we (implicitly) parse extra operands so that the
6342   // autogenerated parser for custom operands never hits a hardcoded mandatory one.
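       // For example, a returning flat atomic, written as something like
       //   flat_atomic_swap v0, v[1:2], v3 offset:16 glc   (GFX9)
       // has the hardcoded 'glc' after the optional 'offset'.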
6343 
6344   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
6345     if (res != MatchOperand_Success ||
6346         isToken(AsmToken::EndOfStatement))
6347       break;
6348 
6349     trySkipToken(AsmToken::Comma);
6350     res = parseOptionalOpr(Operands);
6351   }
6352 
6353   return res;
6354 }
6355 
6356 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6357   OperandMatchResultTy res;
6358   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6359     // try to parse any optional operand here
6360     if (Op.IsBit) {
6361       res = parseNamedBit(Op.Name, Operands, Op.Type);
6362     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6363       res = parseOModOperand(Operands);
6364     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6365                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6366                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6367       res = parseSDWASel(Operands, Op.Name, Op.Type);
6368     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6369       res = parseSDWADstUnused(Operands);
6370     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6371                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6372                Op.Type == AMDGPUOperand::ImmTyNegLo ||
6373                Op.Type == AMDGPUOperand::ImmTyNegHi) {
6374       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6375                                         Op.ConvertResult);
6376     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6377       res = parseDim(Operands);
6378     } else {
6379       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6380     }
6381     if (res != MatchOperand_NoMatch) {
6382       return res;
6383     }
6384   }
6385   return MatchOperand_NoMatch;
6386 }
6387 
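     // Parse an output-modifier (omod) operand, written as either mul:N or div:N.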
6388 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6389   StringRef Name = Parser.getTok().getString();
6390   if (Name == "mul") {
6391     return parseIntWithPrefix("mul", Operands,
6392                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6393   }
6394 
6395   if (Name == "div") {
6396     return parseIntWithPrefix("div", Operands,
6397                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6398   }
6399 
6400   return MatchOperand_NoMatch;
6401 }
6402 
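     // Convert a VOP3 instruction with op_sel. Besides the common VOP3P conversion,
     // the op_sel bit just above the source bits selects the destination half and
     // is folded into src0_modifiers as DST_OP_SEL.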
6403 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6404   cvtVOP3P(Inst, Operands);
6405 
6406   int Opc = Inst.getOpcode();
6407 
6408   int SrcNum;
6409   const int Ops[] = { AMDGPU::OpName::src0,
6410                       AMDGPU::OpName::src1,
6411                       AMDGPU::OpName::src2 };
6412   for (SrcNum = 0;
6413        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6414        ++SrcNum);
6415   assert(SrcNum > 0);
6416 
6417   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6418   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6419 
6420   if ((OpSel & (1 << SrcNum)) != 0) {
6421     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6422     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6423     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6424   }
6425 }
6426 
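     // Return true if the operand at OpNum is a source-modifiers operand that is
     // immediately followed by the (untied) register source it modifies.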
6427 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
6428       // 1. This operand is an input-modifiers operand
6429   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
6430       // 2. It is not the last operand
6431       && Desc.NumOperands > (OpNum + 1)
6432       // 3. The next operand has a register class
6433       && Desc.OpInfo[OpNum + 1].RegClass != -1
6434       // 4. The next operand is not tied to any other operand
6435       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6436 }
6437 
6438 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6439 {
6440   OptionalImmIndexMap OptionalIdx;
6441   unsigned Opc = Inst.getOpcode();
6442 
6443   unsigned I = 1;
6444   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6445   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6446     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6447   }
6448 
6449   for (unsigned E = Operands.size(); I != E; ++I) {
6450     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6451     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6452       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6453     } else if (Op.isInterpSlot() ||
6454                Op.isInterpAttr() ||
6455                Op.isAttrChan()) {
6456       Inst.addOperand(MCOperand::createImm(Op.getImm()));
6457     } else if (Op.isImmModifier()) {
6458       OptionalIdx[Op.getImmTy()] = I;
6459     } else {
6460       llvm_unreachable("unhandled operand type");
6461     }
6462   }
6463 
6464   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6465     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6466   }
6467 
6468   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6469     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6470   }
6471 
6472   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6473     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6474   }
6475 }
6476 
6477 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
6478                               OptionalImmIndexMap &OptionalIdx) {
6479   unsigned Opc = Inst.getOpcode();
6480 
6481   unsigned I = 1;
6482   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6483   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6484     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6485   }
6486 
6487   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
6488     // This instruction has src modifiers
6489     for (unsigned E = Operands.size(); I != E; ++I) {
6490       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6491       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6492         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6493       } else if (Op.isImmModifier()) {
6494         OptionalIdx[Op.getImmTy()] = I;
6495       } else if (Op.isRegOrImm()) {
6496         Op.addRegOrImmOperands(Inst, 1);
6497       } else {
6498         llvm_unreachable("unhandled operand type");
6499       }
6500     }
6501   } else {
6502     // No src modifiers
6503     for (unsigned E = Operands.size(); I != E; ++I) {
6504       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6505       if (Op.isMod()) {
6506         OptionalIdx[Op.getImmTy()] = I;
6507       } else {
6508         Op.addRegOrImmOperands(Inst, 1);
6509       }
6510     }
6511   }
6512 
6513   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6514     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6515   }
6516 
6517   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6518     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6519   }
6520 
6521   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
6522   // they have a src2 register operand that is tied to the dst operand.
6523   // The assembler does not allow modifiers for this operand, so
6524   // src2_modifiers must be 0.
6525   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
6526       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
6527       Opc == AMDGPU::V_MAC_F32_e64_vi ||
6528       Opc == AMDGPU::V_MAC_F16_e64_vi ||
6529       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
6530       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
6531       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
6532     auto it = Inst.begin();
6533     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
6534     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
6535     ++it;
6536     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6537   }
6538 }
6539 
6540 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
6541   OptionalImmIndexMap OptionalIdx;
6542   cvtVOP3(Inst, Operands, OptionalIdx);
6543 }
6544 
6545 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
6546                                const OperandVector &Operands) {
6547   OptionalImmIndexMap OptIdx;
6548   const int Opc = Inst.getOpcode();
6549   const MCInstrDesc &Desc = MII.get(Opc);
6550 
6551   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
6552 
6553   cvtVOP3(Inst, Operands, OptIdx);
6554 
6555   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
6556     assert(!IsPacked);
6557     Inst.addOperand(Inst.getOperand(0));
6558   }
6559 
6560   // FIXME: This is messy. Parse the modifiers as if this were a normal VOP3
6561   // instruction, and then figure out where to actually put the modifiers.
6562 
6563   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
6564 
6565   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
6566   if (OpSelHiIdx != -1) {
6567     int DefaultVal = IsPacked ? -1 : 0;
6568     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
6569                           DefaultVal);
6570   }
6571 
6572   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
6573   if (NegLoIdx != -1) {
6574     assert(IsPacked);
6575     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
6576     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
6577   }
6578 
6579   const int Ops[] = { AMDGPU::OpName::src0,
6580                       AMDGPU::OpName::src1,
6581                       AMDGPU::OpName::src2 };
6582   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
6583                          AMDGPU::OpName::src1_modifiers,
6584                          AMDGPU::OpName::src2_modifiers };
6585 
6586   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6587 
6588   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6589   unsigned OpSelHi = 0;
6590   unsigned NegLo = 0;
6591   unsigned NegHi = 0;
6592 
6593   if (OpSelHiIdx != -1) {
6594     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
6595   }
6596 
6597   if (NegLoIdx != -1) {
6598     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
6599     NegLo = Inst.getOperand(NegLoIdx).getImm();
6600     NegHi = Inst.getOperand(NegHiIdx).getImm();
6601   }
6602 
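       // Fold the per-source op_sel / op_sel_hi / neg_lo / neg_hi bits into the
       // corresponding src*_modifiers operands.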
6603   for (int J = 0; J < 3; ++J) {
6604     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
6605     if (OpIdx == -1)
6606       break;
6607 
6608     uint32_t ModVal = 0;
6609 
6610     if ((OpSel & (1 << J)) != 0)
6611       ModVal |= SISrcMods::OP_SEL_0;
6612 
6613     if ((OpSelHi & (1 << J)) != 0)
6614       ModVal |= SISrcMods::OP_SEL_1;
6615 
6616     if ((NegLo & (1 << J)) != 0)
6617       ModVal |= SISrcMods::NEG;
6618 
6619     if ((NegHi & (1 << J)) != 0)
6620       ModVal |= SISrcMods::NEG_HI;
6621 
6622     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
6623 
6624     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
6625   }
6626 }
6627 
6628 //===----------------------------------------------------------------------===//
6629 // dpp
6630 //===----------------------------------------------------------------------===//
6631 
6632 bool AMDGPUOperand::isDPP8() const {
6633   return isImmTy(ImmTyDPP8);
6634 }
6635 
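     // A dpp_ctrl value is valid only if it is a 9-bit immediate that falls into
     // one of the ranges or values enumerated below.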
6636 bool AMDGPUOperand::isDPPCtrl() const {
6637   using namespace AMDGPU::DPP;
6638 
6639   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
6640   if (result) {
6641     int64_t Imm = getImm();
6642     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
6643            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
6644            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
6645            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
6646            (Imm == DppCtrl::WAVE_SHL1) ||
6647            (Imm == DppCtrl::WAVE_ROL1) ||
6648            (Imm == DppCtrl::WAVE_SHR1) ||
6649            (Imm == DppCtrl::WAVE_ROR1) ||
6650            (Imm == DppCtrl::ROW_MIRROR) ||
6651            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
6652            (Imm == DppCtrl::BCAST15) ||
6653            (Imm == DppCtrl::BCAST31) ||
6654            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
6655            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
6656   }
6657   return false;
6658 }
6659 
6660 //===----------------------------------------------------------------------===//
6661 // mAI
6662 //===----------------------------------------------------------------------===//
6663 
6664 bool AMDGPUOperand::isBLGP() const {
6665   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
6666 }
6667 
6668 bool AMDGPUOperand::isCBSZ() const {
6669   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
6670 }
6671 
6672 bool AMDGPUOperand::isABID() const {
6673   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
6674 }
6675 
6676 bool AMDGPUOperand::isS16Imm() const {
6677   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
6678 }
6679 
6680 bool AMDGPUOperand::isU16Imm() const {
6681   return isImm() && isUInt<16>(getImm());
6682 }
6683 
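     // Parse a MIMG 'dim' operand (GFX10 only). The value may be written with or
     // without the SQ_RSRC_IMG_ prefix; a leading digit (as in "2D") is lexed as a
     // separate integer token and re-joined with the identifier here.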
6684 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
6685   if (!isGFX10())
6686     return MatchOperand_NoMatch;
6687 
6688   SMLoc S = Parser.getTok().getLoc();
6689 
6690   if (getLexer().isNot(AsmToken::Identifier))
6691     return MatchOperand_NoMatch;
6692   if (getLexer().getTok().getString() != "dim")
6693     return MatchOperand_NoMatch;
6694 
6695   Parser.Lex();
6696   if (getLexer().isNot(AsmToken::Colon))
6697     return MatchOperand_ParseFail;
6698 
6699   Parser.Lex();
6700 
6701   // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
6702   // integer.
6703   std::string Token;
6704   if (getLexer().is(AsmToken::Integer)) {
6705     SMLoc Loc = getLexer().getTok().getEndLoc();
6706     Token = std::string(getLexer().getTok().getString());
6707     Parser.Lex();
6708     if (getLexer().getTok().getLoc() != Loc)
6709       return MatchOperand_ParseFail;
6710   }
6711   if (getLexer().isNot(AsmToken::Identifier))
6712     return MatchOperand_ParseFail;
6713   Token += getLexer().getTok().getString();
6714 
6715   StringRef DimId = Token;
6716   if (DimId.startswith("SQ_RSRC_IMG_"))
6717     DimId = DimId.substr(12);
6718 
6719   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
6720   if (!DimInfo)
6721     return MatchOperand_ParseFail;
6722 
6723   Parser.Lex();
6724 
6725   Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
6726                                               AMDGPUOperand::ImmTyDim));
6727   return MatchOperand_Success;
6728 }
6729 
6730 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
6731   SMLoc S = Parser.getTok().getLoc();
6732   StringRef Prefix;
6733 
6734   if (getLexer().getKind() == AsmToken::Identifier) {
6735     Prefix = Parser.getTok().getString();
6736   } else {
6737     return MatchOperand_NoMatch;
6738   }
6739 
6740   if (Prefix != "dpp8")
6741     return parseDPPCtrl(Operands);
6742   if (!isGFX10())
6743     return MatchOperand_NoMatch;
6744 
6745   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
6746 
6747   int64_t Sels[8];
6748 
6749   Parser.Lex();
6750   if (getLexer().isNot(AsmToken::Colon))
6751     return MatchOperand_ParseFail;
6752 
6753   Parser.Lex();
6754   if (getLexer().isNot(AsmToken::LBrac))
6755     return MatchOperand_ParseFail;
6756 
6757   Parser.Lex();
6758   if (getParser().parseAbsoluteExpression(Sels[0]))
6759     return MatchOperand_ParseFail;
6760   if (0 > Sels[0] || 7 < Sels[0])
6761     return MatchOperand_ParseFail;
6762 
6763   for (size_t i = 1; i < 8; ++i) {
6764     if (getLexer().isNot(AsmToken::Comma))
6765       return MatchOperand_ParseFail;
6766 
6767     Parser.Lex();
6768     if (getParser().parseAbsoluteExpression(Sels[i]))
6769       return MatchOperand_ParseFail;
6770     if (0 > Sels[i] || 7 < Sels[i])
6771       return MatchOperand_ParseFail;
6772   }
6773 
6774   if (getLexer().isNot(AsmToken::RBrac))
6775     return MatchOperand_ParseFail;
6776   Parser.Lex();
6777 
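       // Pack the eight 3-bit lane selectors into a single dpp8 immediate;
       // selector i occupies bits [3*i+2 : 3*i].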
6778   unsigned DPP8 = 0;
6779   for (size_t i = 0; i < 8; ++i)
6780     DPP8 |= (Sels[i] << (i * 3));
6781 
6782   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
6783   return MatchOperand_Success;
6784 }
6785 
6786 OperandMatchResultTy
6787 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
6788   using namespace AMDGPU::DPP;
6789 
6790   SMLoc S = Parser.getTok().getLoc();
6791   StringRef Prefix;
6792   int64_t Int;
6793 
6794   if (getLexer().getKind() == AsmToken::Identifier) {
6795     Prefix = Parser.getTok().getString();
6796   } else {
6797     return MatchOperand_NoMatch;
6798   }
6799 
6800   if (Prefix == "row_mirror") {
6801     Int = DppCtrl::ROW_MIRROR;
6802     Parser.Lex();
6803   } else if (Prefix == "row_half_mirror") {
6804     Int = DppCtrl::ROW_HALF_MIRROR;
6805     Parser.Lex();
6806   } else {
6807     // Check to prevent parseDPPCtrl from eating invalid tokens
6808     if (Prefix != "quad_perm"
6809         && Prefix != "row_shl"
6810         && Prefix != "row_shr"
6811         && Prefix != "row_ror"
6812         && Prefix != "wave_shl"
6813         && Prefix != "wave_rol"
6814         && Prefix != "wave_shr"
6815         && Prefix != "wave_ror"
6816         && Prefix != "row_bcast"
6817         && Prefix != "row_share"
6818         && Prefix != "row_xmask") {
6819       return MatchOperand_NoMatch;
6820     }
6821 
6822     if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
6823       return MatchOperand_NoMatch;
6824 
6825     if (!isVI() && !isGFX9() &&
6826         (Prefix == "wave_shl" || Prefix == "wave_shr" ||
6827          Prefix == "wave_rol" || Prefix == "wave_ror" ||
6828          Prefix == "row_bcast"))
6829       return MatchOperand_NoMatch;
6830 
6831     Parser.Lex();
6832     if (getLexer().isNot(AsmToken::Colon))
6833       return MatchOperand_ParseFail;
6834 
6835     if (Prefix == "quad_perm") {
6836       // quad_perm:[%d,%d,%d,%d]
6837       Parser.Lex();
6838       if (getLexer().isNot(AsmToken::LBrac))
6839         return MatchOperand_ParseFail;
6840       Parser.Lex();
6841 
6842       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
6843         return MatchOperand_ParseFail;
6844 
6845       for (int i = 0; i < 3; ++i) {
6846         if (getLexer().isNot(AsmToken::Comma))
6847           return MatchOperand_ParseFail;
6848         Parser.Lex();
6849 
6850         int64_t Temp;
6851         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
6852           return MatchOperand_ParseFail;
6853         const int shift = i*2 + 2;
6854         Int += (Temp << shift);
6855       }
6856 
6857       if (getLexer().isNot(AsmToken::RBrac))
6858         return MatchOperand_ParseFail;
6859       Parser.Lex();
6860     } else {
6861       // sel:%d
6862       Parser.Lex();
6863       if (getParser().parseAbsoluteExpression(Int))
6864         return MatchOperand_ParseFail;
6865 
6866       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
6867         Int |= DppCtrl::ROW_SHL0;
6868       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
6869         Int |= DppCtrl::ROW_SHR0;
6870       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
6871         Int |= DppCtrl::ROW_ROR0;
6872       } else if (Prefix == "wave_shl" && 1 == Int) {
6873         Int = DppCtrl::WAVE_SHL1;
6874       } else if (Prefix == "wave_rol" && 1 == Int) {
6875         Int = DppCtrl::WAVE_ROL1;
6876       } else if (Prefix == "wave_shr" && 1 == Int) {
6877         Int = DppCtrl::WAVE_SHR1;
6878       } else if (Prefix == "wave_ror" && 1 == Int) {
6879         Int = DppCtrl::WAVE_ROR1;
6880       } else if (Prefix == "row_bcast") {
6881         if (Int == 15) {
6882           Int = DppCtrl::BCAST15;
6883         } else if (Int == 31) {
6884           Int = DppCtrl::BCAST31;
6885         } else {
6886           return MatchOperand_ParseFail;
6887         }
6888       } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
6889         Int |= DppCtrl::ROW_SHARE_FIRST;
6890       } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
6891         Int |= DppCtrl::ROW_XMASK_FIRST;
6892       } else {
6893         return MatchOperand_ParseFail;
6894       }
6895     }
6896   }
6897 
6898   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
6899   return MatchOperand_Success;
6900 }
6901 
6902 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
6903   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
6904 }
6905 
6906 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
6907   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
6908 }
6909 
6910 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
6911   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
6912 }
6913 
6914 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
6915   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
6916 }
6917 
6918 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
6919   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
6920 }
6921 
6922 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
6923   OptionalImmIndexMap OptionalIdx;
6924 
6925   unsigned I = 1;
6926   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6927   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6928     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6929   }
6930 
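       // For dpp8 the 'fi' value is collected separately and appended after the
       // lane selectors; for regular dpp it is added as an optional operand below.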
6931   int Fi = 0;
6932   for (unsigned E = Operands.size(); I != E; ++I) {
6933     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
6934                                             MCOI::TIED_TO);
6935     if (TiedTo != -1) {
6936       assert((unsigned)TiedTo < Inst.getNumOperands());
6937       // Handle the tied 'old' or src2 operand of MAC instructions.
6938       Inst.addOperand(Inst.getOperand(TiedTo));
6939     }
6940     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6941     // Add the register arguments
6942     if (Op.isReg() && validateVccOperand(Op.getReg())) {
6943       // VOP2b (v_add_u32, v_sub_u32, ...) dpp forms use a "vcc" token.
6944       // Skip it.
6945       continue;
6946     }
6947 
6948     if (IsDPP8) {
6949       if (Op.isDPP8()) {
6950         Op.addImmOperands(Inst, 1);
6951       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6952         Op.addRegWithFPInputModsOperands(Inst, 2);
6953       } else if (Op.isFI()) {
6954         Fi = Op.getImm();
6955       } else if (Op.isReg()) {
6956         Op.addRegOperands(Inst, 1);
6957       } else {
6958         llvm_unreachable("Invalid operand type");
6959       }
6960     } else {
6961       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6962         Op.addRegWithFPInputModsOperands(Inst, 2);
6963       } else if (Op.isDPPCtrl()) {
6964         Op.addImmOperands(Inst, 1);
6965       } else if (Op.isImm()) {
6966         // Handle optional arguments
6967         OptionalIdx[Op.getImmTy()] = I;
6968       } else {
6969         llvm_unreachable("Invalid operand type");
6970       }
6971     }
6972   }
6973 
6974   if (IsDPP8) {
6975     using namespace llvm::AMDGPU::DPP;
6976     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
6977   } else {
6978     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
6979     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
6980     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
6981     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
6982       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
6983     }
6984   }
6985 }
6986 
6987 //===----------------------------------------------------------------------===//
6988 // sdwa
6989 //===----------------------------------------------------------------------===//
6990 
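     // Parse an SDWA select operand such as dst_sel:WORD_1 or src0_sel:BYTE_2
     // into its encoded value.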
6991 OperandMatchResultTy
6992 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
6993                               AMDGPUOperand::ImmTy Type) {
6994   using namespace llvm::AMDGPU::SDWA;
6995 
6996   SMLoc S = Parser.getTok().getLoc();
6997   StringRef Value;
6998   OperandMatchResultTy res;
6999 
7000   res = parseStringWithPrefix(Prefix, Value);
7001   if (res != MatchOperand_Success) {
7002     return res;
7003   }
7004 
7005   int64_t Int;
7006   Int = StringSwitch<int64_t>(Value)
7007         .Case("BYTE_0", SdwaSel::BYTE_0)
7008         .Case("BYTE_1", SdwaSel::BYTE_1)
7009         .Case("BYTE_2", SdwaSel::BYTE_2)
7010         .Case("BYTE_3", SdwaSel::BYTE_3)
7011         .Case("WORD_0", SdwaSel::WORD_0)
7012         .Case("WORD_1", SdwaSel::WORD_1)
7013         .Case("DWORD", SdwaSel::DWORD)
7014         .Default(0xffffffff);
7015   Parser.Lex(); // eat last token
7016 
7017   if (Int == 0xffffffff) {
7018     return MatchOperand_ParseFail;
7019   }
7020 
7021   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
7022   return MatchOperand_Success;
7023 }
7024 
7025 OperandMatchResultTy
7026 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
7027   using namespace llvm::AMDGPU::SDWA;
7028 
7029   SMLoc S = Parser.getTok().getLoc();
7030   StringRef Value;
7031   OperandMatchResultTy res;
7032 
7033   res = parseStringWithPrefix("dst_unused", Value);
7034   if (res != MatchOperand_Success) {
7035     return res;
7036   }
7037 
7038   int64_t Int;
7039   Int = StringSwitch<int64_t>(Value)
7040         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
7041         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
7042         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
7043         .Default(0xffffffff);
7044   Parser.Lex(); // eat last token
7045 
7046   if (Int == 0xffffffff) {
7047     return MatchOperand_ParseFail;
7048   }
7049 
7050   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
7051   return MatchOperand_Success;
7052 }
7053 
7054 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
7055   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
7056 }
7057 
7058 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
7059   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
7060 }
7061 
7062 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
7063   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
7064 }
7065 
7066 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
7067   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
7068 }
7069 
7070 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
7071   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
7072 }
7073 
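     // Common conversion for SDWA instructions: skip the implied vcc dst/src
     // operands where requested and append defaults for any sdwa operands
     // (clamp, omod, dst_sel, dst_unused, src sels) that were not written.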
7074 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
7075                               uint64_t BasicInstType,
7076                               bool SkipDstVcc,
7077                               bool SkipSrcVcc) {
7078   using namespace llvm::AMDGPU::SDWA;
7079 
7080   OptionalImmIndexMap OptionalIdx;
7081   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
7082   bool SkippedVcc = false;
7083 
7084   unsigned I = 1;
7085   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7086   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7087     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7088   }
7089 
7090   for (unsigned E = Operands.size(); I != E; ++I) {
7091     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7092     if (SkipVcc && !SkippedVcc && Op.isReg() &&
7093         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
7094       // VOP2b (v_add_u32, v_sub_u32, ...) sdwa forms use a "vcc" token as dst.
7095       // Skip it if it is the 2nd operand (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
7096       // or the 4th operand (v_addc_u32_sdwa v1, vcc, v2, v3, vcc).
7097       // Skip VCC only if we did not skip it on the previous iteration.
7098       // Note that src0 and src1 occupy two slots each because of their modifiers.
7099       if (BasicInstType == SIInstrFlags::VOP2 &&
7100           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
7101            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
7102         SkippedVcc = true;
7103         continue;
7104       } else if (BasicInstType == SIInstrFlags::VOPC &&
7105                  Inst.getNumOperands() == 0) {
7106         SkippedVcc = true;
7107         continue;
7108       }
7109     }
7110     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7111       Op.addRegOrImmWithInputModsOperands(Inst, 2);
7112     } else if (Op.isImm()) {
7113       // Handle optional arguments
7114       OptionalIdx[Op.getImmTy()] = I;
7115     } else {
7116       llvm_unreachable("Invalid operand type");
7117     }
7118     SkippedVcc = false;
7119   }
7120 
7121   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
7122       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
7123       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
7124     // V_NOP_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments
7125     switch (BasicInstType) {
7126     case SIInstrFlags::VOP1:
7127       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7128       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7129         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7130       }
7131       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7132       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7133       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7134       break;
7135 
7136     case SIInstrFlags::VOP2:
7137       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7138       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7139         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7140       }
7141       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7142       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7143       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7144       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7145       break;
7146 
7147     case SIInstrFlags::VOPC:
7148       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
7149         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7150       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7151       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7152       break;
7153 
7154     default:
7155       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
7156     }
7157   }
7158 
7159   // Special case v_mac_{f16, f32}:
7160   // they have a src2 register operand that is tied to the dst operand.
7161   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
7162       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
7163     auto it = Inst.begin();
7164     std::advance(
7165       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
7166     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
7167   }
7168 }
7169 
7170 //===----------------------------------------------------------------------===//
7171 // mAI
7172 //===----------------------------------------------------------------------===//
7173 
7174 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
7175   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
7176 }
7177 
7178 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
7179   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
7180 }
7181 
7182 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
7183   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
7184 }
7185 
7186 /// Force static initialization.
7187 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
7188   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
7189   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
7190 }
7191 
7192 #define GET_REGISTER_MATCHER
7193 #define GET_MATCHER_IMPLEMENTATION
7194 #define GET_MNEMONIC_SPELL_CHECKER
7195 #include "AMDGPUGenAsmMatcher.inc"
7196 
7197 // This function must be defined after the auto-generated include so that the
7198 // MatchClassKind enum is available.
7199 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
7200                                                      unsigned Kind) {
7201   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
7202   // But MatchInstructionImpl() expects to see a token and fails to validate the
7203   // operand. This method checks whether we were given an immediate operand when
7204   // the matcher expects the corresponding token.
7205   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
7206   switch (Kind) {
7207   case MCK_addr64:
7208     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
7209   case MCK_gds:
7210     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
7211   case MCK_lds:
7212     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
7213   case MCK_glc:
7214     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
7215   case MCK_idxen:
7216     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
7217   case MCK_offen:
7218     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
7219   case MCK_SSrcB32:
7220     // When operands have expression values, they will return true for isToken,
7221     // because it is not possible to distinguish between a token and an
7222     // expression at parse time. MatchInstructionImpl() will always try to
7223     // match an operand as a token, when isToken returns true, and when the
7224     // name of the expression is not a valid token, the match will fail,
7225     // so we need to handle it here.
7226     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
7227   case MCK_SSrcF32:
7228     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
7229   case MCK_SoppBrTarget:
7230     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
7231   case MCK_VReg32OrOff:
7232     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
7233   case MCK_InterpSlot:
7234     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
7235   case MCK_Attr:
7236     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
7237   case MCK_AttrChan:
7238     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
7239   case MCK_ImmSMEMOffset:
7240     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
7241   case MCK_SReg_64:
7242   case MCK_SReg_64_XEXEC:
7243     // Null is defined as a 32-bit register but
7244     // it should also be enabled with 64-bit operands.
7245     // The following code enables it for SReg_64 operands
7246     // used as source and destination. Remaining source
7247     // operands are handled in isInlinableImm.
7248     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
7249   default:
7250     return Match_InvalidOperand;
7251   }
7252 }
7253 
7254 //===----------------------------------------------------------------------===//
7255 // endpgm
7256 //===----------------------------------------------------------------------===//
7257 
7258 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
7259   SMLoc S = Parser.getTok().getLoc();
7260   int64_t Imm = 0;
7261 
7262   if (!parseExpr(Imm)) {
7263     // The operand is optional; if not present, default to 0.
7264     Imm = 0;
7265   }
7266 
7267   if (!isUInt<16>(Imm)) {
7268     Error(S, "expected a 16-bit value");
7269     return MatchOperand_ParseFail;
7270   }
7271 
7272   Operands.push_back(
7273       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
7274   return MatchOperand_Success;
7275 }
7276 
7277 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
7278