1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/Error.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
63 
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
67 
68 namespace {
69 
70 class AMDGPUAsmParser;
71 
72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
73 
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
77 
78 class AMDGPUOperand : public MCParsedAsmOperand {
79   enum KindTy {
80     Token,
81     Immediate,
82     Register,
83     Expression
84   } Kind;
85 
86   SMLoc StartLoc, EndLoc;
87   const AMDGPUAsmParser *AsmParser;
88 
89 public:
90   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
92 
93   using Ptr = std::unique_ptr<AMDGPUOperand>;
94 
95   struct Modifiers {
96     bool Abs = false;
97     bool Neg = false;
98     bool Sext = false;
99 
100     bool hasFPModifiers() const { return Abs || Neg; }
101     bool hasIntModifiers() const { return Sext; }
102     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103 
104     int64_t getFPModifiersOperand() const {
105       int64_t Operand = 0;
106       Operand |= Abs ? SISrcMods::ABS : 0u;
107       Operand |= Neg ? SISrcMods::NEG : 0u;
108       return Operand;
109     }
110 
111     int64_t getIntModifiersOperand() const {
112       int64_t Operand = 0;
113       Operand |= Sext ? SISrcMods::SEXT : 0u;
114       return Operand;
115     }
116 
117     int64_t getModifiersOperand() const {
118       assert(!(hasFPModifiers() && hasIntModifiers())
119            && "fp and int modifiers should not be used simultaneously");
120       if (hasFPModifiers()) {
121         return getFPModifiersOperand();
122       } else if (hasIntModifiers()) {
123         return getIntModifiersOperand();
124       } else {
125         return 0;
126       }
127     }
128 
129     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
130   };
131 
132   enum ImmTy {
133     ImmTyNone,
134     ImmTyGDS,
135     ImmTyLDS,
136     ImmTyOffen,
137     ImmTyIdxen,
138     ImmTyAddr64,
139     ImmTyOffset,
140     ImmTyInstOffset,
141     ImmTyOffset0,
142     ImmTyOffset1,
143     ImmTyDLC,
144     ImmTyGLC,
145     ImmTySLC,
146     ImmTySWZ,
147     ImmTyTFE,
148     ImmTyD16,
149     ImmTyClampSI,
150     ImmTyOModSI,
151     ImmTyDPP8,
152     ImmTyDppCtrl,
153     ImmTyDppRowMask,
154     ImmTyDppBankMask,
155     ImmTyDppBoundCtrl,
156     ImmTyDppFi,
157     ImmTySdwaDstSel,
158     ImmTySdwaSrc0Sel,
159     ImmTySdwaSrc1Sel,
160     ImmTySdwaDstUnused,
161     ImmTyDMask,
162     ImmTyDim,
163     ImmTyUNorm,
164     ImmTyDA,
165     ImmTyR128A16,
166     ImmTyA16,
167     ImmTyLWE,
168     ImmTyExpTgt,
169     ImmTyExpCompr,
170     ImmTyExpVM,
171     ImmTyFORMAT,
172     ImmTyHwreg,
173     ImmTyOff,
174     ImmTySendMsg,
175     ImmTyInterpSlot,
176     ImmTyInterpAttr,
177     ImmTyAttrChan,
178     ImmTyOpSel,
179     ImmTyOpSelHi,
180     ImmTyNegLo,
181     ImmTyNegHi,
182     ImmTySwizzle,
183     ImmTyGprIdxMode,
184     ImmTyHigh,
185     ImmTyBLGP,
186     ImmTyCBSZ,
187     ImmTyABID,
188     ImmTyEndpgm,
189   };
190 
191 private:
192   struct TokOp {
193     const char *Data;
194     unsigned Length;
195   };
196 
197   struct ImmOp {
198     int64_t Val;
199     ImmTy Type;
200     bool IsFPImm;
201     Modifiers Mods;
202   };
203 
204   struct RegOp {
205     unsigned RegNo;
206     Modifiers Mods;
207   };
208 
209   union {
210     TokOp Tok;
211     ImmOp Imm;
212     RegOp Reg;
213     const MCExpr *Expr;
214   };
215 
216 public:
217   bool isToken() const override {
218     if (Kind == Token)
219       return true;
220 
221     // When parsing operands, we can't always tell if something was meant to be
222     // a token, like 'gds', or an expression that references a global variable.
223     // In this case, we assume the string is an expression, and if we need to
224     // interpret is a token, then we treat the symbol name as the token.
225     return isSymbolRefExpr();
226   }
227 
228   bool isSymbolRefExpr() const {
229     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
230   }
231 
232   bool isImm() const override {
233     return Kind == Immediate;
234   }
235 
236   bool isInlinableImm(MVT type) const;
237   bool isLiteralImm(MVT type) const;
238 
239   bool isRegKind() const {
240     return Kind == Register;
241   }
242 
243   bool isReg() const override {
244     return isRegKind() && !hasModifiers();
245   }
246 
247   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
248     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
249   }
250 
251   bool isRegOrImmWithInt16InputMods() const {
252     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
253   }
254 
255   bool isRegOrImmWithInt32InputMods() const {
256     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
257   }
258 
259   bool isRegOrImmWithInt64InputMods() const {
260     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
261   }
262 
263   bool isRegOrImmWithFP16InputMods() const {
264     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
265   }
266 
267   bool isRegOrImmWithFP32InputMods() const {
268     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
269   }
270 
271   bool isRegOrImmWithFP64InputMods() const {
272     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
273   }
274 
275   bool isVReg() const {
276     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
277            isRegClass(AMDGPU::VReg_64RegClassID) ||
278            isRegClass(AMDGPU::VReg_96RegClassID) ||
279            isRegClass(AMDGPU::VReg_128RegClassID) ||
280            isRegClass(AMDGPU::VReg_160RegClassID) ||
281            isRegClass(AMDGPU::VReg_192RegClassID) ||
282            isRegClass(AMDGPU::VReg_256RegClassID) ||
283            isRegClass(AMDGPU::VReg_512RegClassID) ||
284            isRegClass(AMDGPU::VReg_1024RegClassID);
285   }
286 
287   bool isVReg32() const {
288     return isRegClass(AMDGPU::VGPR_32RegClassID);
289   }
290 
291   bool isVReg32OrOff() const {
292     return isOff() || isVReg32();
293   }
294 
295   bool isNull() const {
296     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
297   }
298 
299   bool isSDWAOperand(MVT type) const;
300   bool isSDWAFP16Operand() const;
301   bool isSDWAFP32Operand() const;
302   bool isSDWAInt16Operand() const;
303   bool isSDWAInt32Operand() const;
304 
305   bool isImmTy(ImmTy ImmT) const {
306     return isImm() && Imm.Type == ImmT;
307   }
308 
309   bool isImmModifier() const {
310     return isImm() && Imm.Type != ImmTyNone;
311   }
312 
313   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
314   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
315   bool isDMask() const { return isImmTy(ImmTyDMask); }
316   bool isDim() const { return isImmTy(ImmTyDim); }
317   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
318   bool isDA() const { return isImmTy(ImmTyDA); }
319   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
320   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
321   bool isLWE() const { return isImmTy(ImmTyLWE); }
322   bool isOff() const { return isImmTy(ImmTyOff); }
323   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
324   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
325   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
326   bool isOffen() const { return isImmTy(ImmTyOffen); }
327   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
328   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
329   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
330   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
331   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
332 
333   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
334   bool isGDS() const { return isImmTy(ImmTyGDS); }
335   bool isLDS() const { return isImmTy(ImmTyLDS); }
336   bool isDLC() const { return isImmTy(ImmTyDLC); }
337   bool isGLC() const { return isImmTy(ImmTyGLC); }
338   bool isSLC() const { return isImmTy(ImmTySLC); }
339   bool isSWZ() const { return isImmTy(ImmTySWZ); }
340   bool isTFE() const { return isImmTy(ImmTyTFE); }
341   bool isD16() const { return isImmTy(ImmTyD16); }
342   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
343   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
344   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
345   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
346   bool isFI() const { return isImmTy(ImmTyDppFi); }
347   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
348   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
349   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
350   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
351   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
352   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
353   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
354   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
355   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
356   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
357   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
358   bool isHigh() const { return isImmTy(ImmTyHigh); }
359 
360   bool isMod() const {
361     return isClampSI() || isOModSI();
362   }
363 
364   bool isRegOrImm() const {
365     return isReg() || isImm();
366   }
367 
368   bool isRegClass(unsigned RCID) const;
369 
370   bool isInlineValue() const;
371 
372   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
373     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
374   }
375 
376   bool isSCSrcB16() const {
377     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
378   }
379 
380   bool isSCSrcV2B16() const {
381     return isSCSrcB16();
382   }
383 
384   bool isSCSrcB32() const {
385     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
386   }
387 
388   bool isSCSrcB64() const {
389     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
390   }
391 
392   bool isBoolReg() const;
393 
394   bool isSCSrcF16() const {
395     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
396   }
397 
398   bool isSCSrcV2F16() const {
399     return isSCSrcF16();
400   }
401 
402   bool isSCSrcF32() const {
403     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
404   }
405 
406   bool isSCSrcF64() const {
407     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
408   }
409 
410   bool isSSrcB32() const {
411     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
412   }
413 
414   bool isSSrcB16() const {
415     return isSCSrcB16() || isLiteralImm(MVT::i16);
416   }
417 
418   bool isSSrcV2B16() const {
419     llvm_unreachable("cannot happen");
420     return isSSrcB16();
421   }
422 
423   bool isSSrcB64() const {
424     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
425     // See isVSrc64().
426     return isSCSrcB64() || isLiteralImm(MVT::i64);
427   }
428 
429   bool isSSrcF32() const {
430     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
431   }
432 
433   bool isSSrcF64() const {
434     return isSCSrcB64() || isLiteralImm(MVT::f64);
435   }
436 
437   bool isSSrcF16() const {
438     return isSCSrcB16() || isLiteralImm(MVT::f16);
439   }
440 
441   bool isSSrcV2F16() const {
442     llvm_unreachable("cannot happen");
443     return isSSrcF16();
444   }
445 
446   bool isSSrcOrLdsB32() const {
447     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
448            isLiteralImm(MVT::i32) || isExpr();
449   }
450 
451   bool isVCSrcB32() const {
452     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
453   }
454 
455   bool isVCSrcB64() const {
456     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
457   }
458 
459   bool isVCSrcB16() const {
460     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
461   }
462 
463   bool isVCSrcV2B16() const {
464     return isVCSrcB16();
465   }
466 
467   bool isVCSrcF32() const {
468     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
469   }
470 
471   bool isVCSrcF64() const {
472     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
473   }
474 
475   bool isVCSrcF16() const {
476     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
477   }
478 
479   bool isVCSrcV2F16() const {
480     return isVCSrcF16();
481   }
482 
483   bool isVSrcB32() const {
484     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
485   }
486 
487   bool isVSrcB64() const {
488     return isVCSrcF64() || isLiteralImm(MVT::i64);
489   }
490 
491   bool isVSrcB16() const {
492     return isVCSrcB16() || isLiteralImm(MVT::i16);
493   }
494 
495   bool isVSrcV2B16() const {
496     return isVSrcB16() || isLiteralImm(MVT::v2i16);
497   }
498 
499   bool isVSrcF32() const {
500     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
501   }
502 
503   bool isVSrcF64() const {
504     return isVCSrcF64() || isLiteralImm(MVT::f64);
505   }
506 
507   bool isVSrcF16() const {
508     return isVCSrcF16() || isLiteralImm(MVT::f16);
509   }
510 
511   bool isVSrcV2F16() const {
512     return isVSrcF16() || isLiteralImm(MVT::v2f16);
513   }
514 
515   bool isVISrcB32() const {
516     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
517   }
518 
519   bool isVISrcB16() const {
520     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
521   }
522 
523   bool isVISrcV2B16() const {
524     return isVISrcB16();
525   }
526 
527   bool isVISrcF32() const {
528     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
529   }
530 
531   bool isVISrcF16() const {
532     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
533   }
534 
535   bool isVISrcV2F16() const {
536     return isVISrcF16() || isVISrcB32();
537   }
538 
539   bool isAISrcB32() const {
540     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
541   }
542 
543   bool isAISrcB16() const {
544     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
545   }
546 
547   bool isAISrcV2B16() const {
548     return isAISrcB16();
549   }
550 
551   bool isAISrcF32() const {
552     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
553   }
554 
555   bool isAISrcF16() const {
556     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
557   }
558 
559   bool isAISrcV2F16() const {
560     return isAISrcF16() || isAISrcB32();
561   }
562 
563   bool isAISrc_128B32() const {
564     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
565   }
566 
567   bool isAISrc_128B16() const {
568     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
569   }
570 
571   bool isAISrc_128V2B16() const {
572     return isAISrc_128B16();
573   }
574 
575   bool isAISrc_128F32() const {
576     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
577   }
578 
579   bool isAISrc_128F16() const {
580     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
581   }
582 
583   bool isAISrc_128V2F16() const {
584     return isAISrc_128F16() || isAISrc_128B32();
585   }
586 
587   bool isAISrc_512B32() const {
588     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
589   }
590 
591   bool isAISrc_512B16() const {
592     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
593   }
594 
595   bool isAISrc_512V2B16() const {
596     return isAISrc_512B16();
597   }
598 
599   bool isAISrc_512F32() const {
600     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
601   }
602 
603   bool isAISrc_512F16() const {
604     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
605   }
606 
607   bool isAISrc_512V2F16() const {
608     return isAISrc_512F16() || isAISrc_512B32();
609   }
610 
611   bool isAISrc_1024B32() const {
612     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
613   }
614 
615   bool isAISrc_1024B16() const {
616     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
617   }
618 
619   bool isAISrc_1024V2B16() const {
620     return isAISrc_1024B16();
621   }
622 
623   bool isAISrc_1024F32() const {
624     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
625   }
626 
627   bool isAISrc_1024F16() const {
628     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
629   }
630 
631   bool isAISrc_1024V2F16() const {
632     return isAISrc_1024F16() || isAISrc_1024B32();
633   }
634 
635   bool isKImmFP32() const {
636     return isLiteralImm(MVT::f32);
637   }
638 
639   bool isKImmFP16() const {
640     return isLiteralImm(MVT::f16);
641   }
642 
643   bool isMem() const override {
644     return false;
645   }
646 
647   bool isExpr() const {
648     return Kind == Expression;
649   }
650 
651   bool isSoppBrTarget() const {
652     return isExpr() || isImm();
653   }
654 
655   bool isSWaitCnt() const;
656   bool isHwreg() const;
657   bool isSendMsg() const;
658   bool isSwizzle() const;
659   bool isSMRDOffset8() const;
660   bool isSMEMOffset() const;
661   bool isSMRDLiteralOffset() const;
662   bool isDPP8() const;
663   bool isDPPCtrl() const;
664   bool isBLGP() const;
665   bool isCBSZ() const;
666   bool isABID() const;
667   bool isGPRIdxMode() const;
668   bool isS16Imm() const;
669   bool isU16Imm() const;
670   bool isEndpgm() const;
671 
672   StringRef getExpressionAsToken() const {
673     assert(isExpr());
674     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
675     return S->getSymbol().getName();
676   }
677 
678   StringRef getToken() const {
679     assert(isToken());
680 
681     if (Kind == Expression)
682       return getExpressionAsToken();
683 
684     return StringRef(Tok.Data, Tok.Length);
685   }
686 
687   int64_t getImm() const {
688     assert(isImm());
689     return Imm.Val;
690   }
691 
692   void setImm(int64_t Val) {
693     assert(isImm());
694     Imm.Val = Val;
695   }
696 
697   ImmTy getImmTy() const {
698     assert(isImm());
699     return Imm.Type;
700   }
701 
702   unsigned getReg() const override {
703     assert(isRegKind());
704     return Reg.RegNo;
705   }
706 
707   SMLoc getStartLoc() const override {
708     return StartLoc;
709   }
710 
711   SMLoc getEndLoc() const override {
712     return EndLoc;
713   }
714 
715   SMRange getLocRange() const {
716     return SMRange(StartLoc, EndLoc);
717   }
718 
719   Modifiers getModifiers() const {
720     assert(isRegKind() || isImmTy(ImmTyNone));
721     return isRegKind() ? Reg.Mods : Imm.Mods;
722   }
723 
724   void setModifiers(Modifiers Mods) {
725     assert(isRegKind() || isImmTy(ImmTyNone));
726     if (isRegKind())
727       Reg.Mods = Mods;
728     else
729       Imm.Mods = Mods;
730   }
731 
732   bool hasModifiers() const {
733     return getModifiers().hasModifiers();
734   }
735 
736   bool hasFPModifiers() const {
737     return getModifiers().hasFPModifiers();
738   }
739 
740   bool hasIntModifiers() const {
741     return getModifiers().hasIntModifiers();
742   }
743 
744   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
745 
746   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
747 
748   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
749 
750   template <unsigned Bitwidth>
751   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
752 
753   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
754     addKImmFPOperands<16>(Inst, N);
755   }
756 
757   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
758     addKImmFPOperands<32>(Inst, N);
759   }
760 
761   void addRegOperands(MCInst &Inst, unsigned N) const;
762 
763   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
764     addRegOperands(Inst, N);
765   }
766 
767   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
768     if (isRegKind())
769       addRegOperands(Inst, N);
770     else if (isExpr())
771       Inst.addOperand(MCOperand::createExpr(Expr));
772     else
773       addImmOperands(Inst, N);
774   }
775 
776   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
777     Modifiers Mods = getModifiers();
778     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
779     if (isRegKind()) {
780       addRegOperands(Inst, N);
781     } else {
782       addImmOperands(Inst, N, false);
783     }
784   }
785 
786   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
787     assert(!hasIntModifiers());
788     addRegOrImmWithInputModsOperands(Inst, N);
789   }
790 
791   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
792     assert(!hasFPModifiers());
793     addRegOrImmWithInputModsOperands(Inst, N);
794   }
795 
796   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
797     Modifiers Mods = getModifiers();
798     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
799     assert(isRegKind());
800     addRegOperands(Inst, N);
801   }
802 
803   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
804     assert(!hasIntModifiers());
805     addRegWithInputModsOperands(Inst, N);
806   }
807 
808   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
809     assert(!hasFPModifiers());
810     addRegWithInputModsOperands(Inst, N);
811   }
812 
813   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
814     if (isImm())
815       addImmOperands(Inst, N);
816     else {
817       assert(isExpr());
818       Inst.addOperand(MCOperand::createExpr(Expr));
819     }
820   }
821 
822   static void printImmTy(raw_ostream& OS, ImmTy Type) {
823     switch (Type) {
824     case ImmTyNone: OS << "None"; break;
825     case ImmTyGDS: OS << "GDS"; break;
826     case ImmTyLDS: OS << "LDS"; break;
827     case ImmTyOffen: OS << "Offen"; break;
828     case ImmTyIdxen: OS << "Idxen"; break;
829     case ImmTyAddr64: OS << "Addr64"; break;
830     case ImmTyOffset: OS << "Offset"; break;
831     case ImmTyInstOffset: OS << "InstOffset"; break;
832     case ImmTyOffset0: OS << "Offset0"; break;
833     case ImmTyOffset1: OS << "Offset1"; break;
834     case ImmTyDLC: OS << "DLC"; break;
835     case ImmTyGLC: OS << "GLC"; break;
836     case ImmTySLC: OS << "SLC"; break;
837     case ImmTySWZ: OS << "SWZ"; break;
838     case ImmTyTFE: OS << "TFE"; break;
839     case ImmTyD16: OS << "D16"; break;
840     case ImmTyFORMAT: OS << "FORMAT"; break;
841     case ImmTyClampSI: OS << "ClampSI"; break;
842     case ImmTyOModSI: OS << "OModSI"; break;
843     case ImmTyDPP8: OS << "DPP8"; break;
844     case ImmTyDppCtrl: OS << "DppCtrl"; break;
845     case ImmTyDppRowMask: OS << "DppRowMask"; break;
846     case ImmTyDppBankMask: OS << "DppBankMask"; break;
847     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
848     case ImmTyDppFi: OS << "FI"; break;
849     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
850     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
851     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
852     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
853     case ImmTyDMask: OS << "DMask"; break;
854     case ImmTyDim: OS << "Dim"; break;
855     case ImmTyUNorm: OS << "UNorm"; break;
856     case ImmTyDA: OS << "DA"; break;
857     case ImmTyR128A16: OS << "R128A16"; break;
858     case ImmTyA16: OS << "A16"; break;
859     case ImmTyLWE: OS << "LWE"; break;
860     case ImmTyOff: OS << "Off"; break;
861     case ImmTyExpTgt: OS << "ExpTgt"; break;
862     case ImmTyExpCompr: OS << "ExpCompr"; break;
863     case ImmTyExpVM: OS << "ExpVM"; break;
864     case ImmTyHwreg: OS << "Hwreg"; break;
865     case ImmTySendMsg: OS << "SendMsg"; break;
866     case ImmTyInterpSlot: OS << "InterpSlot"; break;
867     case ImmTyInterpAttr: OS << "InterpAttr"; break;
868     case ImmTyAttrChan: OS << "AttrChan"; break;
869     case ImmTyOpSel: OS << "OpSel"; break;
870     case ImmTyOpSelHi: OS << "OpSelHi"; break;
871     case ImmTyNegLo: OS << "NegLo"; break;
872     case ImmTyNegHi: OS << "NegHi"; break;
873     case ImmTySwizzle: OS << "Swizzle"; break;
874     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
875     case ImmTyHigh: OS << "High"; break;
876     case ImmTyBLGP: OS << "BLGP"; break;
877     case ImmTyCBSZ: OS << "CBSZ"; break;
878     case ImmTyABID: OS << "ABID"; break;
879     case ImmTyEndpgm: OS << "Endpgm"; break;
880     }
881   }
882 
883   void print(raw_ostream &OS) const override {
884     switch (Kind) {
885     case Register:
886       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
887       break;
888     case Immediate:
889       OS << '<' << getImm();
890       if (getImmTy() != ImmTyNone) {
891         OS << " type: "; printImmTy(OS, getImmTy());
892       }
893       OS << " mods: " << Imm.Mods << '>';
894       break;
895     case Token:
896       OS << '\'' << getToken() << '\'';
897       break;
898     case Expression:
899       OS << "<expr " << *Expr << '>';
900       break;
901     }
902   }
903 
904   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
905                                       int64_t Val, SMLoc Loc,
906                                       ImmTy Type = ImmTyNone,
907                                       bool IsFPImm = false) {
908     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
909     Op->Imm.Val = Val;
910     Op->Imm.IsFPImm = IsFPImm;
911     Op->Imm.Type = Type;
912     Op->Imm.Mods = Modifiers();
913     Op->StartLoc = Loc;
914     Op->EndLoc = Loc;
915     return Op;
916   }
917 
918   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
919                                         StringRef Str, SMLoc Loc,
920                                         bool HasExplicitEncodingSize = true) {
921     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
922     Res->Tok.Data = Str.data();
923     Res->Tok.Length = Str.size();
924     Res->StartLoc = Loc;
925     Res->EndLoc = Loc;
926     return Res;
927   }
928 
929   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
930                                       unsigned RegNo, SMLoc S,
931                                       SMLoc E) {
932     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
933     Op->Reg.RegNo = RegNo;
934     Op->Reg.Mods = Modifiers();
935     Op->StartLoc = S;
936     Op->EndLoc = E;
937     return Op;
938   }
939 
940   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
941                                        const class MCExpr *Expr, SMLoc S) {
942     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
943     Op->Expr = Expr;
944     Op->StartLoc = S;
945     Op->EndLoc = S;
946     return Op;
947   }
948 };
949 
950 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
951   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
952   return OS;
953 }
954 
955 //===----------------------------------------------------------------------===//
956 // AsmParser
957 //===----------------------------------------------------------------------===//
958 
959 // Holds info related to the current kernel, e.g. count of SGPRs used.
960 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
961 // .amdgpu_hsa_kernel or at EOF.
962 class KernelScopeInfo {
963   int SgprIndexUnusedMin = -1;
964   int VgprIndexUnusedMin = -1;
965   MCContext *Ctx = nullptr;
966 
967   void usesSgprAt(int i) {
968     if (i >= SgprIndexUnusedMin) {
969       SgprIndexUnusedMin = ++i;
970       if (Ctx) {
971         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
972         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
973       }
974     }
975   }
976 
977   void usesVgprAt(int i) {
978     if (i >= VgprIndexUnusedMin) {
979       VgprIndexUnusedMin = ++i;
980       if (Ctx) {
981         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
982         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
983       }
984     }
985   }
986 
987 public:
988   KernelScopeInfo() = default;
989 
990   void initialize(MCContext &Context) {
991     Ctx = &Context;
992     usesSgprAt(SgprIndexUnusedMin = -1);
993     usesVgprAt(VgprIndexUnusedMin = -1);
994   }
995 
996   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
997     switch (RegKind) {
998       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
999       case IS_AGPR: // fall through
1000       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1001       default: break;
1002     }
1003   }
1004 };
1005 
1006 class AMDGPUAsmParser : public MCTargetAsmParser {
1007   MCAsmParser &Parser;
1008 
1009   // Number of extra operands parsed after the first optional operand.
1010   // This may be necessary to skip hardcoded mandatory operands.
1011   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1012 
1013   unsigned ForcedEncodingSize = 0;
1014   bool ForcedDPP = false;
1015   bool ForcedSDWA = false;
1016   KernelScopeInfo KernelScope;
1017 
1018   /// @name Auto-generated Match Functions
1019   /// {
1020 
1021 #define GET_ASSEMBLER_HEADER
1022 #include "AMDGPUGenAsmMatcher.inc"
1023 
1024   /// }
1025 
1026 private:
1027   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1028   bool OutOfRangeError(SMRange Range);
1029   /// Calculate VGPR/SGPR blocks required for given target, reserved
1030   /// registers, and user-specified NextFreeXGPR values.
1031   ///
1032   /// \param Features [in] Target features, used for bug corrections.
1033   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1034   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1035   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1036   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1037   /// descriptor field, if valid.
1038   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1039   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1040   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1041   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1042   /// \param VGPRBlocks [out] Result VGPR block count.
1043   /// \param SGPRBlocks [out] Result SGPR block count.
1044   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1045                           bool FlatScrUsed, bool XNACKUsed,
1046                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1047                           SMRange VGPRRange, unsigned NextFreeSGPR,
1048                           SMRange SGPRRange, unsigned &VGPRBlocks,
1049                           unsigned &SGPRBlocks);
1050   bool ParseDirectiveAMDGCNTarget();
1051   bool ParseDirectiveAMDHSAKernel();
1052   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1053   bool ParseDirectiveHSACodeObjectVersion();
1054   bool ParseDirectiveHSACodeObjectISA();
1055   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1056   bool ParseDirectiveAMDKernelCodeT();
1057   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1058   bool ParseDirectiveAMDGPUHsaKernel();
1059 
1060   bool ParseDirectiveISAVersion();
1061   bool ParseDirectiveHSAMetadata();
1062   bool ParseDirectivePALMetadataBegin();
1063   bool ParseDirectivePALMetadata();
1064   bool ParseDirectiveAMDGPULDS();
1065 
1066   /// Common code to parse out a block of text (typically YAML) between start and
1067   /// end directives.
1068   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1069                            const char *AssemblerDirectiveEnd,
1070                            std::string &CollectString);
1071 
1072   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1073                              RegisterKind RegKind, unsigned Reg1);
1074   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1075                            unsigned &RegNum, unsigned &RegWidth,
1076                            bool RestoreOnFailure = false);
1077   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1078                            unsigned &RegNum, unsigned &RegWidth,
1079                            SmallVectorImpl<AsmToken> &Tokens);
1080   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1081                            unsigned &RegWidth,
1082                            SmallVectorImpl<AsmToken> &Tokens);
1083   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1084                            unsigned &RegWidth,
1085                            SmallVectorImpl<AsmToken> &Tokens);
1086   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1087                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1088   bool ParseRegRange(unsigned& Num, unsigned& Width);
1089   unsigned getRegularReg(RegisterKind RegKind,
1090                          unsigned RegNum,
1091                          unsigned RegWidth);
1092 
1093   bool isRegister();
1094   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1095   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1096   void initializeGprCountSymbol(RegisterKind RegKind);
1097   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1098                              unsigned RegWidth);
1099   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1100                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1101   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1102                  bool IsGdsHardcoded);
1103 
1104 public:
1105   enum AMDGPUMatchResultTy {
1106     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1107   };
1108   enum OperandMode {
1109     OperandMode_Default,
1110     OperandMode_NSA,
1111   };
1112 
1113   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1114 
1115   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1116                const MCInstrInfo &MII,
1117                const MCTargetOptions &Options)
1118       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1119     MCAsmParserExtension::Initialize(Parser);
1120 
1121     if (getFeatureBits().none()) {
1122       // Set default features.
1123       copySTI().ToggleFeature("southern-islands");
1124     }
1125 
1126     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1127 
1128     {
1129       // TODO: make those pre-defined variables read-only.
1130       // Currently there is none suitable machinery in the core llvm-mc for this.
1131       // MCSymbol::isRedefinable is intended for another purpose, and
1132       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
1133       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1134       MCContext &Ctx = getContext();
1135       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1136         MCSymbol *Sym =
1137             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1138         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1139         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1140         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1141         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1142         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1143       } else {
1144         MCSymbol *Sym =
1145             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1146         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1147         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1148         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1149         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1150         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1151       }
1152       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1153         initializeGprCountSymbol(IS_VGPR);
1154         initializeGprCountSymbol(IS_SGPR);
1155       } else
1156         KernelScope.initialize(getContext());
1157     }
1158   }
1159 
1160   bool hasXNACK() const {
1161     return AMDGPU::hasXNACK(getSTI());
1162   }
1163 
1164   bool hasMIMG_R128() const {
1165     return AMDGPU::hasMIMG_R128(getSTI());
1166   }
1167 
1168   bool hasPackedD16() const {
1169     return AMDGPU::hasPackedD16(getSTI());
1170   }
1171 
1172   bool hasGFX10A16() const {
1173     return AMDGPU::hasGFX10A16(getSTI());
1174   }
1175 
1176   bool isSI() const {
1177     return AMDGPU::isSI(getSTI());
1178   }
1179 
1180   bool isCI() const {
1181     return AMDGPU::isCI(getSTI());
1182   }
1183 
1184   bool isVI() const {
1185     return AMDGPU::isVI(getSTI());
1186   }
1187 
1188   bool isGFX9() const {
1189     return AMDGPU::isGFX9(getSTI());
1190   }
1191 
1192   bool isGFX10() const {
1193     return AMDGPU::isGFX10(getSTI());
1194   }
1195 
1196   bool isGFX10_BEncoding() const {
1197     return AMDGPU::isGFX10_BEncoding(getSTI());
1198   }
1199 
1200   bool hasInv2PiInlineImm() const {
1201     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1202   }
1203 
1204   bool hasFlatOffsets() const {
1205     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1206   }
1207 
1208   bool hasSGPR102_SGPR103() const {
1209     return !isVI() && !isGFX9();
1210   }
1211 
1212   bool hasSGPR104_SGPR105() const {
1213     return isGFX10();
1214   }
1215 
1216   bool hasIntClamp() const {
1217     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1218   }
1219 
1220   AMDGPUTargetStreamer &getTargetStreamer() {
1221     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1222     return static_cast<AMDGPUTargetStreamer &>(TS);
1223   }
1224 
1225   const MCRegisterInfo *getMRI() const {
1226     // We need this const_cast because for some reason getContext() is not const
1227     // in MCAsmParser.
1228     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1229   }
1230 
1231   const MCInstrInfo *getMII() const {
1232     return &MII;
1233   }
1234 
1235   const FeatureBitset &getFeatureBits() const {
1236     return getSTI().getFeatureBits();
1237   }
1238 
1239   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1240   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1241   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1242 
1243   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1244   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1245   bool isForcedDPP() const { return ForcedDPP; }
1246   bool isForcedSDWA() const { return ForcedSDWA; }
1247   ArrayRef<unsigned> getMatchedVariants() const;
1248 
1249   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1250   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1251                      bool RestoreOnFailure);
1252   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1253   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1254                                         SMLoc &EndLoc) override;
1255   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1256   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1257                                       unsigned Kind) override;
1258   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1259                                OperandVector &Operands, MCStreamer &Out,
1260                                uint64_t &ErrorInfo,
1261                                bool MatchingInlineAsm) override;
1262   bool ParseDirective(AsmToken DirectiveID) override;
1263   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1264                                     OperandMode Mode = OperandMode_Default);
1265   StringRef parseMnemonicSuffix(StringRef Name);
1266   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1267                         SMLoc NameLoc, OperandVector &Operands) override;
1268   //bool ProcessInstruction(MCInst &Inst);
1269 
1270   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1271 
1272   OperandMatchResultTy
1273   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1274                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1275                      bool (*ConvertResult)(int64_t &) = nullptr);
1276 
1277   OperandMatchResultTy
1278   parseOperandArrayWithPrefix(const char *Prefix,
1279                               OperandVector &Operands,
1280                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1281                               bool (*ConvertResult)(int64_t&) = nullptr);
1282 
1283   OperandMatchResultTy
1284   parseNamedBit(const char *Name, OperandVector &Operands,
1285                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1286   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1287                                              StringRef &Value);
1288 
1289   bool isModifier();
1290   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1291   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1292   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1293   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1294   bool parseSP3NegModifier();
1295   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1296   OperandMatchResultTy parseReg(OperandVector &Operands);
1297   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1298   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1299   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1300   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1301   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1302   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1303   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1304   OperandMatchResultTy parseUfmt(int64_t &Format);
1305   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1306   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1307   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1308   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1309   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1310   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1311   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1312 
1313   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1314   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1315   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1316   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1317 
1318   bool parseCnt(int64_t &IntVal);
1319   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1320   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1321 
1322 private:
1323   struct OperandInfoTy {
1324     int64_t Id;
1325     bool IsSymbolic = false;
1326     bool IsDefined = false;
1327 
1328     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1329   };
1330 
1331   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1332   bool validateSendMsg(const OperandInfoTy &Msg,
1333                        const OperandInfoTy &Op,
1334                        const OperandInfoTy &Stream,
1335                        const SMLoc Loc);
1336 
1337   bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1338   bool validateHwreg(const OperandInfoTy &HwReg,
1339                      const int64_t Offset,
1340                      const int64_t Width,
1341                      const SMLoc Loc);
1342 
1343   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1344   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1345   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1346 
1347   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1348   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1349   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1350   bool validateSOPLiteral(const MCInst &Inst) const;
1351   bool validateConstantBusLimitations(const MCInst &Inst);
1352   bool validateEarlyClobberLimitations(const MCInst &Inst);
1353   bool validateIntClampSupported(const MCInst &Inst);
1354   bool validateMIMGAtomicDMask(const MCInst &Inst);
1355   bool validateMIMGGatherDMask(const MCInst &Inst);
1356   bool validateMovrels(const MCInst &Inst);
1357   bool validateMIMGDataSize(const MCInst &Inst);
1358   bool validateMIMGAddrSize(const MCInst &Inst);
1359   bool validateMIMGD16(const MCInst &Inst);
1360   bool validateMIMGDim(const MCInst &Inst);
1361   bool validateLdsDirect(const MCInst &Inst);
1362   bool validateOpSel(const MCInst &Inst);
1363   bool validateVccOperand(unsigned Reg) const;
1364   bool validateVOP3Literal(const MCInst &Inst) const;
1365   bool validateMAIAccWrite(const MCInst &Inst);
1366   unsigned getConstantBusLimit(unsigned Opcode) const;
1367   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1368   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1369   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1370 
1371   bool isId(const StringRef Id) const;
1372   bool isId(const AsmToken &Token, const StringRef Id) const;
1373   bool isToken(const AsmToken::TokenKind Kind) const;
1374   bool trySkipId(const StringRef Id);
1375   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1376   bool trySkipToken(const AsmToken::TokenKind Kind);
1377   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1378   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1379   bool parseId(StringRef &Val, const StringRef ErrMsg);
1380 
1381   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1382   AsmToken::TokenKind getTokenKind() const;
1383   bool parseExpr(int64_t &Imm);
1384   bool parseExpr(OperandVector &Operands);
1385   StringRef getTokenStr() const;
1386   AsmToken peekToken();
1387   AsmToken getToken() const;
1388   SMLoc getLoc() const;
1389   void lex();
1390 
1391 public:
1392   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1393   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1394 
1395   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1396   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1397   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1398   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1399   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1400   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1401 
1402   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1403                             const unsigned MinVal,
1404                             const unsigned MaxVal,
1405                             const StringRef ErrMsg);
1406   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1407   bool parseSwizzleOffset(int64_t &Imm);
1408   bool parseSwizzleMacro(int64_t &Imm);
1409   bool parseSwizzleQuadPerm(int64_t &Imm);
1410   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1411   bool parseSwizzleBroadcast(int64_t &Imm);
1412   bool parseSwizzleSwap(int64_t &Imm);
1413   bool parseSwizzleReverse(int64_t &Imm);
1414 
1415   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1416   int64_t parseGPRIdxMacro();
1417 
1418   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1419   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1420   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1421   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1422   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1423 
1424   AMDGPUOperand::Ptr defaultDLC() const;
1425   AMDGPUOperand::Ptr defaultGLC() const;
1426   AMDGPUOperand::Ptr defaultSLC() const;
1427 
1428   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1429   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1430   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1431   AMDGPUOperand::Ptr defaultFlatOffset() const;
1432 
1433   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1434 
1435   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1436                OptionalImmIndexMap &OptionalIdx);
1437   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1438   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1439   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1440 
1441   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1442 
1443   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1444                bool IsAtomic = false);
1445   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1446 
1447   OperandMatchResultTy parseDim(OperandVector &Operands);
1448   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1449   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1450   AMDGPUOperand::Ptr defaultRowMask() const;
1451   AMDGPUOperand::Ptr defaultBankMask() const;
1452   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1453   AMDGPUOperand::Ptr defaultFI() const;
1454   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1455   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1456 
1457   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1458                                     AMDGPUOperand::ImmTy Type);
1459   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1460   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1461   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1462   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1463   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1464   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1465   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1466                uint64_t BasicInstType,
1467                bool SkipDstVcc = false,
1468                bool SkipSrcVcc = false);
1469 
1470   AMDGPUOperand::Ptr defaultBLGP() const;
1471   AMDGPUOperand::Ptr defaultCBSZ() const;
1472   AMDGPUOperand::Ptr defaultABID() const;
1473 
1474   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1475   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1476 };
1477 
1478 struct OptionalOperand {
1479   const char *Name;
1480   AMDGPUOperand::ImmTy Type;
1481   bool IsBit;
1482   bool (*ConvertResult)(int64_t&);
1483 };
1484 
1485 } // end anonymous namespace
1486 
1487 // May be called with integer type with equivalent bitwidth.
1488 static const fltSemantics *getFltSemantics(unsigned Size) {
1489   switch (Size) {
1490   case 4:
1491     return &APFloat::IEEEsingle();
1492   case 8:
1493     return &APFloat::IEEEdouble();
1494   case 2:
1495     return &APFloat::IEEEhalf();
1496   default:
1497     llvm_unreachable("unsupported fp type");
1498   }
1499 }
1500 
1501 static const fltSemantics *getFltSemantics(MVT VT) {
1502   return getFltSemantics(VT.getSizeInBits() / 8);
1503 }
1504 
1505 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1506   switch (OperandType) {
1507   case AMDGPU::OPERAND_REG_IMM_INT32:
1508   case AMDGPU::OPERAND_REG_IMM_FP32:
1509   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1510   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1511   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1512   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1513     return &APFloat::IEEEsingle();
1514   case AMDGPU::OPERAND_REG_IMM_INT64:
1515   case AMDGPU::OPERAND_REG_IMM_FP64:
1516   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1517   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1518     return &APFloat::IEEEdouble();
1519   case AMDGPU::OPERAND_REG_IMM_INT16:
1520   case AMDGPU::OPERAND_REG_IMM_FP16:
1521   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1522   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1523   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1524   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1525   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1526   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1527   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1528   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1529   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1530   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1531     return &APFloat::IEEEhalf();
1532   default:
1533     llvm_unreachable("unsupported fp type");
1534   }
1535 }
1536 
1537 //===----------------------------------------------------------------------===//
1538 // Operand
1539 //===----------------------------------------------------------------------===//
1540 
1541 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1542   bool Lost;
1543 
1544   // Convert literal to single precision
1545   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1546                                                APFloat::rmNearestTiesToEven,
1547                                                &Lost);
1548   // We allow precision lost but not overflow or underflow
1549   if (Status != APFloat::opOK &&
1550       Lost &&
1551       ((Status & APFloat::opOverflow)  != 0 ||
1552        (Status & APFloat::opUnderflow) != 0)) {
1553     return false;
1554   }
1555 
1556   return true;
1557 }
1558 
1559 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1560   return isUIntN(Size, Val) || isIntN(Size, Val);
1561 }
1562 
1563 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1564   if (VT.getScalarType() == MVT::i16) {
1565     // FP immediate values are broken.
1566     return isInlinableIntLiteral(Val);
1567   }
1568 
1569   // f16/v2f16 operands work correctly for all values.
1570   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1571 }
1572 
1573 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1574 
1575   // This is a hack to enable named inline values like
1576   // shared_base with both 32-bit and 64-bit operands.
1577   // Note that these values are defined as
1578   // 32-bit operands only.
1579   if (isInlineValue()) {
1580     return true;
1581   }
1582 
1583   if (!isImmTy(ImmTyNone)) {
1584     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1585     return false;
1586   }
1587   // TODO: We should avoid using host float here. It would be better to
1588   // check the float bit values which is what a few other places do.
1589   // We've had bot failures before due to weird NaN support on mips hosts.
1590 
1591   APInt Literal(64, Imm.Val);
1592 
1593   if (Imm.IsFPImm) { // We got fp literal token
1594     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1595       return AMDGPU::isInlinableLiteral64(Imm.Val,
1596                                           AsmParser->hasInv2PiInlineImm());
1597     }
1598 
1599     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1600     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1601       return false;
1602 
1603     if (type.getScalarSizeInBits() == 16) {
1604       return isInlineableLiteralOp16(
1605         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1606         type, AsmParser->hasInv2PiInlineImm());
1607     }
1608 
1609     // Check if single precision literal is inlinable
1610     return AMDGPU::isInlinableLiteral32(
1611       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1612       AsmParser->hasInv2PiInlineImm());
1613   }
1614 
1615   // We got int literal token.
1616   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1617     return AMDGPU::isInlinableLiteral64(Imm.Val,
1618                                         AsmParser->hasInv2PiInlineImm());
1619   }
1620 
1621   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1622     return false;
1623   }
1624 
1625   if (type.getScalarSizeInBits() == 16) {
1626     return isInlineableLiteralOp16(
1627       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1628       type, AsmParser->hasInv2PiInlineImm());
1629   }
1630 
1631   return AMDGPU::isInlinableLiteral32(
1632     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1633     AsmParser->hasInv2PiInlineImm());
1634 }
1635 
1636 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1637   // Check that this immediate can be added as literal
1638   if (!isImmTy(ImmTyNone)) {
1639     return false;
1640   }
1641 
1642   if (!Imm.IsFPImm) {
1643     // We got int literal token.
1644 
1645     if (type == MVT::f64 && hasFPModifiers()) {
1646       // Cannot apply fp modifiers to int literals preserving the same semantics
1647       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1648       // disable these cases.
1649       return false;
1650     }
1651 
1652     unsigned Size = type.getSizeInBits();
1653     if (Size == 64)
1654       Size = 32;
1655 
1656     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1657     // types.
1658     return isSafeTruncation(Imm.Val, Size);
1659   }
1660 
1661   // We got fp literal token
1662   if (type == MVT::f64) { // Expected 64-bit fp operand
1663     // We would set low 64-bits of literal to zeroes but we accept this literals
1664     return true;
1665   }
1666 
1667   if (type == MVT::i64) { // Expected 64-bit int operand
1668     // We don't allow fp literals in 64-bit integer instructions. It is
1669     // unclear how we should encode them.
1670     return false;
1671   }
1672 
1673   // We allow fp literals with f16x2 operands assuming that the specified
1674   // literal goes into the lower half and the upper half is zero. We also
1675   // require that the literal may be losslesly converted to f16.
1676   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1677                      (type == MVT::v2i16)? MVT::i16 : type;
1678 
1679   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1680   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1681 }
1682 
1683 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1684   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1685 }
1686 
1687 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1688   if (AsmParser->isVI())
1689     return isVReg32();
1690   else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1691     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1692   else
1693     return false;
1694 }
1695 
1696 bool AMDGPUOperand::isSDWAFP16Operand() const {
1697   return isSDWAOperand(MVT::f16);
1698 }
1699 
1700 bool AMDGPUOperand::isSDWAFP32Operand() const {
1701   return isSDWAOperand(MVT::f32);
1702 }
1703 
1704 bool AMDGPUOperand::isSDWAInt16Operand() const {
1705   return isSDWAOperand(MVT::i16);
1706 }
1707 
1708 bool AMDGPUOperand::isSDWAInt32Operand() const {
1709   return isSDWAOperand(MVT::i32);
1710 }
1711 
1712 bool AMDGPUOperand::isBoolReg() const {
1713   return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1714          (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1715 }
1716 
1717 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1718 {
1719   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1720   assert(Size == 2 || Size == 4 || Size == 8);
1721 
1722   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1723 
1724   if (Imm.Mods.Abs) {
1725     Val &= ~FpSignMask;
1726   }
1727   if (Imm.Mods.Neg) {
1728     Val ^= FpSignMask;
1729   }
1730 
1731   return Val;
1732 }
1733 
1734 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1735   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1736                              Inst.getNumOperands())) {
1737     addLiteralImmOperand(Inst, Imm.Val,
1738                          ApplyModifiers &
1739                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1740   } else {
1741     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1742     Inst.addOperand(MCOperand::createImm(Imm.Val));
1743   }
1744 }
1745 
1746 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1747   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1748   auto OpNum = Inst.getNumOperands();
1749   // Check that this operand accepts literals
1750   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1751 
1752   if (ApplyModifiers) {
1753     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1754     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1755     Val = applyInputFPModifiers(Val, Size);
1756   }
1757 
1758   APInt Literal(64, Val);
1759   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1760 
1761   if (Imm.IsFPImm) { // We got fp literal token
1762     switch (OpTy) {
1763     case AMDGPU::OPERAND_REG_IMM_INT64:
1764     case AMDGPU::OPERAND_REG_IMM_FP64:
1765     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1766     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1767       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1768                                        AsmParser->hasInv2PiInlineImm())) {
1769         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1770         return;
1771       }
1772 
1773       // Non-inlineable
1774       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1775         // For fp operands we check if low 32 bits are zeros
1776         if (Literal.getLoBits(32) != 0) {
1777           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1778           "Can't encode literal as exact 64-bit floating-point operand. "
1779           "Low 32-bits will be set to zero");
1780         }
1781 
1782         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1783         return;
1784       }
1785 
1786       // We don't allow fp literals in 64-bit integer instructions. It is
1787       // unclear how we should encode them. This case should be checked earlier
1788       // in predicate methods (isLiteralImm())
1789       llvm_unreachable("fp literal in 64-bit integer instruction.");
1790 
1791     case AMDGPU::OPERAND_REG_IMM_INT32:
1792     case AMDGPU::OPERAND_REG_IMM_FP32:
1793     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1794     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1795     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1796     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1797     case AMDGPU::OPERAND_REG_IMM_INT16:
1798     case AMDGPU::OPERAND_REG_IMM_FP16:
1799     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1800     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1801     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1802     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1803     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1804     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1805     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1806     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1807     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1808     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1809       bool lost;
1810       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1811       // Convert literal to single precision
1812       FPLiteral.convert(*getOpFltSemantics(OpTy),
1813                         APFloat::rmNearestTiesToEven, &lost);
1814       // We allow precision lost but not overflow or underflow. This should be
1815       // checked earlier in isLiteralImm()
1816 
1817       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1818       Inst.addOperand(MCOperand::createImm(ImmVal));
1819       return;
1820     }
1821     default:
1822       llvm_unreachable("invalid operand size");
1823     }
1824 
1825     return;
1826   }
1827 
1828   // We got int literal token.
1829   // Only sign extend inline immediates.
1830   switch (OpTy) {
1831   case AMDGPU::OPERAND_REG_IMM_INT32:
1832   case AMDGPU::OPERAND_REG_IMM_FP32:
1833   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1834   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1835   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1836   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1837   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1838   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1839     if (isSafeTruncation(Val, 32) &&
1840         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1841                                      AsmParser->hasInv2PiInlineImm())) {
1842       Inst.addOperand(MCOperand::createImm(Val));
1843       return;
1844     }
1845 
1846     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1847     return;
1848 
1849   case AMDGPU::OPERAND_REG_IMM_INT64:
1850   case AMDGPU::OPERAND_REG_IMM_FP64:
1851   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1852   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1853     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1854       Inst.addOperand(MCOperand::createImm(Val));
1855       return;
1856     }
1857 
1858     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1859     return;
1860 
1861   case AMDGPU::OPERAND_REG_IMM_INT16:
1862   case AMDGPU::OPERAND_REG_IMM_FP16:
1863   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1864   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1865   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1866   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1867     if (isSafeTruncation(Val, 16) &&
1868         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1869                                      AsmParser->hasInv2PiInlineImm())) {
1870       Inst.addOperand(MCOperand::createImm(Val));
1871       return;
1872     }
1873 
1874     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1875     return;
1876 
1877   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1878   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1879   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1880   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
1881     assert(isSafeTruncation(Val, 16));
1882     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1883                                         AsmParser->hasInv2PiInlineImm()));
1884 
1885     Inst.addOperand(MCOperand::createImm(Val));
1886     return;
1887   }
1888   default:
1889     llvm_unreachable("invalid operand size");
1890   }
1891 }
1892 
1893 template <unsigned Bitwidth>
1894 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1895   APInt Literal(64, Imm.Val);
1896 
1897   if (!Imm.IsFPImm) {
1898     // We got int literal token.
1899     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1900     return;
1901   }
1902 
1903   bool Lost;
1904   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1905   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1906                     APFloat::rmNearestTiesToEven, &Lost);
1907   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1908 }
1909 
1910 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1911   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1912 }
1913 
1914 static bool isInlineValue(unsigned Reg) {
1915   switch (Reg) {
1916   case AMDGPU::SRC_SHARED_BASE:
1917   case AMDGPU::SRC_SHARED_LIMIT:
1918   case AMDGPU::SRC_PRIVATE_BASE:
1919   case AMDGPU::SRC_PRIVATE_LIMIT:
1920   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1921     return true;
1922   case AMDGPU::SRC_VCCZ:
1923   case AMDGPU::SRC_EXECZ:
1924   case AMDGPU::SRC_SCC:
1925     return true;
1926   case AMDGPU::SGPR_NULL:
1927     return true;
1928   default:
1929     return false;
1930   }
1931 }
1932 
1933 bool AMDGPUOperand::isInlineValue() const {
1934   return isRegKind() && ::isInlineValue(getReg());
1935 }
1936 
1937 //===----------------------------------------------------------------------===//
1938 // AsmParser
1939 //===----------------------------------------------------------------------===//
1940 
1941 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1942   if (Is == IS_VGPR) {
1943     switch (RegWidth) {
1944       default: return -1;
1945       case 1: return AMDGPU::VGPR_32RegClassID;
1946       case 2: return AMDGPU::VReg_64RegClassID;
1947       case 3: return AMDGPU::VReg_96RegClassID;
1948       case 4: return AMDGPU::VReg_128RegClassID;
1949       case 5: return AMDGPU::VReg_160RegClassID;
1950       case 6: return AMDGPU::VReg_192RegClassID;
1951       case 8: return AMDGPU::VReg_256RegClassID;
1952       case 16: return AMDGPU::VReg_512RegClassID;
1953       case 32: return AMDGPU::VReg_1024RegClassID;
1954     }
1955   } else if (Is == IS_TTMP) {
1956     switch (RegWidth) {
1957       default: return -1;
1958       case 1: return AMDGPU::TTMP_32RegClassID;
1959       case 2: return AMDGPU::TTMP_64RegClassID;
1960       case 4: return AMDGPU::TTMP_128RegClassID;
1961       case 8: return AMDGPU::TTMP_256RegClassID;
1962       case 16: return AMDGPU::TTMP_512RegClassID;
1963     }
1964   } else if (Is == IS_SGPR) {
1965     switch (RegWidth) {
1966       default: return -1;
1967       case 1: return AMDGPU::SGPR_32RegClassID;
1968       case 2: return AMDGPU::SGPR_64RegClassID;
1969       case 3: return AMDGPU::SGPR_96RegClassID;
1970       case 4: return AMDGPU::SGPR_128RegClassID;
1971       case 5: return AMDGPU::SGPR_160RegClassID;
1972       case 6: return AMDGPU::SGPR_192RegClassID;
1973       case 8: return AMDGPU::SGPR_256RegClassID;
1974       case 16: return AMDGPU::SGPR_512RegClassID;
1975     }
1976   } else if (Is == IS_AGPR) {
1977     switch (RegWidth) {
1978       default: return -1;
1979       case 1: return AMDGPU::AGPR_32RegClassID;
1980       case 2: return AMDGPU::AReg_64RegClassID;
1981       case 3: return AMDGPU::AReg_96RegClassID;
1982       case 4: return AMDGPU::AReg_128RegClassID;
1983       case 5: return AMDGPU::AReg_160RegClassID;
1984       case 6: return AMDGPU::AReg_192RegClassID;
1985       case 8: return AMDGPU::AReg_256RegClassID;
1986       case 16: return AMDGPU::AReg_512RegClassID;
1987       case 32: return AMDGPU::AReg_1024RegClassID;
1988     }
1989   }
1990   return -1;
1991 }
1992 
1993 static unsigned getSpecialRegForName(StringRef RegName) {
1994   return StringSwitch<unsigned>(RegName)
1995     .Case("exec", AMDGPU::EXEC)
1996     .Case("vcc", AMDGPU::VCC)
1997     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1998     .Case("xnack_mask", AMDGPU::XNACK_MASK)
1999     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2000     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2001     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2002     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2003     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2004     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2005     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2006     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2007     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2008     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2009     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2010     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2011     .Case("m0", AMDGPU::M0)
2012     .Case("vccz", AMDGPU::SRC_VCCZ)
2013     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2014     .Case("execz", AMDGPU::SRC_EXECZ)
2015     .Case("src_execz", AMDGPU::SRC_EXECZ)
2016     .Case("scc", AMDGPU::SRC_SCC)
2017     .Case("src_scc", AMDGPU::SRC_SCC)
2018     .Case("tba", AMDGPU::TBA)
2019     .Case("tma", AMDGPU::TMA)
2020     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2021     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2022     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2023     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2024     .Case("vcc_lo", AMDGPU::VCC_LO)
2025     .Case("vcc_hi", AMDGPU::VCC_HI)
2026     .Case("exec_lo", AMDGPU::EXEC_LO)
2027     .Case("exec_hi", AMDGPU::EXEC_HI)
2028     .Case("tma_lo", AMDGPU::TMA_LO)
2029     .Case("tma_hi", AMDGPU::TMA_HI)
2030     .Case("tba_lo", AMDGPU::TBA_LO)
2031     .Case("tba_hi", AMDGPU::TBA_HI)
2032     .Case("pc", AMDGPU::PC_REG)
2033     .Case("null", AMDGPU::SGPR_NULL)
2034     .Default(AMDGPU::NoRegister);
2035 }
2036 
2037 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2038                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2039   auto R = parseRegister();
2040   if (!R) return true;
2041   assert(R->isReg());
2042   RegNo = R->getReg();
2043   StartLoc = R->getStartLoc();
2044   EndLoc = R->getEndLoc();
2045   return false;
2046 }
2047 
2048 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2049                                     SMLoc &EndLoc) {
2050   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2051 }
2052 
2053 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2054                                                        SMLoc &StartLoc,
2055                                                        SMLoc &EndLoc) {
2056   bool Result =
2057       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2058   bool PendingErrors = getParser().hasPendingError();
2059   getParser().clearPendingErrors();
2060   if (PendingErrors)
2061     return MatchOperand_ParseFail;
2062   if (Result)
2063     return MatchOperand_NoMatch;
2064   return MatchOperand_Success;
2065 }
2066 
2067 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2068                                             RegisterKind RegKind, unsigned Reg1) {
2069   switch (RegKind) {
2070   case IS_SPECIAL:
2071     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2072       Reg = AMDGPU::EXEC;
2073       RegWidth = 2;
2074       return true;
2075     }
2076     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2077       Reg = AMDGPU::FLAT_SCR;
2078       RegWidth = 2;
2079       return true;
2080     }
2081     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2082       Reg = AMDGPU::XNACK_MASK;
2083       RegWidth = 2;
2084       return true;
2085     }
2086     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2087       Reg = AMDGPU::VCC;
2088       RegWidth = 2;
2089       return true;
2090     }
2091     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2092       Reg = AMDGPU::TBA;
2093       RegWidth = 2;
2094       return true;
2095     }
2096     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2097       Reg = AMDGPU::TMA;
2098       RegWidth = 2;
2099       return true;
2100     }
2101     return false;
2102   case IS_VGPR:
2103   case IS_SGPR:
2104   case IS_AGPR:
2105   case IS_TTMP:
2106     if (Reg1 != Reg + RegWidth) {
2107       return false;
2108     }
2109     RegWidth++;
2110     return true;
2111   default:
2112     llvm_unreachable("unexpected register kind");
2113   }
2114 }
2115 
2116 struct RegInfo {
2117   StringLiteral Name;
2118   RegisterKind Kind;
2119 };
2120 
2121 static constexpr RegInfo RegularRegisters[] = {
2122   {{"v"},    IS_VGPR},
2123   {{"s"},    IS_SGPR},
2124   {{"ttmp"}, IS_TTMP},
2125   {{"acc"},  IS_AGPR},
2126   {{"a"},    IS_AGPR},
2127 };
2128 
2129 static bool isRegularReg(RegisterKind Kind) {
2130   return Kind == IS_VGPR ||
2131          Kind == IS_SGPR ||
2132          Kind == IS_TTMP ||
2133          Kind == IS_AGPR;
2134 }
2135 
2136 static const RegInfo* getRegularRegInfo(StringRef Str) {
2137   for (const RegInfo &Reg : RegularRegisters)
2138     if (Str.startswith(Reg.Name))
2139       return &Reg;
2140   return nullptr;
2141 }
2142 
2143 static bool getRegNum(StringRef Str, unsigned& Num) {
2144   return !Str.getAsInteger(10, Num);
2145 }
2146 
2147 bool
2148 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2149                             const AsmToken &NextToken) const {
2150 
2151   // A list of consecutive registers: [s0,s1,s2,s3]
2152   if (Token.is(AsmToken::LBrac))
2153     return true;
2154 
2155   if (!Token.is(AsmToken::Identifier))
2156     return false;
2157 
2158   // A single register like s0 or a range of registers like s[0:1]
2159 
2160   StringRef Str = Token.getString();
2161   const RegInfo *Reg = getRegularRegInfo(Str);
2162   if (Reg) {
2163     StringRef RegName = Reg->Name;
2164     StringRef RegSuffix = Str.substr(RegName.size());
2165     if (!RegSuffix.empty()) {
2166       unsigned Num;
2167       // A single register with an index: rXX
2168       if (getRegNum(RegSuffix, Num))
2169         return true;
2170     } else {
2171       // A range of registers: r[XX:YY].
2172       if (NextToken.is(AsmToken::LBrac))
2173         return true;
2174     }
2175   }
2176 
2177   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2178 }
2179 
2180 bool
2181 AMDGPUAsmParser::isRegister()
2182 {
2183   return isRegister(getToken(), peekToken());
2184 }
2185 
2186 unsigned
2187 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2188                                unsigned RegNum,
2189                                unsigned RegWidth) {
2190 
2191   assert(isRegularReg(RegKind));
2192 
2193   unsigned AlignSize = 1;
2194   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2195     // SGPR and TTMP registers must be aligned.
2196     // Max required alignment is 4 dwords.
2197     AlignSize = std::min(RegWidth, 4u);
2198   }
2199 
2200   if (RegNum % AlignSize != 0)
2201     return AMDGPU::NoRegister;
2202 
2203   unsigned RegIdx = RegNum / AlignSize;
2204   int RCID = getRegClass(RegKind, RegWidth);
2205   if (RCID == -1)
2206     return AMDGPU::NoRegister;
2207 
2208   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2209   const MCRegisterClass RC = TRI->getRegClass(RCID);
2210   if (RegIdx >= RC.getNumRegs())
2211     return AMDGPU::NoRegister;
2212 
2213   return RC.getRegister(RegIdx);
2214 }
2215 
2216 bool
2217 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2218   int64_t RegLo, RegHi;
2219   if (!trySkipToken(AsmToken::LBrac))
2220     return false;
2221 
2222   if (!parseExpr(RegLo))
2223     return false;
2224 
2225   if (trySkipToken(AsmToken::Colon)) {
2226     if (!parseExpr(RegHi))
2227       return false;
2228   } else {
2229     RegHi = RegLo;
2230   }
2231 
2232   if (!trySkipToken(AsmToken::RBrac))
2233     return false;
2234 
2235   if (!isUInt<32>(RegLo) || !isUInt<32>(RegHi) || RegLo > RegHi)
2236     return false;
2237 
2238   Num = static_cast<unsigned>(RegLo);
2239   Width = (RegHi - RegLo) + 1;
2240   return true;
2241 }
2242 
2243 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2244                                           unsigned &RegNum, unsigned &RegWidth,
2245                                           SmallVectorImpl<AsmToken> &Tokens) {
2246   assert(isToken(AsmToken::Identifier));
2247   unsigned Reg = getSpecialRegForName(getTokenStr());
2248   if (Reg) {
2249     RegNum = 0;
2250     RegWidth = 1;
2251     RegKind = IS_SPECIAL;
2252     Tokens.push_back(getToken());
2253     lex(); // skip register name
2254   }
2255   return Reg;
2256 }
2257 
2258 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2259                                           unsigned &RegNum, unsigned &RegWidth,
2260                                           SmallVectorImpl<AsmToken> &Tokens) {
2261   assert(isToken(AsmToken::Identifier));
2262   StringRef RegName = getTokenStr();
2263 
2264   const RegInfo *RI = getRegularRegInfo(RegName);
2265   if (!RI)
2266     return AMDGPU::NoRegister;
2267   Tokens.push_back(getToken());
2268   lex(); // skip register name
2269 
2270   RegKind = RI->Kind;
2271   StringRef RegSuffix = RegName.substr(RI->Name.size());
2272   if (!RegSuffix.empty()) {
2273     // Single 32-bit register: vXX.
2274     if (!getRegNum(RegSuffix, RegNum))
2275       return AMDGPU::NoRegister;
2276     RegWidth = 1;
2277   } else {
2278     // Range of registers: v[XX:YY]. ":YY" is optional.
2279     if (!ParseRegRange(RegNum, RegWidth))
2280       return AMDGPU::NoRegister;
2281   }
2282 
2283   return getRegularReg(RegKind, RegNum, RegWidth);
2284 }
2285 
2286 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2287                                        unsigned &RegWidth,
2288                                        SmallVectorImpl<AsmToken> &Tokens) {
2289   unsigned Reg = AMDGPU::NoRegister;
2290 
2291   if (!trySkipToken(AsmToken::LBrac))
2292     return AMDGPU::NoRegister;
2293 
2294   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2295 
2296   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2297     return AMDGPU::NoRegister;
2298   if (RegWidth != 1)
2299     return AMDGPU::NoRegister;
2300 
2301   for (; trySkipToken(AsmToken::Comma); ) {
2302     RegisterKind NextRegKind;
2303     unsigned NextReg, NextRegNum, NextRegWidth;
2304 
2305     if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth,
2306                              Tokens))
2307       return AMDGPU::NoRegister;
2308     if (NextRegWidth != 1)
2309       return AMDGPU::NoRegister;
2310     if (NextRegKind != RegKind)
2311       return AMDGPU::NoRegister;
2312     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg))
2313       return AMDGPU::NoRegister;
2314   }
2315 
2316   if (!trySkipToken(AsmToken::RBrac))
2317     return AMDGPU::NoRegister;
2318 
2319   if (isRegularReg(RegKind))
2320     Reg = getRegularReg(RegKind, RegNum, RegWidth);
2321 
2322   return Reg;
2323 }
2324 
2325 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2326                                           unsigned &RegNum, unsigned &RegWidth,
2327                                           SmallVectorImpl<AsmToken> &Tokens) {
2328   Reg = AMDGPU::NoRegister;
2329 
2330   if (isToken(AsmToken::Identifier)) {
2331     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2332     if (Reg == AMDGPU::NoRegister)
2333       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2334   } else {
2335     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2336   }
2337 
2338   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2339   return Reg != AMDGPU::NoRegister && subtargetHasRegister(*TRI, Reg);
2340 }
2341 
2342 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2343                                           unsigned &RegNum, unsigned &RegWidth,
2344                                           bool RestoreOnFailure) {
2345   Reg = AMDGPU::NoRegister;
2346 
2347   SmallVector<AsmToken, 1> Tokens;
2348   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2349     if (RestoreOnFailure) {
2350       while (!Tokens.empty()) {
2351         getLexer().UnLex(Tokens.pop_back_val());
2352       }
2353     }
2354     return true;
2355   }
2356   return false;
2357 }
2358 
2359 Optional<StringRef>
2360 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2361   switch (RegKind) {
2362   case IS_VGPR:
2363     return StringRef(".amdgcn.next_free_vgpr");
2364   case IS_SGPR:
2365     return StringRef(".amdgcn.next_free_sgpr");
2366   default:
2367     return None;
2368   }
2369 }
2370 
2371 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2372   auto SymbolName = getGprCountSymbolName(RegKind);
2373   assert(SymbolName && "initializing invalid register kind");
2374   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2375   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2376 }
2377 
2378 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2379                                             unsigned DwordRegIndex,
2380                                             unsigned RegWidth) {
2381   // Symbols are only defined for GCN targets
2382   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2383     return true;
2384 
2385   auto SymbolName = getGprCountSymbolName(RegKind);
2386   if (!SymbolName)
2387     return true;
2388   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2389 
2390   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2391   int64_t OldCount;
2392 
2393   if (!Sym->isVariable())
2394     return !Error(getParser().getTok().getLoc(),
2395                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2396   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2397     return !Error(
2398         getParser().getTok().getLoc(),
2399         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2400 
2401   if (OldCount <= NewMax)
2402     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2403 
2404   return true;
2405 }
2406 
2407 std::unique_ptr<AMDGPUOperand>
2408 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2409   const auto &Tok = Parser.getTok();
2410   SMLoc StartLoc = Tok.getLoc();
2411   SMLoc EndLoc = Tok.getEndLoc();
2412   RegisterKind RegKind;
2413   unsigned Reg, RegNum, RegWidth;
2414 
2415   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2416     //FIXME: improve error messages (bug 41303).
2417     Error(StartLoc, "not a valid operand.");
2418     return nullptr;
2419   }
2420   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2421     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2422       return nullptr;
2423   } else
2424     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2425   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2426 }
2427 
2428 OperandMatchResultTy
2429 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2430   // TODO: add syntactic sugar for 1/(2*PI)
2431 
2432   assert(!isRegister());
2433   assert(!isModifier());
2434 
2435   const auto& Tok = getToken();
2436   const auto& NextTok = peekToken();
2437   bool IsReal = Tok.is(AsmToken::Real);
2438   SMLoc S = getLoc();
2439   bool Negate = false;
2440 
2441   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2442     lex();
2443     IsReal = true;
2444     Negate = true;
2445   }
2446 
2447   if (IsReal) {
2448     // Floating-point expressions are not supported.
2449     // Can only allow floating-point literals with an
2450     // optional sign.
2451 
2452     StringRef Num = getTokenStr();
2453     lex();
2454 
2455     APFloat RealVal(APFloat::IEEEdouble());
2456     auto roundMode = APFloat::rmNearestTiesToEven;
2457     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2458       return MatchOperand_ParseFail;
2459     }
2460     if (Negate)
2461       RealVal.changeSign();
2462 
2463     Operands.push_back(
2464       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2465                                AMDGPUOperand::ImmTyNone, true));
2466 
2467     return MatchOperand_Success;
2468 
2469   } else {
2470     int64_t IntVal;
2471     const MCExpr *Expr;
2472     SMLoc S = getLoc();
2473 
2474     if (HasSP3AbsModifier) {
2475       // This is a workaround for handling expressions
2476       // as arguments of SP3 'abs' modifier, for example:
2477       //     |1.0|
2478       //     |-1|
2479       //     |1+x|
2480       // This syntax is not compatible with syntax of standard
2481       // MC expressions (due to the trailing '|').
2482       SMLoc EndLoc;
2483       if (getParser().parsePrimaryExpr(Expr, EndLoc))
2484         return MatchOperand_ParseFail;
2485     } else {
2486       if (Parser.parseExpression(Expr))
2487         return MatchOperand_ParseFail;
2488     }
2489 
2490     if (Expr->evaluateAsAbsolute(IntVal)) {
2491       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2492     } else {
2493       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2494     }
2495 
2496     return MatchOperand_Success;
2497   }
2498 
2499   return MatchOperand_NoMatch;
2500 }
2501 
2502 OperandMatchResultTy
2503 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2504   if (!isRegister())
2505     return MatchOperand_NoMatch;
2506 
2507   if (auto R = parseRegister()) {
2508     assert(R->isReg());
2509     Operands.push_back(std::move(R));
2510     return MatchOperand_Success;
2511   }
2512   return MatchOperand_ParseFail;
2513 }
2514 
2515 OperandMatchResultTy
2516 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2517   auto res = parseReg(Operands);
2518   if (res != MatchOperand_NoMatch) {
2519     return res;
2520   } else if (isModifier()) {
2521     return MatchOperand_NoMatch;
2522   } else {
2523     return parseImm(Operands, HasSP3AbsMod);
2524   }
2525 }
2526 
2527 bool
2528 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2529   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2530     const auto &str = Token.getString();
2531     return str == "abs" || str == "neg" || str == "sext";
2532   }
2533   return false;
2534 }
2535 
2536 bool
2537 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2538   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2539 }
2540 
2541 bool
2542 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2543   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2544 }
2545 
2546 bool
2547 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2548   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2549 }
2550 
2551 // Check if this is an operand modifier or an opcode modifier
2552 // which may look like an expression but it is not. We should
2553 // avoid parsing these modifiers as expressions. Currently
2554 // recognized sequences are:
2555 //   |...|
2556 //   abs(...)
2557 //   neg(...)
2558 //   sext(...)
2559 //   -reg
2560 //   -|...|
2561 //   -abs(...)
2562 //   name:...
2563 // Note that simple opcode modifiers like 'gds' may be parsed as
2564 // expressions; this is a special case. See getExpressionAsToken.
2565 //
2566 bool
2567 AMDGPUAsmParser::isModifier() {
2568 
2569   AsmToken Tok = getToken();
2570   AsmToken NextToken[2];
2571   peekTokens(NextToken);
2572 
2573   return isOperandModifier(Tok, NextToken[0]) ||
2574          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2575          isOpcodeModifierWithVal(Tok, NextToken[0]);
2576 }
2577 
2578 // Check if the current token is an SP3 'neg' modifier.
2579 // Currently this modifier is allowed in the following context:
2580 //
2581 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2582 // 2. Before an 'abs' modifier: -abs(...)
2583 // 3. Before an SP3 'abs' modifier: -|...|
2584 //
2585 // In all other cases "-" is handled as a part
2586 // of an expression that follows the sign.
2587 //
2588 // Note: When "-" is followed by an integer literal,
2589 // this is interpreted as integer negation rather
2590 // than a floating-point NEG modifier applied to N.
2591 // Beside being contr-intuitive, such use of floating-point
2592 // NEG modifier would have resulted in different meaning
2593 // of integer literals used with VOP1/2/C and VOP3,
2594 // for example:
2595 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2596 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2597 // Negative fp literals with preceding "-" are
2598 // handled likewise for unifomtity
2599 //
2600 bool
2601 AMDGPUAsmParser::parseSP3NegModifier() {
2602 
2603   AsmToken NextToken[2];
2604   peekTokens(NextToken);
2605 
2606   if (isToken(AsmToken::Minus) &&
2607       (isRegister(NextToken[0], NextToken[1]) ||
2608        NextToken[0].is(AsmToken::Pipe) ||
2609        isId(NextToken[0], "abs"))) {
2610     lex();
2611     return true;
2612   }
2613 
2614   return false;
2615 }
2616 
2617 OperandMatchResultTy
2618 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2619                                               bool AllowImm) {
2620   bool Neg, SP3Neg;
2621   bool Abs, SP3Abs;
2622   SMLoc Loc;
2623 
2624   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2625   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2626     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2627     return MatchOperand_ParseFail;
2628   }
2629 
2630   SP3Neg = parseSP3NegModifier();
2631 
2632   Loc = getLoc();
2633   Neg = trySkipId("neg");
2634   if (Neg && SP3Neg) {
2635     Error(Loc, "expected register or immediate");
2636     return MatchOperand_ParseFail;
2637   }
2638   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2639     return MatchOperand_ParseFail;
2640 
2641   Abs = trySkipId("abs");
2642   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2643     return MatchOperand_ParseFail;
2644 
2645   Loc = getLoc();
2646   SP3Abs = trySkipToken(AsmToken::Pipe);
2647   if (Abs && SP3Abs) {
2648     Error(Loc, "expected register or immediate");
2649     return MatchOperand_ParseFail;
2650   }
2651 
2652   OperandMatchResultTy Res;
2653   if (AllowImm) {
2654     Res = parseRegOrImm(Operands, SP3Abs);
2655   } else {
2656     Res = parseReg(Operands);
2657   }
2658   if (Res != MatchOperand_Success) {
2659     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2660   }
2661 
2662   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2663     return MatchOperand_ParseFail;
2664   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2665     return MatchOperand_ParseFail;
2666   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2667     return MatchOperand_ParseFail;
2668 
2669   AMDGPUOperand::Modifiers Mods;
2670   Mods.Abs = Abs || SP3Abs;
2671   Mods.Neg = Neg || SP3Neg;
2672 
2673   if (Mods.hasFPModifiers()) {
2674     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2675     if (Op.isExpr()) {
2676       Error(Op.getStartLoc(), "expected an absolute expression");
2677       return MatchOperand_ParseFail;
2678     }
2679     Op.setModifiers(Mods);
2680   }
2681   return MatchOperand_Success;
2682 }
2683 
2684 OperandMatchResultTy
2685 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2686                                                bool AllowImm) {
2687   bool Sext = trySkipId("sext");
2688   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2689     return MatchOperand_ParseFail;
2690 
2691   OperandMatchResultTy Res;
2692   if (AllowImm) {
2693     Res = parseRegOrImm(Operands);
2694   } else {
2695     Res = parseReg(Operands);
2696   }
2697   if (Res != MatchOperand_Success) {
2698     return Sext? MatchOperand_ParseFail : Res;
2699   }
2700 
2701   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2702     return MatchOperand_ParseFail;
2703 
2704   AMDGPUOperand::Modifiers Mods;
2705   Mods.Sext = Sext;
2706 
2707   if (Mods.hasIntModifiers()) {
2708     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2709     if (Op.isExpr()) {
2710       Error(Op.getStartLoc(), "expected an absolute expression");
2711       return MatchOperand_ParseFail;
2712     }
2713     Op.setModifiers(Mods);
2714   }
2715 
2716   return MatchOperand_Success;
2717 }
2718 
2719 OperandMatchResultTy
2720 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2721   return parseRegOrImmWithFPInputMods(Operands, false);
2722 }
2723 
2724 OperandMatchResultTy
2725 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2726   return parseRegOrImmWithIntInputMods(Operands, false);
2727 }
2728 
2729 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2730   auto Loc = getLoc();
2731   if (trySkipId("off")) {
2732     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2733                                                 AMDGPUOperand::ImmTyOff, false));
2734     return MatchOperand_Success;
2735   }
2736 
2737   if (!isRegister())
2738     return MatchOperand_NoMatch;
2739 
2740   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2741   if (Reg) {
2742     Operands.push_back(std::move(Reg));
2743     return MatchOperand_Success;
2744   }
2745 
2746   return MatchOperand_ParseFail;
2747 
2748 }
2749 
2750 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2751   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2752 
2753   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2754       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2755       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2756       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2757     return Match_InvalidOperand;
2758 
2759   if ((TSFlags & SIInstrFlags::VOP3) &&
2760       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2761       getForcedEncodingSize() != 64)
2762     return Match_PreferE32;
2763 
2764   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2765       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2766     // v_mac_f32/16 allow only dst_sel == DWORD;
2767     auto OpNum =
2768         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2769     const auto &Op = Inst.getOperand(OpNum);
2770     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2771       return Match_InvalidOperand;
2772     }
2773   }
2774 
2775   return Match_Success;
2776 }
2777 
2778 // What asm variants we should check
2779 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2780   if (getForcedEncodingSize() == 32) {
2781     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2782     return makeArrayRef(Variants);
2783   }
2784 
2785   if (isForcedVOP3()) {
2786     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2787     return makeArrayRef(Variants);
2788   }
2789 
2790   if (isForcedSDWA()) {
2791     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2792                                         AMDGPUAsmVariants::SDWA9};
2793     return makeArrayRef(Variants);
2794   }
2795 
2796   if (isForcedDPP()) {
2797     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2798     return makeArrayRef(Variants);
2799   }
2800 
2801   static const unsigned Variants[] = {
2802     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2803     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2804   };
2805 
2806   return makeArrayRef(Variants);
2807 }
2808 
2809 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2810   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2811   const unsigned Num = Desc.getNumImplicitUses();
2812   for (unsigned i = 0; i < Num; ++i) {
2813     unsigned Reg = Desc.ImplicitUses[i];
2814     switch (Reg) {
2815     case AMDGPU::FLAT_SCR:
2816     case AMDGPU::VCC:
2817     case AMDGPU::VCC_LO:
2818     case AMDGPU::VCC_HI:
2819     case AMDGPU::M0:
2820       return Reg;
2821     default:
2822       break;
2823     }
2824   }
2825   return AMDGPU::NoRegister;
2826 }
2827 
2828 // NB: This code is correct only when used to check constant
2829 // bus limitations because GFX7 support no f16 inline constants.
2830 // Note that there are no cases when a GFX7 opcode violates
2831 // constant bus limitations due to the use of an f16 constant.
2832 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2833                                        unsigned OpIdx) const {
2834   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2835 
2836   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2837     return false;
2838   }
2839 
2840   const MCOperand &MO = Inst.getOperand(OpIdx);
2841 
2842   int64_t Val = MO.getImm();
2843   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2844 
2845   switch (OpSize) { // expected operand size
2846   case 8:
2847     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2848   case 4:
2849     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2850   case 2: {
2851     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2852     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
2853         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
2854         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
2855       return AMDGPU::isInlinableIntLiteral(Val);
2856 
2857     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2858         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2859         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
2860       return AMDGPU::isInlinableIntLiteralV216(Val);
2861 
2862     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2863         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2864         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
2865       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2866 
2867     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2868   }
2869   default:
2870     llvm_unreachable("invalid operand size");
2871   }
2872 }
2873 
2874 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
2875   if (!isGFX10())
2876     return 1;
2877 
2878   switch (Opcode) {
2879   // 64-bit shift instructions can use only one scalar value input
2880   case AMDGPU::V_LSHLREV_B64:
2881   case AMDGPU::V_LSHLREV_B64_gfx10:
2882   case AMDGPU::V_LSHL_B64:
2883   case AMDGPU::V_LSHRREV_B64:
2884   case AMDGPU::V_LSHRREV_B64_gfx10:
2885   case AMDGPU::V_LSHR_B64:
2886   case AMDGPU::V_ASHRREV_I64:
2887   case AMDGPU::V_ASHRREV_I64_gfx10:
2888   case AMDGPU::V_ASHR_I64:
2889     return 1;
2890   default:
2891     return 2;
2892   }
2893 }
2894 
2895 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2896   const MCOperand &MO = Inst.getOperand(OpIdx);
2897   if (MO.isImm()) {
2898     return !isInlineConstant(Inst, OpIdx);
2899   } else if (MO.isReg()) {
2900     auto Reg = MO.getReg();
2901     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2902     return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL;
2903   } else {
2904     return true;
2905   }
2906 }
2907 
2908 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2909   const unsigned Opcode = Inst.getOpcode();
2910   const MCInstrDesc &Desc = MII.get(Opcode);
2911   unsigned ConstantBusUseCount = 0;
2912   unsigned NumLiterals = 0;
2913   unsigned LiteralSize;
2914 
2915   if (Desc.TSFlags &
2916       (SIInstrFlags::VOPC |
2917        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2918        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2919        SIInstrFlags::SDWA)) {
2920     // Check special imm operands (used by madmk, etc)
2921     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2922       ++ConstantBusUseCount;
2923     }
2924 
2925     SmallDenseSet<unsigned> SGPRsUsed;
2926     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2927     if (SGPRUsed != AMDGPU::NoRegister) {
2928       SGPRsUsed.insert(SGPRUsed);
2929       ++ConstantBusUseCount;
2930     }
2931 
2932     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2933     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2934     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2935 
2936     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2937 
2938     for (int OpIdx : OpIndices) {
2939       if (OpIdx == -1) break;
2940 
2941       const MCOperand &MO = Inst.getOperand(OpIdx);
2942       if (usesConstantBus(Inst, OpIdx)) {
2943         if (MO.isReg()) {
2944           const unsigned Reg = mc2PseudoReg(MO.getReg());
2945           // Pairs of registers with a partial intersections like these
2946           //   s0, s[0:1]
2947           //   flat_scratch_lo, flat_scratch
2948           //   flat_scratch_lo, flat_scratch_hi
2949           // are theoretically valid but they are disabled anyway.
2950           // Note that this code mimics SIInstrInfo::verifyInstruction
2951           if (!SGPRsUsed.count(Reg)) {
2952             SGPRsUsed.insert(Reg);
2953             ++ConstantBusUseCount;
2954           }
2955         } else { // Expression or a literal
2956 
2957           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
2958             continue; // special operand like VINTERP attr_chan
2959 
2960           // An instruction may use only one literal.
2961           // This has been validated on the previous step.
2962           // See validateVOP3Literal.
2963           // This literal may be used as more than one operand.
2964           // If all these operands are of the same size,
2965           // this literal counts as one scalar value.
2966           // Otherwise it counts as 2 scalar values.
2967           // See "GFX10 Shader Programming", section 3.6.2.3.
2968 
2969           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
2970           if (Size < 4) Size = 4;
2971 
2972           if (NumLiterals == 0) {
2973             NumLiterals = 1;
2974             LiteralSize = Size;
2975           } else if (LiteralSize != Size) {
2976             NumLiterals = 2;
2977           }
2978         }
2979       }
2980     }
2981   }
2982   ConstantBusUseCount += NumLiterals;
2983 
2984   return ConstantBusUseCount <= getConstantBusLimit(Opcode);
2985 }
2986 
2987 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2988   const unsigned Opcode = Inst.getOpcode();
2989   const MCInstrDesc &Desc = MII.get(Opcode);
2990 
2991   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2992   if (DstIdx == -1 ||
2993       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2994     return true;
2995   }
2996 
2997   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2998 
2999   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3000   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3001   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3002 
3003   assert(DstIdx != -1);
3004   const MCOperand &Dst = Inst.getOperand(DstIdx);
3005   assert(Dst.isReg());
3006   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3007 
3008   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3009 
3010   for (int SrcIdx : SrcIndices) {
3011     if (SrcIdx == -1) break;
3012     const MCOperand &Src = Inst.getOperand(SrcIdx);
3013     if (Src.isReg()) {
3014       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3015       if (isRegIntersect(DstReg, SrcReg, TRI)) {
3016         return false;
3017       }
3018     }
3019   }
3020 
3021   return true;
3022 }
3023 
3024 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3025 
3026   const unsigned Opc = Inst.getOpcode();
3027   const MCInstrDesc &Desc = MII.get(Opc);
3028 
3029   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3030     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3031     assert(ClampIdx != -1);
3032     return Inst.getOperand(ClampIdx).getImm() == 0;
3033   }
3034 
3035   return true;
3036 }
3037 
3038 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3039 
3040   const unsigned Opc = Inst.getOpcode();
3041   const MCInstrDesc &Desc = MII.get(Opc);
3042 
3043   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3044     return true;
3045 
3046   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3047   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3048   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3049 
3050   assert(VDataIdx != -1);
3051   assert(DMaskIdx != -1);
3052   assert(TFEIdx != -1);
3053 
3054   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3055   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
3056   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3057   if (DMask == 0)
3058     DMask = 1;
3059 
3060   unsigned DataSize =
3061     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3062   if (hasPackedD16()) {
3063     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3064     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3065       DataSize = (DataSize + 1) / 2;
3066   }
3067 
3068   return (VDataSize / 4) == DataSize + TFESize;
3069 }
3070 
3071 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3072   const unsigned Opc = Inst.getOpcode();
3073   const MCInstrDesc &Desc = MII.get(Opc);
3074 
3075   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
3076     return true;
3077 
3078   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3079   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3080       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3081   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3082   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3083   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3084 
3085   assert(VAddr0Idx != -1);
3086   assert(SrsrcIdx != -1);
3087   assert(DimIdx != -1);
3088   assert(SrsrcIdx > VAddr0Idx);
3089 
3090   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3091   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3092   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3093   unsigned VAddrSize =
3094       IsNSA ? SrsrcIdx - VAddr0Idx
3095             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3096 
3097   unsigned AddrSize = BaseOpcode->NumExtraArgs +
3098                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
3099                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
3100                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
3101   if (!IsNSA) {
3102     if (AddrSize > 8)
3103       AddrSize = 16;
3104     else if (AddrSize > 4)
3105       AddrSize = 8;
3106   }
3107 
3108   return VAddrSize == AddrSize;
3109 }
3110 
3111 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3112 
3113   const unsigned Opc = Inst.getOpcode();
3114   const MCInstrDesc &Desc = MII.get(Opc);
3115 
3116   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3117     return true;
3118   if (!Desc.mayLoad() || !Desc.mayStore())
3119     return true; // Not atomic
3120 
3121   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3122   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3123 
3124   // This is an incomplete check because image_atomic_cmpswap
3125   // may only use 0x3 and 0xf while other atomic operations
3126   // may use 0x1 and 0x3. However these limitations are
3127   // verified when we check that dmask matches dst size.
3128   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3129 }
3130 
3131 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3132 
3133   const unsigned Opc = Inst.getOpcode();
3134   const MCInstrDesc &Desc = MII.get(Opc);
3135 
3136   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3137     return true;
3138 
3139   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3140   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3141 
3142   // GATHER4 instructions use dmask in a different fashion compared to
3143   // other MIMG instructions. The only useful DMASK values are
3144   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3145   // (red,red,red,red) etc.) The ISA document doesn't mention
3146   // this.
3147   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3148 }
3149 
3150 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3151 {
3152   switch (Opcode) {
3153   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3154   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3155   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3156     return true;
3157   default:
3158     return false;
3159   }
3160 }
3161 
3162 // movrels* opcodes should only allow VGPRS as src0.
3163 // This is specified in .td description for vop1/vop3,
3164 // but sdwa is handled differently. See isSDWAOperand.
3165 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) {
3166 
3167   const unsigned Opc = Inst.getOpcode();
3168   const MCInstrDesc &Desc = MII.get(Opc);
3169 
3170   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3171     return true;
3172 
3173   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3174   assert(Src0Idx != -1);
3175 
3176   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3177   if (!Src0.isReg())
3178     return false;
3179 
3180   auto Reg = Src0.getReg();
3181   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3182   return !isSGPR(mc2PseudoReg(Reg), TRI);
3183 }
3184 
3185 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst) {
3186 
3187   const unsigned Opc = Inst.getOpcode();
3188 
3189   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3190     return true;
3191 
3192   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3193   assert(Src0Idx != -1);
3194 
3195   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3196   if (!Src0.isReg())
3197     return true;
3198 
3199   auto Reg = Src0.getReg();
3200   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3201   if (isSGPR(mc2PseudoReg(Reg), TRI)) {
3202     Error(getLoc(), "source operand must be either a VGPR or an inline constant");
3203     return false;
3204   }
3205 
3206   return true;
3207 }
3208 
3209 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3210 
3211   const unsigned Opc = Inst.getOpcode();
3212   const MCInstrDesc &Desc = MII.get(Opc);
3213 
3214   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3215     return true;
3216 
3217   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3218   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3219     if (isCI() || isSI())
3220       return false;
3221   }
3222 
3223   return true;
3224 }
3225 
3226 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3227   const unsigned Opc = Inst.getOpcode();
3228   const MCInstrDesc &Desc = MII.get(Opc);
3229 
3230   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3231     return true;
3232 
3233   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3234   if (DimIdx < 0)
3235     return true;
3236 
3237   long Imm = Inst.getOperand(DimIdx).getImm();
3238   if (Imm < 0 || Imm >= 8)
3239     return false;
3240 
3241   return true;
3242 }
3243 
3244 static bool IsRevOpcode(const unsigned Opcode)
3245 {
3246   switch (Opcode) {
3247   case AMDGPU::V_SUBREV_F32_e32:
3248   case AMDGPU::V_SUBREV_F32_e64:
3249   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3250   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3251   case AMDGPU::V_SUBREV_F32_e32_vi:
3252   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3253   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3254   case AMDGPU::V_SUBREV_F32_e64_vi:
3255 
3256   case AMDGPU::V_SUBREV_CO_U32_e32:
3257   case AMDGPU::V_SUBREV_CO_U32_e64:
3258   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3259   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3260 
3261   case AMDGPU::V_SUBBREV_U32_e32:
3262   case AMDGPU::V_SUBBREV_U32_e64:
3263   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3264   case AMDGPU::V_SUBBREV_U32_e32_vi:
3265   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3266   case AMDGPU::V_SUBBREV_U32_e64_vi:
3267 
3268   case AMDGPU::V_SUBREV_U32_e32:
3269   case AMDGPU::V_SUBREV_U32_e64:
3270   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3271   case AMDGPU::V_SUBREV_U32_e32_vi:
3272   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3273   case AMDGPU::V_SUBREV_U32_e64_vi:
3274 
3275   case AMDGPU::V_SUBREV_F16_e32:
3276   case AMDGPU::V_SUBREV_F16_e64:
3277   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3278   case AMDGPU::V_SUBREV_F16_e32_vi:
3279   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3280   case AMDGPU::V_SUBREV_F16_e64_vi:
3281 
3282   case AMDGPU::V_SUBREV_U16_e32:
3283   case AMDGPU::V_SUBREV_U16_e64:
3284   case AMDGPU::V_SUBREV_U16_e32_vi:
3285   case AMDGPU::V_SUBREV_U16_e64_vi:
3286 
3287   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3288   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3289   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3290 
3291   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3292   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3293 
3294   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3295   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3296 
3297   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3298   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3299 
3300   case AMDGPU::V_LSHRREV_B32_e32:
3301   case AMDGPU::V_LSHRREV_B32_e64:
3302   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3303   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3304   case AMDGPU::V_LSHRREV_B32_e32_vi:
3305   case AMDGPU::V_LSHRREV_B32_e64_vi:
3306   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3307   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3308 
3309   case AMDGPU::V_ASHRREV_I32_e32:
3310   case AMDGPU::V_ASHRREV_I32_e64:
3311   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3312   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3313   case AMDGPU::V_ASHRREV_I32_e32_vi:
3314   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3315   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3316   case AMDGPU::V_ASHRREV_I32_e64_vi:
3317 
3318   case AMDGPU::V_LSHLREV_B32_e32:
3319   case AMDGPU::V_LSHLREV_B32_e64:
3320   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3321   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3322   case AMDGPU::V_LSHLREV_B32_e32_vi:
3323   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3324   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3325   case AMDGPU::V_LSHLREV_B32_e64_vi:
3326 
3327   case AMDGPU::V_LSHLREV_B16_e32:
3328   case AMDGPU::V_LSHLREV_B16_e64:
3329   case AMDGPU::V_LSHLREV_B16_e32_vi:
3330   case AMDGPU::V_LSHLREV_B16_e64_vi:
3331   case AMDGPU::V_LSHLREV_B16_gfx10:
3332 
3333   case AMDGPU::V_LSHRREV_B16_e32:
3334   case AMDGPU::V_LSHRREV_B16_e64:
3335   case AMDGPU::V_LSHRREV_B16_e32_vi:
3336   case AMDGPU::V_LSHRREV_B16_e64_vi:
3337   case AMDGPU::V_LSHRREV_B16_gfx10:
3338 
3339   case AMDGPU::V_ASHRREV_I16_e32:
3340   case AMDGPU::V_ASHRREV_I16_e64:
3341   case AMDGPU::V_ASHRREV_I16_e32_vi:
3342   case AMDGPU::V_ASHRREV_I16_e64_vi:
3343   case AMDGPU::V_ASHRREV_I16_gfx10:
3344 
3345   case AMDGPU::V_LSHLREV_B64:
3346   case AMDGPU::V_LSHLREV_B64_gfx10:
3347   case AMDGPU::V_LSHLREV_B64_vi:
3348 
3349   case AMDGPU::V_LSHRREV_B64:
3350   case AMDGPU::V_LSHRREV_B64_gfx10:
3351   case AMDGPU::V_LSHRREV_B64_vi:
3352 
3353   case AMDGPU::V_ASHRREV_I64:
3354   case AMDGPU::V_ASHRREV_I64_gfx10:
3355   case AMDGPU::V_ASHRREV_I64_vi:
3356 
3357   case AMDGPU::V_PK_LSHLREV_B16:
3358   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3359   case AMDGPU::V_PK_LSHLREV_B16_vi:
3360 
3361   case AMDGPU::V_PK_LSHRREV_B16:
3362   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3363   case AMDGPU::V_PK_LSHRREV_B16_vi:
3364   case AMDGPU::V_PK_ASHRREV_I16:
3365   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3366   case AMDGPU::V_PK_ASHRREV_I16_vi:
3367     return true;
3368   default:
3369     return false;
3370   }
3371 }
3372 
3373 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3374 
3375   using namespace SIInstrFlags;
3376   const unsigned Opcode = Inst.getOpcode();
3377   const MCInstrDesc &Desc = MII.get(Opcode);
3378 
3379   // lds_direct register is defined so that it can be used
3380   // with 9-bit operands only. Ignore encodings which do not accept these.
3381   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3382     return true;
3383 
3384   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3385   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3386   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3387 
3388   const int SrcIndices[] = { Src1Idx, Src2Idx };
3389 
3390   // lds_direct cannot be specified as either src1 or src2.
3391   for (int SrcIdx : SrcIndices) {
3392     if (SrcIdx == -1) break;
3393     const MCOperand &Src = Inst.getOperand(SrcIdx);
3394     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3395       return false;
3396     }
3397   }
3398 
3399   if (Src0Idx == -1)
3400     return true;
3401 
3402   const MCOperand &Src = Inst.getOperand(Src0Idx);
3403   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3404     return true;
3405 
3406   // lds_direct is specified as src0. Check additional limitations.
3407   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3408 }
3409 
3410 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3411   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3412     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3413     if (Op.isFlatOffset())
3414       return Op.getStartLoc();
3415   }
3416   return getLoc();
3417 }
3418 
3419 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3420                                          const OperandVector &Operands) {
3421   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3422   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3423     return true;
3424 
3425   auto Opcode = Inst.getOpcode();
3426   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3427   assert(OpNum != -1);
3428 
3429   const auto &Op = Inst.getOperand(OpNum);
3430   if (!hasFlatOffsets() && Op.getImm() != 0) {
3431     Error(getFlatOffsetLoc(Operands),
3432           "flat offset modifier is not supported on this GPU");
3433     return false;
3434   }
3435 
3436   // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
3437   // For FLAT segment the offset must be positive;
3438   // MSB is ignored and forced to zero.
3439   unsigned OffsetSize = isGFX9() ? 13 : 12;
3440   if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
3441     if (!isIntN(OffsetSize, Op.getImm())) {
3442       Error(getFlatOffsetLoc(Operands),
3443             isGFX9() ? "expected a 13-bit signed offset" :
3444                        "expected a 12-bit signed offset");
3445       return false;
3446     }
3447   } else {
3448     if (!isUIntN(OffsetSize - 1, Op.getImm())) {
3449       Error(getFlatOffsetLoc(Operands),
3450             isGFX9() ? "expected a 12-bit unsigned offset" :
3451                        "expected an 11-bit unsigned offset");
3452       return false;
3453     }
3454   }
3455 
3456   return true;
3457 }
3458 
3459 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3460   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3461     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3462     if (Op.isSMEMOffset())
3463       return Op.getStartLoc();
3464   }
3465   return getLoc();
3466 }
3467 
3468 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3469                                          const OperandVector &Operands) {
3470   if (isCI() || isSI())
3471     return true;
3472 
3473   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3474   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3475     return true;
3476 
3477   auto Opcode = Inst.getOpcode();
3478   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3479   if (OpNum == -1)
3480     return true;
3481 
3482   const auto &Op = Inst.getOperand(OpNum);
3483   if (!Op.isImm())
3484     return true;
3485 
3486   uint64_t Offset = Op.getImm();
3487   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3488   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3489       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3490     return true;
3491 
3492   Error(getSMEMOffsetLoc(Operands),
3493         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3494                                "expected a 21-bit signed offset");
3495 
3496   return false;
3497 }
3498 
3499 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3500   unsigned Opcode = Inst.getOpcode();
3501   const MCInstrDesc &Desc = MII.get(Opcode);
3502   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3503     return true;
3504 
3505   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3506   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3507 
3508   const int OpIndices[] = { Src0Idx, Src1Idx };
3509 
3510   unsigned NumExprs = 0;
3511   unsigned NumLiterals = 0;
3512   uint32_t LiteralValue;
3513 
3514   for (int OpIdx : OpIndices) {
3515     if (OpIdx == -1) break;
3516 
3517     const MCOperand &MO = Inst.getOperand(OpIdx);
3518     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3519     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3520       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3521         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3522         if (NumLiterals == 0 || LiteralValue != Value) {
3523           LiteralValue = Value;
3524           ++NumLiterals;
3525         }
3526       } else if (MO.isExpr()) {
3527         ++NumExprs;
3528       }
3529     }
3530   }
3531 
3532   return NumLiterals + NumExprs <= 1;
3533 }
3534 
3535 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3536   const unsigned Opc = Inst.getOpcode();
3537   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3538       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3539     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3540     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3541 
3542     if (OpSel & ~3)
3543       return false;
3544   }
3545   return true;
3546 }
3547 
3548 // Check if VCC register matches wavefront size
3549 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3550   auto FB = getFeatureBits();
3551   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3552     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3553 }
3554 
3555 // VOP3 literal is only allowed in GFX10+ and only one can be used
3556 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3557   unsigned Opcode = Inst.getOpcode();
3558   const MCInstrDesc &Desc = MII.get(Opcode);
3559   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3560     return true;
3561 
3562   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3563   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3564   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3565 
3566   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3567 
3568   unsigned NumExprs = 0;
3569   unsigned NumLiterals = 0;
3570   uint32_t LiteralValue;
3571 
3572   for (int OpIdx : OpIndices) {
3573     if (OpIdx == -1) break;
3574 
3575     const MCOperand &MO = Inst.getOperand(OpIdx);
3576     if (!MO.isImm() && !MO.isExpr())
3577       continue;
3578     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3579       continue;
3580 
3581     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3582         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug])
3583       return false;
3584 
3585     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3586       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3587       if (NumLiterals == 0 || LiteralValue != Value) {
3588         LiteralValue = Value;
3589         ++NumLiterals;
3590       }
3591     } else if (MO.isExpr()) {
3592       ++NumExprs;
3593     }
3594   }
3595   NumLiterals += NumExprs;
3596 
3597   return !NumLiterals ||
3598          (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3599 }
3600 
3601 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3602                                           const SMLoc &IDLoc,
3603                                           const OperandVector &Operands) {
3604   if (!validateLdsDirect(Inst)) {
3605     Error(IDLoc,
3606       "invalid use of lds_direct");
3607     return false;
3608   }
3609   if (!validateSOPLiteral(Inst)) {
3610     Error(IDLoc,
3611       "only one literal operand is allowed");
3612     return false;
3613   }
3614   if (!validateVOP3Literal(Inst)) {
3615     Error(IDLoc,
3616       "invalid literal operand");
3617     return false;
3618   }
3619   if (!validateConstantBusLimitations(Inst)) {
3620     Error(IDLoc,
3621       "invalid operand (violates constant bus restrictions)");
3622     return false;
3623   }
3624   if (!validateEarlyClobberLimitations(Inst)) {
3625     Error(IDLoc,
3626       "destination must be different than all sources");
3627     return false;
3628   }
3629   if (!validateIntClampSupported(Inst)) {
3630     Error(IDLoc,
3631       "integer clamping is not supported on this GPU");
3632     return false;
3633   }
3634   if (!validateOpSel(Inst)) {
3635     Error(IDLoc,
3636       "invalid op_sel operand");
3637     return false;
3638   }
3639   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
3640   if (!validateMIMGD16(Inst)) {
3641     Error(IDLoc,
3642       "d16 modifier is not supported on this GPU");
3643     return false;
3644   }
3645   if (!validateMIMGDim(Inst)) {
3646     Error(IDLoc, "dim modifier is required on this GPU");
3647     return false;
3648   }
3649   if (!validateMIMGDataSize(Inst)) {
3650     Error(IDLoc,
3651       "image data size does not match dmask and tfe");
3652     return false;
3653   }
3654   if (!validateMIMGAddrSize(Inst)) {
3655     Error(IDLoc,
3656       "image address size does not match dim and a16");
3657     return false;
3658   }
3659   if (!validateMIMGAtomicDMask(Inst)) {
3660     Error(IDLoc,
3661       "invalid atomic image dmask");
3662     return false;
3663   }
3664   if (!validateMIMGGatherDMask(Inst)) {
3665     Error(IDLoc,
3666       "invalid image_gather dmask: only one bit must be set");
3667     return false;
3668   }
3669   if (!validateMovrels(Inst)) {
3670     Error(IDLoc, "source operand must be a VGPR");
3671     return false;
3672   }
3673   if (!validateFlatOffset(Inst, Operands)) {
3674     return false;
3675   }
3676   if (!validateSMEMOffset(Inst, Operands)) {
3677     return false;
3678   }
3679   if (!validateMAIAccWrite(Inst)) {
3680     return false;
3681   }
3682 
3683   return true;
3684 }
3685 
3686 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3687                                             const FeatureBitset &FBS,
3688                                             unsigned VariantID = 0);
3689 
3690 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3691                                               OperandVector &Operands,
3692                                               MCStreamer &Out,
3693                                               uint64_t &ErrorInfo,
3694                                               bool MatchingInlineAsm) {
3695   MCInst Inst;
3696   unsigned Result = Match_Success;
3697   for (auto Variant : getMatchedVariants()) {
3698     uint64_t EI;
3699     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3700                                   Variant);
3701     // We order match statuses from least to most specific. We use most specific
3702     // status as resulting
3703     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3704     if ((R == Match_Success) ||
3705         (R == Match_PreferE32) ||
3706         (R == Match_MissingFeature && Result != Match_PreferE32) ||
3707         (R == Match_InvalidOperand && Result != Match_MissingFeature
3708                                    && Result != Match_PreferE32) ||
3709         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
3710                                    && Result != Match_MissingFeature
3711                                    && Result != Match_PreferE32)) {
3712       Result = R;
3713       ErrorInfo = EI;
3714     }
3715     if (R == Match_Success)
3716       break;
3717   }
3718 
3719   switch (Result) {
3720   default: break;
3721   case Match_Success:
3722     if (!validateInstruction(Inst, IDLoc, Operands)) {
3723       return true;
3724     }
3725     Inst.setLoc(IDLoc);
3726     Out.emitInstruction(Inst, getSTI());
3727     return false;
3728 
3729   case Match_MissingFeature:
3730     return Error(IDLoc, "instruction not supported on this GPU");
3731 
3732   case Match_MnemonicFail: {
3733     FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3734     std::string Suggestion = AMDGPUMnemonicSpellCheck(
3735         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
3736     return Error(IDLoc, "invalid instruction" + Suggestion,
3737                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
3738   }
3739 
3740   case Match_InvalidOperand: {
3741     SMLoc ErrorLoc = IDLoc;
3742     if (ErrorInfo != ~0ULL) {
3743       if (ErrorInfo >= Operands.size()) {
3744         return Error(IDLoc, "too few operands for instruction");
3745       }
3746       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3747       if (ErrorLoc == SMLoc())
3748         ErrorLoc = IDLoc;
3749     }
3750     return Error(ErrorLoc, "invalid operand for instruction");
3751   }
3752 
3753   case Match_PreferE32:
3754     return Error(IDLoc, "internal error: instruction without _e64 suffix "
3755                         "should be encoded as e32");
3756   }
3757   llvm_unreachable("Implement any new match types added!");
3758 }
3759 
3760 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3761   int64_t Tmp = -1;
3762   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3763     return true;
3764   }
3765   if (getParser().parseAbsoluteExpression(Tmp)) {
3766     return true;
3767   }
3768   Ret = static_cast<uint32_t>(Tmp);
3769   return false;
3770 }
3771 
3772 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3773                                                uint32_t &Minor) {
3774   if (ParseAsAbsoluteExpression(Major))
3775     return TokError("invalid major version");
3776 
3777   if (getLexer().isNot(AsmToken::Comma))
3778     return TokError("minor version number required, comma expected");
3779   Lex();
3780 
3781   if (ParseAsAbsoluteExpression(Minor))
3782     return TokError("invalid minor version");
3783 
3784   return false;
3785 }
3786 
3787 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3788   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3789     return TokError("directive only supported for amdgcn architecture");
3790 
3791   std::string Target;
3792 
3793   SMLoc TargetStart = getTok().getLoc();
3794   if (getParser().parseEscapedString(Target))
3795     return true;
3796   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3797 
3798   std::string ExpectedTarget;
3799   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3800   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3801 
3802   if (Target != ExpectedTargetOS.str())
3803     return getParser().Error(TargetRange.Start, "target must match options",
3804                              TargetRange);
3805 
3806   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3807   return false;
3808 }
3809 
3810 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3811   return getParser().Error(Range.Start, "value out of range", Range);
3812 }
3813 
3814 bool AMDGPUAsmParser::calculateGPRBlocks(
3815     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3816     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
3817     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
3818     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
3819   // TODO(scott.linder): These calculations are duplicated from
3820   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3821   IsaVersion Version = getIsaVersion(getSTI().getCPU());
3822 
3823   unsigned NumVGPRs = NextFreeVGPR;
3824   unsigned NumSGPRs = NextFreeSGPR;
3825 
3826   if (Version.Major >= 10)
3827     NumSGPRs = 0;
3828   else {
3829     unsigned MaxAddressableNumSGPRs =
3830         IsaInfo::getAddressableNumSGPRs(&getSTI());
3831 
3832     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3833         NumSGPRs > MaxAddressableNumSGPRs)
3834       return OutOfRangeError(SGPRRange);
3835 
3836     NumSGPRs +=
3837         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3838 
3839     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3840         NumSGPRs > MaxAddressableNumSGPRs)
3841       return OutOfRangeError(SGPRRange);
3842 
3843     if (Features.test(FeatureSGPRInitBug))
3844       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3845   }
3846 
3847   VGPRBlocks =
3848       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
3849   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3850 
3851   return false;
3852 }
3853 
3854 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3855   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3856     return TokError("directive only supported for amdgcn architecture");
3857 
3858   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3859     return TokError("directive only supported for amdhsa OS");
3860 
3861   StringRef KernelName;
3862   if (getParser().parseIdentifier(KernelName))
3863     return true;
3864 
3865   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3866 
3867   StringSet<> Seen;
3868 
3869   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3870 
3871   SMRange VGPRRange;
3872   uint64_t NextFreeVGPR = 0;
3873   SMRange SGPRRange;
3874   uint64_t NextFreeSGPR = 0;
3875   unsigned UserSGPRCount = 0;
3876   bool ReserveVCC = true;
3877   bool ReserveFlatScr = true;
3878   bool ReserveXNACK = hasXNACK();
3879   Optional<bool> EnableWavefrontSize32;
3880 
3881   while (true) {
3882     while (getLexer().is(AsmToken::EndOfStatement))
3883       Lex();
3884 
3885     if (getLexer().isNot(AsmToken::Identifier))
3886       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3887 
3888     StringRef ID = getTok().getIdentifier();
3889     SMRange IDRange = getTok().getLocRange();
3890     Lex();
3891 
3892     if (ID == ".end_amdhsa_kernel")
3893       break;
3894 
3895     if (Seen.find(ID) != Seen.end())
3896       return TokError(".amdhsa_ directives cannot be repeated");
3897     Seen.insert(ID);
3898 
3899     SMLoc ValStart = getTok().getLoc();
3900     int64_t IVal;
3901     if (getParser().parseAbsoluteExpression(IVal))
3902       return true;
3903     SMLoc ValEnd = getTok().getLoc();
3904     SMRange ValRange = SMRange(ValStart, ValEnd);
3905 
3906     if (IVal < 0)
3907       return OutOfRangeError(ValRange);
3908 
3909     uint64_t Val = IVal;
3910 
3911 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
3912   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
3913     return OutOfRangeError(RANGE);                                             \
3914   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3915 
3916     if (ID == ".amdhsa_group_segment_fixed_size") {
3917       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3918         return OutOfRangeError(ValRange);
3919       KD.group_segment_fixed_size = Val;
3920     } else if (ID == ".amdhsa_private_segment_fixed_size") {
3921       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3922         return OutOfRangeError(ValRange);
3923       KD.private_segment_fixed_size = Val;
3924     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3925       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3926                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3927                        Val, ValRange);
3928       if (Val)
3929         UserSGPRCount += 4;
3930     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3931       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3932                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3933                        ValRange);
3934       if (Val)
3935         UserSGPRCount += 2;
3936     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3937       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3938                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3939                        ValRange);
3940       if (Val)
3941         UserSGPRCount += 2;
3942     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3943       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3944                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3945                        Val, ValRange);
3946       if (Val)
3947         UserSGPRCount += 2;
3948     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3949       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3950                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3951                        ValRange);
3952       if (Val)
3953         UserSGPRCount += 2;
3954     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3955       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3956                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3957                        ValRange);
3958       if (Val)
3959         UserSGPRCount += 2;
3960     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3961       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3962                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3963                        Val, ValRange);
3964       if (Val)
3965         UserSGPRCount += 1;
3966     } else if (ID == ".amdhsa_wavefront_size32") {
3967       if (IVersion.Major < 10)
3968         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3969                                  IDRange);
3970       EnableWavefrontSize32 = Val;
3971       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3972                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
3973                        Val, ValRange);
3974     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3975       PARSE_BITS_ENTRY(
3976           KD.compute_pgm_rsrc2,
3977           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3978           ValRange);
3979     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3980       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3981                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3982                        ValRange);
3983     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3984       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3985                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3986                        ValRange);
3987     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3988       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3989                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3990                        ValRange);
3991     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3992       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3993                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3994                        ValRange);
3995     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3996       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3997                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3998                        ValRange);
3999     } else if (ID == ".amdhsa_next_free_vgpr") {
4000       VGPRRange = ValRange;
4001       NextFreeVGPR = Val;
4002     } else if (ID == ".amdhsa_next_free_sgpr") {
4003       SGPRRange = ValRange;
4004       NextFreeSGPR = Val;
4005     } else if (ID == ".amdhsa_reserve_vcc") {
4006       if (!isUInt<1>(Val))
4007         return OutOfRangeError(ValRange);
4008       ReserveVCC = Val;
4009     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4010       if (IVersion.Major < 7)
4011         return getParser().Error(IDRange.Start, "directive requires gfx7+",
4012                                  IDRange);
4013       if (!isUInt<1>(Val))
4014         return OutOfRangeError(ValRange);
4015       ReserveFlatScr = Val;
4016     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4017       if (IVersion.Major < 8)
4018         return getParser().Error(IDRange.Start, "directive requires gfx8+",
4019                                  IDRange);
4020       if (!isUInt<1>(Val))
4021         return OutOfRangeError(ValRange);
4022       ReserveXNACK = Val;
4023     } else if (ID == ".amdhsa_float_round_mode_32") {
4024       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4025                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4026     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4027       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4028                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4029     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4030       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4031                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4032     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4033       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4034                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4035                        ValRange);
4036     } else if (ID == ".amdhsa_dx10_clamp") {
4037       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4038                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4039     } else if (ID == ".amdhsa_ieee_mode") {
4040       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4041                        Val, ValRange);
4042     } else if (ID == ".amdhsa_fp16_overflow") {
4043       if (IVersion.Major < 9)
4044         return getParser().Error(IDRange.Start, "directive requires gfx9+",
4045                                  IDRange);
4046       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4047                        ValRange);
4048     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4049       if (IVersion.Major < 10)
4050         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4051                                  IDRange);
4052       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4053                        ValRange);
4054     } else if (ID == ".amdhsa_memory_ordered") {
4055       if (IVersion.Major < 10)
4056         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4057                                  IDRange);
4058       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4059                        ValRange);
4060     } else if (ID == ".amdhsa_forward_progress") {
4061       if (IVersion.Major < 10)
4062         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4063                                  IDRange);
4064       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4065                        ValRange);
4066     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4067       PARSE_BITS_ENTRY(
4068           KD.compute_pgm_rsrc2,
4069           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4070           ValRange);
4071     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4072       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4073                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4074                        Val, ValRange);
4075     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4076       PARSE_BITS_ENTRY(
4077           KD.compute_pgm_rsrc2,
4078           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4079           ValRange);
4080     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4081       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4082                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4083                        Val, ValRange);
4084     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4085       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4086                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4087                        Val, ValRange);
4088     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4089       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4090                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4091                        Val, ValRange);
4092     } else if (ID == ".amdhsa_exception_int_div_zero") {
4093       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4094                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4095                        Val, ValRange);
4096     } else {
4097       return getParser().Error(IDRange.Start,
4098                                "unknown .amdhsa_kernel directive", IDRange);
4099     }
4100 
4101 #undef PARSE_BITS_ENTRY
4102   }
4103 
4104   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4105     return TokError(".amdhsa_next_free_vgpr directive is required");
4106 
4107   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4108     return TokError(".amdhsa_next_free_sgpr directive is required");
4109 
4110   unsigned VGPRBlocks;
4111   unsigned SGPRBlocks;
4112   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4113                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
4114                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4115                          SGPRBlocks))
4116     return true;
4117 
4118   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4119           VGPRBlocks))
4120     return OutOfRangeError(VGPRRange);
4121   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4122                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4123 
4124   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4125           SGPRBlocks))
4126     return OutOfRangeError(SGPRRange);
4127   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4128                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4129                   SGPRBlocks);
4130 
4131   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4132     return TokError("too many user SGPRs enabled");
4133   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4134                   UserSGPRCount);
4135 
4136   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4137       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4138       ReserveFlatScr, ReserveXNACK);
4139   return false;
4140 }
4141 
4142 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4143   uint32_t Major;
4144   uint32_t Minor;
4145 
4146   if (ParseDirectiveMajorMinor(Major, Minor))
4147     return true;
4148 
4149   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4150   return false;
4151 }
4152 
4153 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4154   uint32_t Major;
4155   uint32_t Minor;
4156   uint32_t Stepping;
4157   StringRef VendorName;
4158   StringRef ArchName;
4159 
4160   // If this directive has no arguments, then use the ISA version for the
4161   // targeted GPU.
4162   if (getLexer().is(AsmToken::EndOfStatement)) {
4163     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4164     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
4165                                                       ISA.Stepping,
4166                                                       "AMD", "AMDGPU");
4167     return false;
4168   }
4169 
4170   if (ParseDirectiveMajorMinor(Major, Minor))
4171     return true;
4172 
4173   if (getLexer().isNot(AsmToken::Comma))
4174     return TokError("stepping version number required, comma expected");
4175   Lex();
4176 
4177   if (ParseAsAbsoluteExpression(Stepping))
4178     return TokError("invalid stepping version");
4179 
4180   if (getLexer().isNot(AsmToken::Comma))
4181     return TokError("vendor name required, comma expected");
4182   Lex();
4183 
4184   if (getLexer().isNot(AsmToken::String))
4185     return TokError("invalid vendor name");
4186 
4187   VendorName = getLexer().getTok().getStringContents();
4188   Lex();
4189 
4190   if (getLexer().isNot(AsmToken::Comma))
4191     return TokError("arch name required, comma expected");
4192   Lex();
4193 
4194   if (getLexer().isNot(AsmToken::String))
4195     return TokError("invalid arch name");
4196 
4197   ArchName = getLexer().getTok().getStringContents();
4198   Lex();
4199 
4200   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
4201                                                     VendorName, ArchName);
4202   return false;
4203 }
4204 
4205 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4206                                                amd_kernel_code_t &Header) {
4207   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4208   // assembly for backwards compatibility.
4209   if (ID == "max_scratch_backing_memory_byte_size") {
4210     Parser.eatToEndOfStatement();
4211     return false;
4212   }
4213 
4214   SmallString<40> ErrStr;
4215   raw_svector_ostream Err(ErrStr);
4216   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4217     return TokError(Err.str());
4218   }
4219   Lex();
4220 
4221   if (ID == "enable_wavefront_size32") {
4222     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4223       if (!isGFX10())
4224         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4225       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4226         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4227     } else {
4228       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4229         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4230     }
4231   }
4232 
4233   if (ID == "wavefront_size") {
4234     if (Header.wavefront_size == 5) {
4235       if (!isGFX10())
4236         return TokError("wavefront_size=5 is only allowed on GFX10+");
4237       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4238         return TokError("wavefront_size=5 requires +WavefrontSize32");
4239     } else if (Header.wavefront_size == 6) {
4240       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4241         return TokError("wavefront_size=6 requires +WavefrontSize64");
4242     }
4243   }
4244 
4245   if (ID == "enable_wgp_mode") {
4246     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
4247       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4248   }
4249 
4250   if (ID == "enable_mem_ordered") {
4251     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
4252       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4253   }
4254 
4255   if (ID == "enable_fwd_progress") {
4256     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
4257       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4258   }
4259 
4260   return false;
4261 }
4262 
4263 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4264   amd_kernel_code_t Header;
4265   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4266 
4267   while (true) {
4268     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4269     // will set the current token to EndOfStatement.
4270     while(getLexer().is(AsmToken::EndOfStatement))
4271       Lex();
4272 
4273     if (getLexer().isNot(AsmToken::Identifier))
4274       return TokError("expected value identifier or .end_amd_kernel_code_t");
4275 
4276     StringRef ID = getLexer().getTok().getIdentifier();
4277     Lex();
4278 
4279     if (ID == ".end_amd_kernel_code_t")
4280       break;
4281 
4282     if (ParseAMDKernelCodeTValue(ID, Header))
4283       return true;
4284   }
4285 
4286   getTargetStreamer().EmitAMDKernelCodeT(Header);
4287 
4288   return false;
4289 }
4290 
4291 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4292   if (getLexer().isNot(AsmToken::Identifier))
4293     return TokError("expected symbol name");
4294 
4295   StringRef KernelName = Parser.getTok().getString();
4296 
4297   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4298                                            ELF::STT_AMDGPU_HSA_KERNEL);
4299   Lex();
4300   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
4301     KernelScope.initialize(getContext());
4302   return false;
4303 }
4304 
4305 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4306   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4307     return Error(getParser().getTok().getLoc(),
4308                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4309                  "architectures");
4310   }
4311 
4312   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
4313 
4314   std::string ISAVersionStringFromSTI;
4315   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4316   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4317 
4318   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4319     return Error(getParser().getTok().getLoc(),
4320                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4321                  "arguments specified through the command line");
4322   }
4323 
4324   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4325   Lex();
4326 
4327   return false;
4328 }
4329 
4330 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4331   const char *AssemblerDirectiveBegin;
4332   const char *AssemblerDirectiveEnd;
4333   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4334       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
4335           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4336                             HSAMD::V3::AssemblerDirectiveEnd)
4337           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4338                             HSAMD::AssemblerDirectiveEnd);
4339 
4340   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4341     return Error(getParser().getTok().getLoc(),
4342                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4343                  "not available on non-amdhsa OSes")).str());
4344   }
4345 
4346   std::string HSAMetadataString;
4347   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4348                           HSAMetadataString))
4349     return true;
4350 
4351   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
4352     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4353       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4354   } else {
4355     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4356       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4357   }
4358 
4359   return false;
4360 }
4361 
4362 /// Common code to parse out a block of text (typically YAML) between start and
4363 /// end directives.
4364 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4365                                           const char *AssemblerDirectiveEnd,
4366                                           std::string &CollectString) {
4367 
4368   raw_string_ostream CollectStream(CollectString);
4369 
4370   getLexer().setSkipSpace(false);
4371 
4372   bool FoundEnd = false;
4373   while (!getLexer().is(AsmToken::Eof)) {
4374     while (getLexer().is(AsmToken::Space)) {
4375       CollectStream << getLexer().getTok().getString();
4376       Lex();
4377     }
4378 
4379     if (getLexer().is(AsmToken::Identifier)) {
4380       StringRef ID = getLexer().getTok().getIdentifier();
4381       if (ID == AssemblerDirectiveEnd) {
4382         Lex();
4383         FoundEnd = true;
4384         break;
4385       }
4386     }
4387 
4388     CollectStream << Parser.parseStringToEndOfStatement()
4389                   << getContext().getAsmInfo()->getSeparatorString();
4390 
4391     Parser.eatToEndOfStatement();
4392   }
4393 
4394   getLexer().setSkipSpace(true);
4395 
4396   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
4397     return TokError(Twine("expected directive ") +
4398                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4399   }
4400 
4401   CollectStream.flush();
4402   return false;
4403 }
4404 
4405 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4406 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4407   std::string String;
4408   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4409                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4410     return true;
4411 
4412   auto PALMetadata = getTargetStreamer().getPALMetadata();
4413   if (!PALMetadata->setFromString(String))
4414     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4415   return false;
4416 }
4417 
4418 /// Parse the assembler directive for old linear-format PAL metadata.
4419 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4420   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4421     return Error(getParser().getTok().getLoc(),
4422                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4423                  "not available on non-amdpal OSes")).str());
4424   }
4425 
4426   auto PALMetadata = getTargetStreamer().getPALMetadata();
4427   PALMetadata->setLegacy();
4428   for (;;) {
4429     uint32_t Key, Value;
4430     if (ParseAsAbsoluteExpression(Key)) {
4431       return TokError(Twine("invalid value in ") +
4432                       Twine(PALMD::AssemblerDirective));
4433     }
4434     if (getLexer().isNot(AsmToken::Comma)) {
4435       return TokError(Twine("expected an even number of values in ") +
4436                       Twine(PALMD::AssemblerDirective));
4437     }
4438     Lex();
4439     if (ParseAsAbsoluteExpression(Value)) {
4440       return TokError(Twine("invalid value in ") +
4441                       Twine(PALMD::AssemblerDirective));
4442     }
4443     PALMetadata->setRegister(Key, Value);
4444     if (getLexer().isNot(AsmToken::Comma))
4445       break;
4446     Lex();
4447   }
4448   return false;
4449 }
4450 
4451 /// ParseDirectiveAMDGPULDS
4452 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
4453 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4454   if (getParser().checkForValidSection())
4455     return true;
4456 
4457   StringRef Name;
4458   SMLoc NameLoc = getLexer().getLoc();
4459   if (getParser().parseIdentifier(Name))
4460     return TokError("expected identifier in directive");
4461 
4462   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4463   if (parseToken(AsmToken::Comma, "expected ','"))
4464     return true;
4465 
4466   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4467 
4468   int64_t Size;
4469   SMLoc SizeLoc = getLexer().getLoc();
4470   if (getParser().parseAbsoluteExpression(Size))
4471     return true;
4472   if (Size < 0)
4473     return Error(SizeLoc, "size must be non-negative");
4474   if (Size > LocalMemorySize)
4475     return Error(SizeLoc, "size is too large");
4476 
4477   int64_t Alignment = 4;
4478   if (getLexer().is(AsmToken::Comma)) {
4479     Lex();
4480     SMLoc AlignLoc = getLexer().getLoc();
4481     if (getParser().parseAbsoluteExpression(Alignment))
4482       return true;
4483     if (Alignment < 0 || !isPowerOf2_64(Alignment))
4484       return Error(AlignLoc, "alignment must be a power of two");
4485 
4486     // Alignment larger than the size of LDS is possible in theory, as long
4487     // as the linker manages to place to symbol at address 0, but we do want
4488     // to make sure the alignment fits nicely into a 32-bit integer.
4489     if (Alignment >= 1u << 31)
4490       return Error(AlignLoc, "alignment is too large");
4491   }
4492 
4493   if (parseToken(AsmToken::EndOfStatement,
4494                  "unexpected token in '.amdgpu_lds' directive"))
4495     return true;
4496 
4497   Symbol->redefineIfPossible();
4498   if (!Symbol->isUndefined())
4499     return Error(NameLoc, "invalid symbol redefinition");
4500 
4501   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
4502   return false;
4503 }
4504 
4505 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4506   StringRef IDVal = DirectiveID.getString();
4507 
4508   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
4509     if (IDVal == ".amdgcn_target")
4510       return ParseDirectiveAMDGCNTarget();
4511 
4512     if (IDVal == ".amdhsa_kernel")
4513       return ParseDirectiveAMDHSAKernel();
4514 
4515     // TODO: Restructure/combine with PAL metadata directive.
4516     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4517       return ParseDirectiveHSAMetadata();
4518   } else {
4519     if (IDVal == ".hsa_code_object_version")
4520       return ParseDirectiveHSACodeObjectVersion();
4521 
4522     if (IDVal == ".hsa_code_object_isa")
4523       return ParseDirectiveHSACodeObjectISA();
4524 
4525     if (IDVal == ".amd_kernel_code_t")
4526       return ParseDirectiveAMDKernelCodeT();
4527 
4528     if (IDVal == ".amdgpu_hsa_kernel")
4529       return ParseDirectiveAMDGPUHsaKernel();
4530 
4531     if (IDVal == ".amd_amdgpu_isa")
4532       return ParseDirectiveISAVersion();
4533 
4534     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4535       return ParseDirectiveHSAMetadata();
4536   }
4537 
4538   if (IDVal == ".amdgpu_lds")
4539     return ParseDirectiveAMDGPULDS();
4540 
4541   if (IDVal == PALMD::AssemblerDirectiveBegin)
4542     return ParseDirectivePALMetadataBegin();
4543 
4544   if (IDVal == PALMD::AssemblerDirective)
4545     return ParseDirectivePALMetadata();
4546 
4547   return true;
4548 }
4549 
4550 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4551                                            unsigned RegNo) const {
4552 
4553   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4554        R.isValid(); ++R) {
4555     if (*R == RegNo)
4556       return isGFX9() || isGFX10();
4557   }
4558 
4559   // GFX10 has 2 more SGPRs 104 and 105.
4560   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4561        R.isValid(); ++R) {
4562     if (*R == RegNo)
4563       return hasSGPR104_SGPR105();
4564   }
4565 
4566   switch (RegNo) {
4567   case AMDGPU::SRC_SHARED_BASE:
4568   case AMDGPU::SRC_SHARED_LIMIT:
4569   case AMDGPU::SRC_PRIVATE_BASE:
4570   case AMDGPU::SRC_PRIVATE_LIMIT:
4571   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4572     return !isCI() && !isSI() && !isVI();
4573   case AMDGPU::TBA:
4574   case AMDGPU::TBA_LO:
4575   case AMDGPU::TBA_HI:
4576   case AMDGPU::TMA:
4577   case AMDGPU::TMA_LO:
4578   case AMDGPU::TMA_HI:
4579     return !isGFX9() && !isGFX10();
4580   case AMDGPU::XNACK_MASK:
4581   case AMDGPU::XNACK_MASK_LO:
4582   case AMDGPU::XNACK_MASK_HI:
4583     return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4584   case AMDGPU::SGPR_NULL:
4585     return isGFX10();
4586   default:
4587     break;
4588   }
4589 
4590   if (isCI())
4591     return true;
4592 
4593   if (isSI() || isGFX10()) {
4594     // No flat_scr on SI.
4595     // On GFX10 flat scratch is not a valid register operand and can only be
4596     // accessed with s_setreg/s_getreg.
4597     switch (RegNo) {
4598     case AMDGPU::FLAT_SCR:
4599     case AMDGPU::FLAT_SCR_LO:
4600     case AMDGPU::FLAT_SCR_HI:
4601       return false;
4602     default:
4603       return true;
4604     }
4605   }
4606 
4607   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4608   // SI/CI have.
4609   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4610        R.isValid(); ++R) {
4611     if (*R == RegNo)
4612       return hasSGPR102_SGPR103();
4613   }
4614 
4615   return true;
4616 }
4617 
4618 OperandMatchResultTy
4619 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4620                               OperandMode Mode) {
4621   // Try to parse with a custom parser
4622   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4623 
4624   // If we successfully parsed the operand or if there as an error parsing,
4625   // we are done.
4626   //
4627   // If we are parsing after we reach EndOfStatement then this means we
4628   // are appending default values to the Operands list.  This is only done
4629   // by custom parser, so we shouldn't continue on to the generic parsing.
4630   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4631       getLexer().is(AsmToken::EndOfStatement))
4632     return ResTy;
4633 
4634   if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4635     unsigned Prefix = Operands.size();
4636     SMLoc LBraceLoc = getTok().getLoc();
4637     Parser.Lex(); // eat the '['
4638 
4639     for (;;) {
4640       ResTy = parseReg(Operands);
4641       if (ResTy != MatchOperand_Success)
4642         return ResTy;
4643 
4644       if (getLexer().is(AsmToken::RBrac))
4645         break;
4646 
4647       if (getLexer().isNot(AsmToken::Comma))
4648         return MatchOperand_ParseFail;
4649       Parser.Lex();
4650     }
4651 
4652     if (Operands.size() - Prefix > 1) {
4653       Operands.insert(Operands.begin() + Prefix,
4654                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4655       Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4656                                                     getTok().getLoc()));
4657     }
4658 
4659     Parser.Lex(); // eat the ']'
4660     return MatchOperand_Success;
4661   }
4662 
4663   return parseRegOrImm(Operands);
4664 }
4665 
4666 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4667   // Clear any forced encodings from the previous instruction.
4668   setForcedEncodingSize(0);
4669   setForcedDPP(false);
4670   setForcedSDWA(false);
4671 
4672   if (Name.endswith("_e64")) {
4673     setForcedEncodingSize(64);
4674     return Name.substr(0, Name.size() - 4);
4675   } else if (Name.endswith("_e32")) {
4676     setForcedEncodingSize(32);
4677     return Name.substr(0, Name.size() - 4);
4678   } else if (Name.endswith("_dpp")) {
4679     setForcedDPP(true);
4680     return Name.substr(0, Name.size() - 4);
4681   } else if (Name.endswith("_sdwa")) {
4682     setForcedSDWA(true);
4683     return Name.substr(0, Name.size() - 5);
4684   }
4685   return Name;
4686 }
4687 
4688 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4689                                        StringRef Name,
4690                                        SMLoc NameLoc, OperandVector &Operands) {
4691   // Add the instruction mnemonic
4692   Name = parseMnemonicSuffix(Name);
4693   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4694 
4695   bool IsMIMG = Name.startswith("image_");
4696 
4697   while (!getLexer().is(AsmToken::EndOfStatement)) {
4698     OperandMode Mode = OperandMode_Default;
4699     if (IsMIMG && isGFX10() && Operands.size() == 2)
4700       Mode = OperandMode_NSA;
4701     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4702 
4703     // Eat the comma or space if there is one.
4704     if (getLexer().is(AsmToken::Comma))
4705       Parser.Lex();
4706 
4707     if (Res != MatchOperand_Success) {
4708       if (!Parser.hasPendingError()) {
4709         // FIXME: use real operand location rather than the current location.
4710         StringRef Msg =
4711           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
4712                                             "not a valid operand.";
4713         Error(getLexer().getLoc(), Msg);
4714       }
4715       while (!getLexer().is(AsmToken::EndOfStatement)) {
4716         Parser.Lex();
4717       }
4718       return true;
4719     }
4720   }
4721 
4722   return false;
4723 }
4724 
4725 //===----------------------------------------------------------------------===//
4726 // Utility functions
4727 //===----------------------------------------------------------------------===//
4728 
4729 OperandMatchResultTy
4730 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4731 
4732   if (!trySkipId(Prefix, AsmToken::Colon))
4733     return MatchOperand_NoMatch;
4734 
4735   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4736 }
4737 
4738 OperandMatchResultTy
4739 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4740                                     AMDGPUOperand::ImmTy ImmTy,
4741                                     bool (*ConvertResult)(int64_t&)) {
4742   SMLoc S = getLoc();
4743   int64_t Value = 0;
4744 
4745   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4746   if (Res != MatchOperand_Success)
4747     return Res;
4748 
4749   if (ConvertResult && !ConvertResult(Value)) {
4750     Error(S, "invalid " + StringRef(Prefix) + " value.");
4751   }
4752 
4753   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4754   return MatchOperand_Success;
4755 }
4756 
4757 OperandMatchResultTy
4758 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4759                                              OperandVector &Operands,
4760                                              AMDGPUOperand::ImmTy ImmTy,
4761                                              bool (*ConvertResult)(int64_t&)) {
4762   SMLoc S = getLoc();
4763   if (!trySkipId(Prefix, AsmToken::Colon))
4764     return MatchOperand_NoMatch;
4765 
4766   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4767     return MatchOperand_ParseFail;
4768 
4769   unsigned Val = 0;
4770   const unsigned MaxSize = 4;
4771 
4772   // FIXME: How to verify the number of elements matches the number of src
4773   // operands?
4774   for (int I = 0; ; ++I) {
4775     int64_t Op;
4776     SMLoc Loc = getLoc();
4777     if (!parseExpr(Op))
4778       return MatchOperand_ParseFail;
4779 
4780     if (Op != 0 && Op != 1) {
4781       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4782       return MatchOperand_ParseFail;
4783     }
4784 
4785     Val |= (Op << I);
4786 
4787     if (trySkipToken(AsmToken::RBrac))
4788       break;
4789 
4790     if (I + 1 == MaxSize) {
4791       Error(getLoc(), "expected a closing square bracket");
4792       return MatchOperand_ParseFail;
4793     }
4794 
4795     if (!skipToken(AsmToken::Comma, "expected a comma"))
4796       return MatchOperand_ParseFail;
4797   }
4798 
4799   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4800   return MatchOperand_Success;
4801 }
4802 
4803 OperandMatchResultTy
4804 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4805                                AMDGPUOperand::ImmTy ImmTy) {
4806   int64_t Bit = 0;
4807   SMLoc S = Parser.getTok().getLoc();
4808 
4809   // We are at the end of the statement, and this is a default argument, so
4810   // use a default value.
4811   if (getLexer().isNot(AsmToken::EndOfStatement)) {
4812     switch(getLexer().getKind()) {
4813       case AsmToken::Identifier: {
4814         StringRef Tok = Parser.getTok().getString();
4815         if (Tok == Name) {
4816           if (Tok == "r128" && !hasMIMG_R128())
4817             Error(S, "r128 modifier is not supported on this GPU");
4818           if (Tok == "a16" && !isGFX9() && !hasGFX10A16())
4819             Error(S, "a16 modifier is not supported on this GPU");
4820           Bit = 1;
4821           Parser.Lex();
4822         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4823           Bit = 0;
4824           Parser.Lex();
4825         } else {
4826           return MatchOperand_NoMatch;
4827         }
4828         break;
4829       }
4830       default:
4831         return MatchOperand_NoMatch;
4832     }
4833   }
4834 
4835   if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4836     return MatchOperand_ParseFail;
4837 
4838   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
4839     ImmTy = AMDGPUOperand::ImmTyR128A16;
4840 
4841   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4842   return MatchOperand_Success;
4843 }
4844 
4845 static void addOptionalImmOperand(
4846   MCInst& Inst, const OperandVector& Operands,
4847   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4848   AMDGPUOperand::ImmTy ImmT,
4849   int64_t Default = 0) {
4850   auto i = OptionalIdx.find(ImmT);
4851   if (i != OptionalIdx.end()) {
4852     unsigned Idx = i->second;
4853     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4854   } else {
4855     Inst.addOperand(MCOperand::createImm(Default));
4856   }
4857 }
4858 
4859 OperandMatchResultTy
4860 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4861   if (getLexer().isNot(AsmToken::Identifier)) {
4862     return MatchOperand_NoMatch;
4863   }
4864   StringRef Tok = Parser.getTok().getString();
4865   if (Tok != Prefix) {
4866     return MatchOperand_NoMatch;
4867   }
4868 
4869   Parser.Lex();
4870   if (getLexer().isNot(AsmToken::Colon)) {
4871     return MatchOperand_ParseFail;
4872   }
4873 
4874   Parser.Lex();
4875   if (getLexer().isNot(AsmToken::Identifier)) {
4876     return MatchOperand_ParseFail;
4877   }
4878 
4879   Value = Parser.getTok().getString();
4880   return MatchOperand_Success;
4881 }
4882 
4883 //===----------------------------------------------------------------------===//
4884 // MTBUF format
4885 //===----------------------------------------------------------------------===//
4886 
4887 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
4888                                   int64_t MaxVal,
4889                                   int64_t &Fmt) {
4890   int64_t Val;
4891   SMLoc Loc = getLoc();
4892 
4893   auto Res = parseIntWithPrefix(Pref, Val);
4894   if (Res == MatchOperand_ParseFail)
4895     return false;
4896   if (Res == MatchOperand_NoMatch)
4897     return true;
4898 
4899   if (Val < 0 || Val > MaxVal) {
4900     Error(Loc, Twine("out of range ", StringRef(Pref)));
4901     return false;
4902   }
4903 
4904   Fmt = Val;
4905   return true;
4906 }
4907 
4908 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
4909 // values to live in a joint format operand in the MCInst encoding.
4910 OperandMatchResultTy
4911 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
4912   using namespace llvm::AMDGPU::MTBUFFormat;
4913 
4914   int64_t Dfmt = DFMT_UNDEF;
4915   int64_t Nfmt = NFMT_UNDEF;
4916 
4917   // dfmt and nfmt can appear in either order, and each is optional.
4918   for (int I = 0; I < 2; ++I) {
4919     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
4920       return MatchOperand_ParseFail;
4921 
4922     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
4923       return MatchOperand_ParseFail;
4924     }
4925     // Skip optional comma between dfmt/nfmt
4926     // but guard against 2 commas following each other.
4927     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
4928         !peekToken().is(AsmToken::Comma)) {
4929       trySkipToken(AsmToken::Comma);
4930     }
4931   }
4932 
4933   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
4934     return MatchOperand_NoMatch;
4935 
4936   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
4937   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
4938 
4939   Format = encodeDfmtNfmt(Dfmt, Nfmt);
4940   return MatchOperand_Success;
4941 }
4942 
4943 OperandMatchResultTy
4944 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
4945   using namespace llvm::AMDGPU::MTBUFFormat;
4946 
4947   int64_t Fmt = UFMT_UNDEF;
4948 
4949   if (!tryParseFmt("format", UFMT_MAX, Fmt))
4950     return MatchOperand_ParseFail;
4951 
4952   if (Fmt == UFMT_UNDEF)
4953     return MatchOperand_NoMatch;
4954 
4955   Format = Fmt;
4956   return MatchOperand_Success;
4957 }
4958 
4959 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
4960                                     int64_t &Nfmt,
4961                                     StringRef FormatStr,
4962                                     SMLoc Loc) {
4963   using namespace llvm::AMDGPU::MTBUFFormat;
4964   int64_t Format;
4965 
4966   Format = getDfmt(FormatStr);
4967   if (Format != DFMT_UNDEF) {
4968     Dfmt = Format;
4969     return true;
4970   }
4971 
4972   Format = getNfmt(FormatStr, getSTI());
4973   if (Format != NFMT_UNDEF) {
4974     Nfmt = Format;
4975     return true;
4976   }
4977 
4978   Error(Loc, "unsupported format");
4979   return false;
4980 }
4981 
4982 OperandMatchResultTy
4983 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
4984                                           SMLoc FormatLoc,
4985                                           int64_t &Format) {
4986   using namespace llvm::AMDGPU::MTBUFFormat;
4987 
4988   int64_t Dfmt = DFMT_UNDEF;
4989   int64_t Nfmt = NFMT_UNDEF;
4990   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
4991     return MatchOperand_ParseFail;
4992 
4993   if (trySkipToken(AsmToken::Comma)) {
4994     StringRef Str;
4995     SMLoc Loc = getLoc();
4996     if (!parseId(Str, "expected a format string") ||
4997         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
4998       return MatchOperand_ParseFail;
4999     }
5000     if (Dfmt == DFMT_UNDEF) {
5001       Error(Loc, "duplicate numeric format");
5002       return MatchOperand_ParseFail;
5003     } else if (Nfmt == NFMT_UNDEF) {
5004       Error(Loc, "duplicate data format");
5005       return MatchOperand_ParseFail;
5006     }
5007   }
5008 
5009   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5010   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5011 
5012   if (isGFX10()) {
5013     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5014     if (Ufmt == UFMT_UNDEF) {
5015       Error(FormatLoc, "unsupported format");
5016       return MatchOperand_ParseFail;
5017     }
5018     Format = Ufmt;
5019   } else {
5020     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5021   }
5022 
5023   return MatchOperand_Success;
5024 }
5025 
5026 OperandMatchResultTy
5027 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5028                                             SMLoc Loc,
5029                                             int64_t &Format) {
5030   using namespace llvm::AMDGPU::MTBUFFormat;
5031 
5032   auto Id = getUnifiedFormat(FormatStr);
5033   if (Id == UFMT_UNDEF)
5034     return MatchOperand_NoMatch;
5035 
5036   if (!isGFX10()) {
5037     Error(Loc, "unified format is not supported on this GPU");
5038     return MatchOperand_ParseFail;
5039   }
5040 
5041   Format = Id;
5042   return MatchOperand_Success;
5043 }
5044 
5045 OperandMatchResultTy
5046 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5047   using namespace llvm::AMDGPU::MTBUFFormat;
5048   SMLoc Loc = getLoc();
5049 
5050   if (!parseExpr(Format))
5051     return MatchOperand_ParseFail;
5052   if (!isValidFormatEncoding(Format, getSTI())) {
5053     Error(Loc, "out of range format");
5054     return MatchOperand_ParseFail;
5055   }
5056 
5057   return MatchOperand_Success;
5058 }
5059 
5060 OperandMatchResultTy
5061 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5062   using namespace llvm::AMDGPU::MTBUFFormat;
5063 
5064   if (!trySkipId("format", AsmToken::Colon))
5065     return MatchOperand_NoMatch;
5066 
5067   if (trySkipToken(AsmToken::LBrac)) {
5068     StringRef FormatStr;
5069     SMLoc Loc = getLoc();
5070     if (!parseId(FormatStr, "expected a format string"))
5071       return MatchOperand_ParseFail;
5072 
5073     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5074     if (Res == MatchOperand_NoMatch)
5075       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5076     if (Res != MatchOperand_Success)
5077       return Res;
5078 
5079     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5080       return MatchOperand_ParseFail;
5081 
5082     return MatchOperand_Success;
5083   }
5084 
5085   return parseNumericFormat(Format);
5086 }
5087 
5088 OperandMatchResultTy
5089 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5090   using namespace llvm::AMDGPU::MTBUFFormat;
5091 
5092   int64_t Format = getDefaultFormatEncoding(getSTI());
5093   OperandMatchResultTy Res;
5094   SMLoc Loc = getLoc();
5095 
5096   // Parse legacy format syntax.
5097   Res = isGFX10() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5098   if (Res == MatchOperand_ParseFail)
5099     return Res;
5100 
5101   bool FormatFound = (Res == MatchOperand_Success);
5102 
5103   Operands.push_back(
5104     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5105 
5106   if (FormatFound)
5107     trySkipToken(AsmToken::Comma);
5108 
5109   if (isToken(AsmToken::EndOfStatement)) {
5110     // We are expecting an soffset operand,
5111     // but let matcher handle the error.
5112     return MatchOperand_Success;
5113   }
5114 
5115   // Parse soffset.
5116   Res = parseRegOrImm(Operands);
5117   if (Res != MatchOperand_Success)
5118     return Res;
5119 
5120   trySkipToken(AsmToken::Comma);
5121 
5122   if (!FormatFound) {
5123     Res = parseSymbolicOrNumericFormat(Format);
5124     if (Res == MatchOperand_ParseFail)
5125       return Res;
5126     if (Res == MatchOperand_Success) {
5127       auto Size = Operands.size();
5128       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5129       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5130       Op.setImm(Format);
5131     }
5132     return MatchOperand_Success;
5133   }
5134 
5135   if (isId("format") && peekToken().is(AsmToken::Colon)) {
5136     Error(getLoc(), "duplicate format");
5137     return MatchOperand_ParseFail;
5138   }
5139   return MatchOperand_Success;
5140 }
5141 
5142 //===----------------------------------------------------------------------===//
5143 // ds
5144 //===----------------------------------------------------------------------===//
5145 
5146 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5147                                     const OperandVector &Operands) {
5148   OptionalImmIndexMap OptionalIdx;
5149 
5150   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5151     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5152 
5153     // Add the register arguments
5154     if (Op.isReg()) {
5155       Op.addRegOperands(Inst, 1);
5156       continue;
5157     }
5158 
5159     // Handle optional arguments
5160     OptionalIdx[Op.getImmTy()] = i;
5161   }
5162 
5163   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5164   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5165   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5166 
5167   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5168 }
5169 
5170 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5171                                 bool IsGdsHardcoded) {
5172   OptionalImmIndexMap OptionalIdx;
5173 
5174   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5175     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5176 
5177     // Add the register arguments
5178     if (Op.isReg()) {
5179       Op.addRegOperands(Inst, 1);
5180       continue;
5181     }
5182 
5183     if (Op.isToken() && Op.getToken() == "gds") {
5184       IsGdsHardcoded = true;
5185       continue;
5186     }
5187 
5188     // Handle optional arguments
5189     OptionalIdx[Op.getImmTy()] = i;
5190   }
5191 
5192   AMDGPUOperand::ImmTy OffsetType =
5193     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5194      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5195      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5196                                                       AMDGPUOperand::ImmTyOffset;
5197 
5198   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5199 
5200   if (!IsGdsHardcoded) {
5201     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5202   }
5203   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5204 }
5205 
5206 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
5207   OptionalImmIndexMap OptionalIdx;
5208 
5209   unsigned OperandIdx[4];
5210   unsigned EnMask = 0;
5211   int SrcIdx = 0;
5212 
5213   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5214     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5215 
5216     // Add the register arguments
5217     if (Op.isReg()) {
5218       assert(SrcIdx < 4);
5219       OperandIdx[SrcIdx] = Inst.size();
5220       Op.addRegOperands(Inst, 1);
5221       ++SrcIdx;
5222       continue;
5223     }
5224 
5225     if (Op.isOff()) {
5226       assert(SrcIdx < 4);
5227       OperandIdx[SrcIdx] = Inst.size();
5228       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
5229       ++SrcIdx;
5230       continue;
5231     }
5232 
5233     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
5234       Op.addImmOperands(Inst, 1);
5235       continue;
5236     }
5237 
5238     if (Op.isToken() && Op.getToken() == "done")
5239       continue;
5240 
5241     // Handle optional arguments
5242     OptionalIdx[Op.getImmTy()] = i;
5243   }
5244 
5245   assert(SrcIdx == 4);
5246 
5247   bool Compr = false;
5248   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
5249     Compr = true;
5250     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
5251     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
5252     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
5253   }
5254 
5255   for (auto i = 0; i < SrcIdx; ++i) {
5256     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
5257       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
5258     }
5259   }
5260 
5261   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
5262   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
5263 
5264   Inst.addOperand(MCOperand::createImm(EnMask));
5265 }
5266 
5267 //===----------------------------------------------------------------------===//
5268 // s_waitcnt
5269 //===----------------------------------------------------------------------===//
5270 
5271 static bool
5272 encodeCnt(
5273   const AMDGPU::IsaVersion ISA,
5274   int64_t &IntVal,
5275   int64_t CntVal,
5276   bool Saturate,
5277   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
5278   unsigned (*decode)(const IsaVersion &Version, unsigned))
5279 {
5280   bool Failed = false;
5281 
5282   IntVal = encode(ISA, IntVal, CntVal);
5283   if (CntVal != decode(ISA, IntVal)) {
5284     if (Saturate) {
5285       IntVal = encode(ISA, IntVal, -1);
5286     } else {
5287       Failed = true;
5288     }
5289   }
5290   return Failed;
5291 }
5292 
5293 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
5294 
5295   SMLoc CntLoc = getLoc();
5296   StringRef CntName = getTokenStr();
5297 
5298   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
5299       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
5300     return false;
5301 
5302   int64_t CntVal;
5303   SMLoc ValLoc = getLoc();
5304   if (!parseExpr(CntVal))
5305     return false;
5306 
5307   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5308 
5309   bool Failed = true;
5310   bool Sat = CntName.endswith("_sat");
5311 
5312   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
5313     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
5314   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
5315     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
5316   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
5317     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
5318   } else {
5319     Error(CntLoc, "invalid counter name " + CntName);
5320     return false;
5321   }
5322 
5323   if (Failed) {
5324     Error(ValLoc, "too large value for " + CntName);
5325     return false;
5326   }
5327 
5328   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
5329     return false;
5330 
5331   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
5332     if (isToken(AsmToken::EndOfStatement)) {
5333       Error(getLoc(), "expected a counter name");
5334       return false;
5335     }
5336   }
5337 
5338   return true;
5339 }
5340 
5341 OperandMatchResultTy
5342 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
5343   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5344   int64_t Waitcnt = getWaitcntBitMask(ISA);
5345   SMLoc S = getLoc();
5346 
5347   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
5348     while (!isToken(AsmToken::EndOfStatement)) {
5349       if (!parseCnt(Waitcnt))
5350         return MatchOperand_ParseFail;
5351     }
5352   } else {
5353     if (!parseExpr(Waitcnt))
5354       return MatchOperand_ParseFail;
5355   }
5356 
5357   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
5358   return MatchOperand_Success;
5359 }
5360 
5361 bool
5362 AMDGPUOperand::isSWaitCnt() const {
5363   return isImm();
5364 }
5365 
5366 //===----------------------------------------------------------------------===//
5367 // hwreg
5368 //===----------------------------------------------------------------------===//
5369 
5370 bool
5371 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
5372                                 int64_t &Offset,
5373                                 int64_t &Width) {
5374   using namespace llvm::AMDGPU::Hwreg;
5375 
5376   // The register may be specified by name or using a numeric code
5377   if (isToken(AsmToken::Identifier) &&
5378       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
5379     HwReg.IsSymbolic = true;
5380     lex(); // skip message name
5381   } else if (!parseExpr(HwReg.Id)) {
5382     return false;
5383   }
5384 
5385   if (trySkipToken(AsmToken::RParen))
5386     return true;
5387 
5388   // parse optional params
5389   return
5390     skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
5391     parseExpr(Offset) &&
5392     skipToken(AsmToken::Comma, "expected a comma") &&
5393     parseExpr(Width) &&
5394     skipToken(AsmToken::RParen, "expected a closing parenthesis");
5395 }
5396 
5397 bool
5398 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
5399                                const int64_t Offset,
5400                                const int64_t Width,
5401                                const SMLoc Loc) {
5402 
5403   using namespace llvm::AMDGPU::Hwreg;
5404 
5405   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
5406     Error(Loc, "specified hardware register is not supported on this GPU");
5407     return false;
5408   } else if (!isValidHwreg(HwReg.Id)) {
5409     Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
5410     return false;
5411   } else if (!isValidHwregOffset(Offset)) {
5412     Error(Loc, "invalid bit offset: only 5-bit values are legal");
5413     return false;
5414   } else if (!isValidHwregWidth(Width)) {
5415     Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
5416     return false;
5417   }
5418   return true;
5419 }
5420 
5421 OperandMatchResultTy
5422 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
5423   using namespace llvm::AMDGPU::Hwreg;
5424 
5425   int64_t ImmVal = 0;
5426   SMLoc Loc = getLoc();
5427 
5428   if (trySkipId("hwreg", AsmToken::LParen)) {
5429     OperandInfoTy HwReg(ID_UNKNOWN_);
5430     int64_t Offset = OFFSET_DEFAULT_;
5431     int64_t Width = WIDTH_DEFAULT_;
5432     if (parseHwregBody(HwReg, Offset, Width) &&
5433         validateHwreg(HwReg, Offset, Width, Loc)) {
5434       ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
5435     } else {
5436       return MatchOperand_ParseFail;
5437     }
5438   } else if (parseExpr(ImmVal)) {
5439     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
5440       Error(Loc, "invalid immediate: only 16-bit values are legal");
5441       return MatchOperand_ParseFail;
5442     }
5443   } else {
5444     return MatchOperand_ParseFail;
5445   }
5446 
5447   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
5448   return MatchOperand_Success;
5449 }
5450 
5451 bool AMDGPUOperand::isHwreg() const {
5452   return isImmTy(ImmTyHwreg);
5453 }
5454 
5455 //===----------------------------------------------------------------------===//
5456 // sendmsg
5457 //===----------------------------------------------------------------------===//
5458 
5459 bool
5460 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
5461                                   OperandInfoTy &Op,
5462                                   OperandInfoTy &Stream) {
5463   using namespace llvm::AMDGPU::SendMsg;
5464 
5465   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
5466     Msg.IsSymbolic = true;
5467     lex(); // skip message name
5468   } else if (!parseExpr(Msg.Id)) {
5469     return false;
5470   }
5471 
5472   if (trySkipToken(AsmToken::Comma)) {
5473     Op.IsDefined = true;
5474     if (isToken(AsmToken::Identifier) &&
5475         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
5476       lex(); // skip operation name
5477     } else if (!parseExpr(Op.Id)) {
5478       return false;
5479     }
5480 
5481     if (trySkipToken(AsmToken::Comma)) {
5482       Stream.IsDefined = true;
5483       if (!parseExpr(Stream.Id))
5484         return false;
5485     }
5486   }
5487 
5488   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
5489 }
5490 
5491 bool
5492 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
5493                                  const OperandInfoTy &Op,
5494                                  const OperandInfoTy &Stream,
5495                                  const SMLoc S) {
5496   using namespace llvm::AMDGPU::SendMsg;
5497 
5498   // Validation strictness depends on whether message is specified
5499   // in a symbolc or in a numeric form. In the latter case
5500   // only encoding possibility is checked.
5501   bool Strict = Msg.IsSymbolic;
5502 
5503   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
5504     Error(S, "invalid message id");
5505     return false;
5506   } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
5507     Error(S, Op.IsDefined ?
5508              "message does not support operations" :
5509              "missing message operation");
5510     return false;
5511   } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
5512     Error(S, "invalid operation id");
5513     return false;
5514   } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
5515     Error(S, "message operation does not support streams");
5516     return false;
5517   } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
5518     Error(S, "invalid message stream id");
5519     return false;
5520   }
5521   return true;
5522 }
5523 
5524 OperandMatchResultTy
5525 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
5526   using namespace llvm::AMDGPU::SendMsg;
5527 
5528   int64_t ImmVal = 0;
5529   SMLoc Loc = getLoc();
5530 
5531   if (trySkipId("sendmsg", AsmToken::LParen)) {
5532     OperandInfoTy Msg(ID_UNKNOWN_);
5533     OperandInfoTy Op(OP_NONE_);
5534     OperandInfoTy Stream(STREAM_ID_NONE_);
5535     if (parseSendMsgBody(Msg, Op, Stream) &&
5536         validateSendMsg(Msg, Op, Stream, Loc)) {
5537       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5538     } else {
5539       return MatchOperand_ParseFail;
5540     }
5541   } else if (parseExpr(ImmVal)) {
5542     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
5543       Error(Loc, "invalid immediate: only 16-bit values are legal");
5544       return MatchOperand_ParseFail;
5545     }
5546   } else {
5547     return MatchOperand_ParseFail;
5548   }
5549 
5550   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5551   return MatchOperand_Success;
5552 }
5553 
5554 bool AMDGPUOperand::isSendMsg() const {
5555   return isImmTy(ImmTySendMsg);
5556 }
5557 
5558 //===----------------------------------------------------------------------===//
5559 // v_interp
5560 //===----------------------------------------------------------------------===//
5561 
5562 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5563   if (getLexer().getKind() != AsmToken::Identifier)
5564     return MatchOperand_NoMatch;
5565 
5566   StringRef Str = Parser.getTok().getString();
5567   int Slot = StringSwitch<int>(Str)
5568     .Case("p10", 0)
5569     .Case("p20", 1)
5570     .Case("p0", 2)
5571     .Default(-1);
5572 
5573   SMLoc S = Parser.getTok().getLoc();
5574   if (Slot == -1)
5575     return MatchOperand_ParseFail;
5576 
5577   Parser.Lex();
5578   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5579                                               AMDGPUOperand::ImmTyInterpSlot));
5580   return MatchOperand_Success;
5581 }
5582 
5583 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5584   if (getLexer().getKind() != AsmToken::Identifier)
5585     return MatchOperand_NoMatch;
5586 
5587   StringRef Str = Parser.getTok().getString();
5588   if (!Str.startswith("attr"))
5589     return MatchOperand_NoMatch;
5590 
5591   StringRef Chan = Str.take_back(2);
5592   int AttrChan = StringSwitch<int>(Chan)
5593     .Case(".x", 0)
5594     .Case(".y", 1)
5595     .Case(".z", 2)
5596     .Case(".w", 3)
5597     .Default(-1);
5598   if (AttrChan == -1)
5599     return MatchOperand_ParseFail;
5600 
5601   Str = Str.drop_back(2).drop_front(4);
5602 
5603   uint8_t Attr;
5604   if (Str.getAsInteger(10, Attr))
5605     return MatchOperand_ParseFail;
5606 
5607   SMLoc S = Parser.getTok().getLoc();
5608   Parser.Lex();
5609   if (Attr > 63) {
5610     Error(S, "out of bounds attr");
5611     return MatchOperand_ParseFail;
5612   }
5613 
5614   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5615 
5616   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5617                                               AMDGPUOperand::ImmTyInterpAttr));
5618   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5619                                               AMDGPUOperand::ImmTyAttrChan));
5620   return MatchOperand_Success;
5621 }
5622 
5623 //===----------------------------------------------------------------------===//
5624 // exp
5625 //===----------------------------------------------------------------------===//
5626 
5627 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5628                                                       uint8_t &Val) {
5629   if (Str == "null") {
5630     Val = 9;
5631     return MatchOperand_Success;
5632   }
5633 
5634   if (Str.startswith("mrt")) {
5635     Str = Str.drop_front(3);
5636     if (Str == "z") { // == mrtz
5637       Val = 8;
5638       return MatchOperand_Success;
5639     }
5640 
5641     if (Str.getAsInteger(10, Val))
5642       return MatchOperand_ParseFail;
5643 
5644     if (Val > 7) {
5645       Error(getLoc(), "invalid exp target");
5646       return MatchOperand_ParseFail;
5647     }
5648 
5649     return MatchOperand_Success;
5650   }
5651 
5652   if (Str.startswith("pos")) {
5653     Str = Str.drop_front(3);
5654     if (Str.getAsInteger(10, Val))
5655       return MatchOperand_ParseFail;
5656 
5657     if (Val > 4 || (Val == 4 && !isGFX10())) {
5658       Error(getLoc(), "invalid exp target");
5659       return MatchOperand_ParseFail;
5660     }
5661 
5662     Val += 12;
5663     return MatchOperand_Success;
5664   }
5665 
5666   if (isGFX10() && Str == "prim") {
5667     Val = 20;
5668     return MatchOperand_Success;
5669   }
5670 
5671   if (Str.startswith("param")) {
5672     Str = Str.drop_front(5);
5673     if (Str.getAsInteger(10, Val))
5674       return MatchOperand_ParseFail;
5675 
5676     if (Val >= 32) {
5677       Error(getLoc(), "invalid exp target");
5678       return MatchOperand_ParseFail;
5679     }
5680 
5681     Val += 32;
5682     return MatchOperand_Success;
5683   }
5684 
5685   if (Str.startswith("invalid_target_")) {
5686     Str = Str.drop_front(15);
5687     if (Str.getAsInteger(10, Val))
5688       return MatchOperand_ParseFail;
5689 
5690     Error(getLoc(), "invalid exp target");
5691     return MatchOperand_ParseFail;
5692   }
5693 
5694   return MatchOperand_NoMatch;
5695 }
5696 
5697 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5698   uint8_t Val;
5699   StringRef Str = Parser.getTok().getString();
5700 
5701   auto Res = parseExpTgtImpl(Str, Val);
5702   if (Res != MatchOperand_Success)
5703     return Res;
5704 
5705   SMLoc S = Parser.getTok().getLoc();
5706   Parser.Lex();
5707 
5708   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5709                                               AMDGPUOperand::ImmTyExpTgt));
5710   return MatchOperand_Success;
5711 }
5712 
5713 //===----------------------------------------------------------------------===//
5714 // parser helpers
5715 //===----------------------------------------------------------------------===//
5716 
5717 bool
5718 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5719   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5720 }
5721 
5722 bool
5723 AMDGPUAsmParser::isId(const StringRef Id) const {
5724   return isId(getToken(), Id);
5725 }
5726 
5727 bool
5728 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5729   return getTokenKind() == Kind;
5730 }
5731 
5732 bool
5733 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5734   if (isId(Id)) {
5735     lex();
5736     return true;
5737   }
5738   return false;
5739 }
5740 
5741 bool
5742 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5743   if (isId(Id) && peekToken().is(Kind)) {
5744     lex();
5745     lex();
5746     return true;
5747   }
5748   return false;
5749 }
5750 
5751 bool
5752 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5753   if (isToken(Kind)) {
5754     lex();
5755     return true;
5756   }
5757   return false;
5758 }
5759 
5760 bool
5761 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5762                            const StringRef ErrMsg) {
5763   if (!trySkipToken(Kind)) {
5764     Error(getLoc(), ErrMsg);
5765     return false;
5766   }
5767   return true;
5768 }
5769 
5770 bool
5771 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5772   return !getParser().parseAbsoluteExpression(Imm);
5773 }
5774 
5775 bool
5776 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
5777   SMLoc S = getLoc();
5778 
5779   const MCExpr *Expr;
5780   if (Parser.parseExpression(Expr))
5781     return false;
5782 
5783   int64_t IntVal;
5784   if (Expr->evaluateAsAbsolute(IntVal)) {
5785     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
5786   } else {
5787     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
5788   }
5789   return true;
5790 }
5791 
5792 bool
5793 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5794   if (isToken(AsmToken::String)) {
5795     Val = getToken().getStringContents();
5796     lex();
5797     return true;
5798   } else {
5799     Error(getLoc(), ErrMsg);
5800     return false;
5801   }
5802 }
5803 
5804 bool
5805 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
5806   if (isToken(AsmToken::Identifier)) {
5807     Val = getTokenStr();
5808     lex();
5809     return true;
5810   } else {
5811     Error(getLoc(), ErrMsg);
5812     return false;
5813   }
5814 }
5815 
5816 AsmToken
5817 AMDGPUAsmParser::getToken() const {
5818   return Parser.getTok();
5819 }
5820 
5821 AsmToken
5822 AMDGPUAsmParser::peekToken() {
5823   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
5824 }
5825 
5826 void
5827 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
5828   auto TokCount = getLexer().peekTokens(Tokens);
5829 
5830   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
5831     Tokens[Idx] = AsmToken(AsmToken::Error, "");
5832 }
5833 
5834 AsmToken::TokenKind
5835 AMDGPUAsmParser::getTokenKind() const {
5836   return getLexer().getKind();
5837 }
5838 
5839 SMLoc
5840 AMDGPUAsmParser::getLoc() const {
5841   return getToken().getLoc();
5842 }
5843 
5844 StringRef
5845 AMDGPUAsmParser::getTokenStr() const {
5846   return getToken().getString();
5847 }
5848 
5849 void
5850 AMDGPUAsmParser::lex() {
5851   Parser.Lex();
5852 }
5853 
5854 //===----------------------------------------------------------------------===//
5855 // swizzle
5856 //===----------------------------------------------------------------------===//
5857 
5858 LLVM_READNONE
5859 static unsigned
5860 encodeBitmaskPerm(const unsigned AndMask,
5861                   const unsigned OrMask,
5862                   const unsigned XorMask) {
5863   using namespace llvm::AMDGPU::Swizzle;
5864 
5865   return BITMASK_PERM_ENC |
5866          (AndMask << BITMASK_AND_SHIFT) |
5867          (OrMask  << BITMASK_OR_SHIFT)  |
5868          (XorMask << BITMASK_XOR_SHIFT);
5869 }
5870 
5871 bool
5872 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5873                                       const unsigned MinVal,
5874                                       const unsigned MaxVal,
5875                                       const StringRef ErrMsg) {
5876   for (unsigned i = 0; i < OpNum; ++i) {
5877     if (!skipToken(AsmToken::Comma, "expected a comma")){
5878       return false;
5879     }
5880     SMLoc ExprLoc = Parser.getTok().getLoc();
5881     if (!parseExpr(Op[i])) {
5882       return false;
5883     }
5884     if (Op[i] < MinVal || Op[i] > MaxVal) {
5885       Error(ExprLoc, ErrMsg);
5886       return false;
5887     }
5888   }
5889 
5890   return true;
5891 }
5892 
5893 bool
5894 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5895   using namespace llvm::AMDGPU::Swizzle;
5896 
5897   int64_t Lane[LANE_NUM];
5898   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5899                            "expected a 2-bit lane id")) {
5900     Imm = QUAD_PERM_ENC;
5901     for (unsigned I = 0; I < LANE_NUM; ++I) {
5902       Imm |= Lane[I] << (LANE_SHIFT * I);
5903     }
5904     return true;
5905   }
5906   return false;
5907 }
5908 
5909 bool
5910 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5911   using namespace llvm::AMDGPU::Swizzle;
5912 
5913   SMLoc S = Parser.getTok().getLoc();
5914   int64_t GroupSize;
5915   int64_t LaneIdx;
5916 
5917   if (!parseSwizzleOperands(1, &GroupSize,
5918                             2, 32,
5919                             "group size must be in the interval [2,32]")) {
5920     return false;
5921   }
5922   if (!isPowerOf2_64(GroupSize)) {
5923     Error(S, "group size must be a power of two");
5924     return false;
5925   }
5926   if (parseSwizzleOperands(1, &LaneIdx,
5927                            0, GroupSize - 1,
5928                            "lane id must be in the interval [0,group size - 1]")) {
5929     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5930     return true;
5931   }
5932   return false;
5933 }
5934 
5935 bool
5936 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
5937   using namespace llvm::AMDGPU::Swizzle;
5938 
5939   SMLoc S = Parser.getTok().getLoc();
5940   int64_t GroupSize;
5941 
5942   if (!parseSwizzleOperands(1, &GroupSize,
5943       2, 32, "group size must be in the interval [2,32]")) {
5944     return false;
5945   }
5946   if (!isPowerOf2_64(GroupSize)) {
5947     Error(S, "group size must be a power of two");
5948     return false;
5949   }
5950 
5951   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
5952   return true;
5953 }
5954 
5955 bool
5956 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5957   using namespace llvm::AMDGPU::Swizzle;
5958 
5959   SMLoc S = Parser.getTok().getLoc();
5960   int64_t GroupSize;
5961 
5962   if (!parseSwizzleOperands(1, &GroupSize,
5963       1, 16, "group size must be in the interval [1,16]")) {
5964     return false;
5965   }
5966   if (!isPowerOf2_64(GroupSize)) {
5967     Error(S, "group size must be a power of two");
5968     return false;
5969   }
5970 
5971   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5972   return true;
5973 }
5974 
5975 bool
5976 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5977   using namespace llvm::AMDGPU::Swizzle;
5978 
5979   if (!skipToken(AsmToken::Comma, "expected a comma")) {
5980     return false;
5981   }
5982 
5983   StringRef Ctl;
5984   SMLoc StrLoc = Parser.getTok().getLoc();
5985   if (!parseString(Ctl)) {
5986     return false;
5987   }
5988   if (Ctl.size() != BITMASK_WIDTH) {
5989     Error(StrLoc, "expected a 5-character mask");
5990     return false;
5991   }
5992 
5993   unsigned AndMask = 0;
5994   unsigned OrMask = 0;
5995   unsigned XorMask = 0;
5996 
5997   for (size_t i = 0; i < Ctl.size(); ++i) {
5998     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5999     switch(Ctl[i]) {
6000     default:
6001       Error(StrLoc, "invalid mask");
6002       return false;
6003     case '0':
6004       break;
6005     case '1':
6006       OrMask |= Mask;
6007       break;
6008     case 'p':
6009       AndMask |= Mask;
6010       break;
6011     case 'i':
6012       AndMask |= Mask;
6013       XorMask |= Mask;
6014       break;
6015     }
6016   }
6017 
6018   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6019   return true;
6020 }
6021 
6022 bool
6023 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6024 
6025   SMLoc OffsetLoc = Parser.getTok().getLoc();
6026 
6027   if (!parseExpr(Imm)) {
6028     return false;
6029   }
6030   if (!isUInt<16>(Imm)) {
6031     Error(OffsetLoc, "expected a 16-bit offset");
6032     return false;
6033   }
6034   return true;
6035 }
6036 
6037 bool
6038 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6039   using namespace llvm::AMDGPU::Swizzle;
6040 
6041   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
6042 
6043     SMLoc ModeLoc = Parser.getTok().getLoc();
6044     bool Ok = false;
6045 
6046     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6047       Ok = parseSwizzleQuadPerm(Imm);
6048     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6049       Ok = parseSwizzleBitmaskPerm(Imm);
6050     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6051       Ok = parseSwizzleBroadcast(Imm);
6052     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6053       Ok = parseSwizzleSwap(Imm);
6054     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6055       Ok = parseSwizzleReverse(Imm);
6056     } else {
6057       Error(ModeLoc, "expected a swizzle mode");
6058     }
6059 
6060     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
6061   }
6062 
6063   return false;
6064 }
6065 
6066 OperandMatchResultTy
6067 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6068   SMLoc S = Parser.getTok().getLoc();
6069   int64_t Imm = 0;
6070 
6071   if (trySkipId("offset")) {
6072 
6073     bool Ok = false;
6074     if (skipToken(AsmToken::Colon, "expected a colon")) {
6075       if (trySkipId("swizzle")) {
6076         Ok = parseSwizzleMacro(Imm);
6077       } else {
6078         Ok = parseSwizzleOffset(Imm);
6079       }
6080     }
6081 
6082     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6083 
6084     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
6085   } else {
6086     // Swizzle "offset" operand is optional.
6087     // If it is omitted, try parsing other optional operands.
6088     return parseOptionalOpr(Operands);
6089   }
6090 }
6091 
6092 bool
6093 AMDGPUOperand::isSwizzle() const {
6094   return isImmTy(ImmTySwizzle);
6095 }
6096 
6097 //===----------------------------------------------------------------------===//
6098 // VGPR Index Mode
6099 //===----------------------------------------------------------------------===//
6100 
6101 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
6102 
6103   using namespace llvm::AMDGPU::VGPRIndexMode;
6104 
6105   if (trySkipToken(AsmToken::RParen)) {
6106     return OFF;
6107   }
6108 
6109   int64_t Imm = 0;
6110 
6111   while (true) {
6112     unsigned Mode = 0;
6113     SMLoc S = Parser.getTok().getLoc();
6114 
6115     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
6116       if (trySkipId(IdSymbolic[ModeId])) {
6117         Mode = 1 << ModeId;
6118         break;
6119       }
6120     }
6121 
6122     if (Mode == 0) {
6123       Error(S, (Imm == 0)?
6124                "expected a VGPR index mode or a closing parenthesis" :
6125                "expected a VGPR index mode");
6126       return UNDEF;
6127     }
6128 
6129     if (Imm & Mode) {
6130       Error(S, "duplicate VGPR index mode");
6131       return UNDEF;
6132     }
6133     Imm |= Mode;
6134 
6135     if (trySkipToken(AsmToken::RParen))
6136       break;
6137     if (!skipToken(AsmToken::Comma,
6138                    "expected a comma or a closing parenthesis"))
6139       return UNDEF;
6140   }
6141 
6142   return Imm;
6143 }
6144 
6145 OperandMatchResultTy
6146 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
6147 
6148   using namespace llvm::AMDGPU::VGPRIndexMode;
6149 
6150   int64_t Imm = 0;
6151   SMLoc S = Parser.getTok().getLoc();
6152 
6153   if (getLexer().getKind() == AsmToken::Identifier &&
6154       Parser.getTok().getString() == "gpr_idx" &&
6155       getLexer().peekTok().is(AsmToken::LParen)) {
6156 
6157     Parser.Lex();
6158     Parser.Lex();
6159 
6160     Imm = parseGPRIdxMacro();
6161     if (Imm == UNDEF)
6162       return MatchOperand_ParseFail;
6163 
6164   } else {
6165     if (getParser().parseAbsoluteExpression(Imm))
6166       return MatchOperand_ParseFail;
6167     if (Imm < 0 || !isUInt<4>(Imm)) {
6168       Error(S, "invalid immediate: only 4-bit values are legal");
6169       return MatchOperand_ParseFail;
6170     }
6171   }
6172 
6173   Operands.push_back(
6174       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
6175   return MatchOperand_Success;
6176 }
6177 
6178 bool AMDGPUOperand::isGPRIdxMode() const {
6179   return isImmTy(ImmTyGprIdxMode);
6180 }
6181 
6182 //===----------------------------------------------------------------------===//
6183 // sopp branch targets
6184 //===----------------------------------------------------------------------===//
6185 
6186 OperandMatchResultTy
6187 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
6188 
6189   // Make sure we are not parsing something
6190   // that looks like a label or an expression but is not.
6191   // This will improve error messages.
6192   if (isRegister() || isModifier())
6193     return MatchOperand_NoMatch;
6194 
6195   if (!parseExpr(Operands))
6196     return MatchOperand_ParseFail;
6197 
6198   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
6199   assert(Opr.isImm() || Opr.isExpr());
6200   SMLoc Loc = Opr.getStartLoc();
6201 
6202   // Currently we do not support arbitrary expressions as branch targets.
6203   // Only labels and absolute expressions are accepted.
6204   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
6205     Error(Loc, "expected an absolute expression or a label");
6206   } else if (Opr.isImm() && !Opr.isS16Imm()) {
6207     Error(Loc, "expected a 16-bit signed jump offset");
6208   }
6209 
6210   return MatchOperand_Success;
6211 }
6212 
6213 //===----------------------------------------------------------------------===//
6214 // Boolean holding registers
6215 //===----------------------------------------------------------------------===//
6216 
6217 OperandMatchResultTy
6218 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
6219   return parseReg(Operands);
6220 }
6221 
6222 //===----------------------------------------------------------------------===//
6223 // mubuf
6224 //===----------------------------------------------------------------------===//
6225 
6226 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
6227   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
6228 }
6229 
6230 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
6231   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
6232 }
6233 
6234 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
6235   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
6236 }
6237 
6238 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
6239                                const OperandVector &Operands,
6240                                bool IsAtomic,
6241                                bool IsAtomicReturn,
6242                                bool IsLds) {
6243   bool IsLdsOpcode = IsLds;
6244   bool HasLdsModifier = false;
6245   OptionalImmIndexMap OptionalIdx;
6246   assert(IsAtomicReturn ? IsAtomic : true);
6247   unsigned FirstOperandIdx = 1;
6248 
6249   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
6250     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6251 
6252     // Add the register arguments
6253     if (Op.isReg()) {
6254       Op.addRegOperands(Inst, 1);
6255       // Insert a tied src for atomic return dst.
6256       // This cannot be postponed as subsequent calls to
6257       // addImmOperands rely on correct number of MC operands.
6258       if (IsAtomicReturn && i == FirstOperandIdx)
6259         Op.addRegOperands(Inst, 1);
6260       continue;
6261     }
6262 
6263     // Handle the case where soffset is an immediate
6264     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6265       Op.addImmOperands(Inst, 1);
6266       continue;
6267     }
6268 
6269     HasLdsModifier |= Op.isLDS();
6270 
6271     // Handle tokens like 'offen' which are sometimes hard-coded into the
6272     // asm string.  There are no MCInst operands for these.
6273     if (Op.isToken()) {
6274       continue;
6275     }
6276     assert(Op.isImm());
6277 
6278     // Handle optional arguments
6279     OptionalIdx[Op.getImmTy()] = i;
6280   }
6281 
6282   // This is a workaround for an llvm quirk which may result in an
6283   // incorrect instruction selection. Lds and non-lds versions of
6284   // MUBUF instructions are identical except that lds versions
6285   // have mandatory 'lds' modifier. However this modifier follows
6286   // optional modifiers and llvm asm matcher regards this 'lds'
6287   // modifier as an optional one. As a result, an lds version
6288   // of opcode may be selected even if it has no 'lds' modifier.
6289   if (IsLdsOpcode && !HasLdsModifier) {
6290     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
6291     if (NoLdsOpcode != -1) { // Got lds version - correct it.
6292       Inst.setOpcode(NoLdsOpcode);
6293       IsLdsOpcode = false;
6294     }
6295   }
6296 
6297   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
6298   if (!IsAtomic) { // glc is hard-coded.
6299     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6300   }
6301   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6302 
6303   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
6304     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6305   }
6306 
6307   if (isGFX10())
6308     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6309 }
6310 
6311 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
6312   OptionalImmIndexMap OptionalIdx;
6313 
6314   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6315     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6316 
6317     // Add the register arguments
6318     if (Op.isReg()) {
6319       Op.addRegOperands(Inst, 1);
6320       continue;
6321     }
6322 
6323     // Handle the case where soffset is an immediate
6324     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6325       Op.addImmOperands(Inst, 1);
6326       continue;
6327     }
6328 
6329     // Handle tokens like 'offen' which are sometimes hard-coded into the
6330     // asm string.  There are no MCInst operands for these.
6331     if (Op.isToken()) {
6332       continue;
6333     }
6334     assert(Op.isImm());
6335 
6336     // Handle optional arguments
6337     OptionalIdx[Op.getImmTy()] = i;
6338   }
6339 
6340   addOptionalImmOperand(Inst, Operands, OptionalIdx,
6341                         AMDGPUOperand::ImmTyOffset);
6342   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
6343   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6344   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6345   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6346 
6347   if (isGFX10())
6348     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6349 }
6350 
6351 //===----------------------------------------------------------------------===//
6352 // mimg
6353 //===----------------------------------------------------------------------===//
6354 
6355 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
6356                               bool IsAtomic) {
6357   unsigned I = 1;
6358   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6359   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6360     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6361   }
6362 
6363   if (IsAtomic) {
6364     // Add src, same as dst
6365     assert(Desc.getNumDefs() == 1);
6366     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
6367   }
6368 
6369   OptionalImmIndexMap OptionalIdx;
6370 
6371   for (unsigned E = Operands.size(); I != E; ++I) {
6372     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6373 
6374     // Add the register arguments
6375     if (Op.isReg()) {
6376       Op.addRegOperands(Inst, 1);
6377     } else if (Op.isImmModifier()) {
6378       OptionalIdx[Op.getImmTy()] = I;
6379     } else if (!Op.isToken()) {
6380       llvm_unreachable("unexpected operand type");
6381     }
6382   }
6383 
6384   bool IsGFX10 = isGFX10();
6385 
6386   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
6387   if (IsGFX10)
6388     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
6389   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
6390   if (IsGFX10)
6391     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6392   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6393   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6394   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
6395   if (IsGFX10)
6396     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
6397   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6398   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
6399   if (!IsGFX10)
6400     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
6401   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
6402 }
6403 
6404 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
6405   cvtMIMG(Inst, Operands, true);
6406 }
6407 
6408 //===----------------------------------------------------------------------===//
6409 // smrd
6410 //===----------------------------------------------------------------------===//
6411 
6412 bool AMDGPUOperand::isSMRDOffset8() const {
6413   return isImm() && isUInt<8>(getImm());
6414 }
6415 
6416 bool AMDGPUOperand::isSMEMOffset() const {
6417   return isImm(); // Offset range is checked later by validator.
6418 }
6419 
6420 bool AMDGPUOperand::isSMRDLiteralOffset() const {
6421   // 32-bit literals are only supported on CI and we only want to use them
6422   // when the offset is > 8-bits.
6423   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
6424 }
6425 
6426 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
6427   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6428 }
6429 
6430 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
6431   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6432 }
6433 
6434 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
6435   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6436 }
6437 
6438 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
6439   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6440 }
6441 
6442 //===----------------------------------------------------------------------===//
6443 // vop3
6444 //===----------------------------------------------------------------------===//
6445 
6446 static bool ConvertOmodMul(int64_t &Mul) {
6447   if (Mul != 1 && Mul != 2 && Mul != 4)
6448     return false;
6449 
6450   Mul >>= 1;
6451   return true;
6452 }
6453 
6454 static bool ConvertOmodDiv(int64_t &Div) {
6455   if (Div == 1) {
6456     Div = 0;
6457     return true;
6458   }
6459 
6460   if (Div == 2) {
6461     Div = 3;
6462     return true;
6463   }
6464 
6465   return false;
6466 }
6467 
6468 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
6469   if (BoundCtrl == 0) {
6470     BoundCtrl = 1;
6471     return true;
6472   }
6473 
6474   if (BoundCtrl == -1) {
6475     BoundCtrl = 0;
6476     return true;
6477   }
6478 
6479   return false;
6480 }
6481 
6482 // Note: the order in this table matches the order of operands in AsmString.
6483 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
6484   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
6485   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
6486   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
6487   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
6488   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
6489   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
6490   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
6491   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
6492   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
6493   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
6494   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
6495   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
6496   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
6497   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
6498   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
6499   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
6500   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
6501   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
6502   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
6503   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
6504   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
6505   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
6506   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
6507   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
6508   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
6509   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
6510   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
6511   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
6512   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
6513   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
6514   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
6515   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
6516   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
6517   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
6518   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
6519   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
6520   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
6521   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
6522   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
6523   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
6524   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
6525   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
6526   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
6527 };
6528 
6529 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
6530 
6531   OperandMatchResultTy res = parseOptionalOpr(Operands);
6532 
6533   // This is a hack to enable hardcoded mandatory operands which follow
6534   // optional operands.
6535   //
6536   // Current design assumes that all operands after the first optional operand
6537   // are also optional. However implementation of some instructions violates
6538   // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
6539   //
6540   // To alleviate this problem, we have to (implicitly) parse extra operands
6541   // to make sure autogenerated parser of custom operands never hit hardcoded
6542   // mandatory operands.
6543 
6544   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
6545     if (res != MatchOperand_Success ||
6546         isToken(AsmToken::EndOfStatement))
6547       break;
6548 
6549     trySkipToken(AsmToken::Comma);
6550     res = parseOptionalOpr(Operands);
6551   }
6552 
6553   return res;
6554 }
6555 
6556 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6557   OperandMatchResultTy res;
6558   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6559     // try to parse any optional operand here
6560     if (Op.IsBit) {
6561       res = parseNamedBit(Op.Name, Operands, Op.Type);
6562     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6563       res = parseOModOperand(Operands);
6564     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6565                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6566                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6567       res = parseSDWASel(Operands, Op.Name, Op.Type);
6568     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6569       res = parseSDWADstUnused(Operands);
6570     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6571                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6572                Op.Type == AMDGPUOperand::ImmTyNegLo ||
6573                Op.Type == AMDGPUOperand::ImmTyNegHi) {
6574       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6575                                         Op.ConvertResult);
6576     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6577       res = parseDim(Operands);
6578     } else {
6579       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6580     }
6581     if (res != MatchOperand_NoMatch) {
6582       return res;
6583     }
6584   }
6585   return MatchOperand_NoMatch;
6586 }
6587 
6588 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6589   StringRef Name = Parser.getTok().getString();
6590   if (Name == "mul") {
6591     return parseIntWithPrefix("mul", Operands,
6592                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6593   }
6594 
6595   if (Name == "div") {
6596     return parseIntWithPrefix("div", Operands,
6597                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6598   }
6599 
6600   return MatchOperand_NoMatch;
6601 }
6602 
6603 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6604   cvtVOP3P(Inst, Operands);
6605 
6606   int Opc = Inst.getOpcode();
6607 
6608   int SrcNum;
6609   const int Ops[] = { AMDGPU::OpName::src0,
6610                       AMDGPU::OpName::src1,
6611                       AMDGPU::OpName::src2 };
6612   for (SrcNum = 0;
6613        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6614        ++SrcNum);
6615   assert(SrcNum > 0);
6616 
6617   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6618   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6619 
6620   if ((OpSel & (1 << SrcNum)) != 0) {
6621     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6622     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6623     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6624   }
6625 }
6626 
6627 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
6628       // 1. This operand is input modifiers
6629   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
6630       // 2. This is not last operand
6631       && Desc.NumOperands > (OpNum + 1)
6632       // 3. Next operand is register class
6633       && Desc.OpInfo[OpNum + 1].RegClass != -1
6634       // 4. Next register is not tied to any other operand
6635       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6636 }
6637 
6638 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6639 {
6640   OptionalImmIndexMap OptionalIdx;
6641   unsigned Opc = Inst.getOpcode();
6642 
6643   unsigned I = 1;
6644   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6645   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6646     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6647   }
6648 
6649   for (unsigned E = Operands.size(); I != E; ++I) {
6650     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6651     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6652       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6653     } else if (Op.isInterpSlot() ||
6654                Op.isInterpAttr() ||
6655                Op.isAttrChan()) {
6656       Inst.addOperand(MCOperand::createImm(Op.getImm()));
6657     } else if (Op.isImmModifier()) {
6658       OptionalIdx[Op.getImmTy()] = I;
6659     } else {
6660       llvm_unreachable("unhandled operand type");
6661     }
6662   }
6663 
6664   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6665     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6666   }
6667 
6668   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6669     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6670   }
6671 
6672   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6673     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6674   }
6675 }
6676 
6677 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
6678                               OptionalImmIndexMap &OptionalIdx) {
6679   unsigned Opc = Inst.getOpcode();
6680 
6681   unsigned I = 1;
6682   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6683   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6684     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6685   }
6686 
6687   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
6688     // This instruction has src modifiers
6689     for (unsigned E = Operands.size(); I != E; ++I) {
6690       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6691       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6692         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6693       } else if (Op.isImmModifier()) {
6694         OptionalIdx[Op.getImmTy()] = I;
6695       } else if (Op.isRegOrImm()) {
6696         Op.addRegOrImmOperands(Inst, 1);
6697       } else {
6698         llvm_unreachable("unhandled operand type");
6699       }
6700     }
6701   } else {
6702     // No src modifiers
6703     for (unsigned E = Operands.size(); I != E; ++I) {
6704       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6705       if (Op.isMod()) {
6706         OptionalIdx[Op.getImmTy()] = I;
6707       } else {
6708         Op.addRegOrImmOperands(Inst, 1);
6709       }
6710     }
6711   }
6712 
6713   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6714     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6715   }
6716 
6717   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6718     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6719   }
6720 
6721   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
6722   // it has src2 register operand that is tied to dst operand
6723   // we don't allow modifiers for this operand in assembler so src2_modifiers
6724   // should be 0.
6725   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
6726       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
6727       Opc == AMDGPU::V_MAC_F32_e64_vi ||
6728       Opc == AMDGPU::V_MAC_F16_e64_vi ||
6729       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
6730       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
6731       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
6732     auto it = Inst.begin();
6733     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
6734     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
6735     ++it;
6736     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6737   }
6738 }
6739 
6740 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
6741   OptionalImmIndexMap OptionalIdx;
6742   cvtVOP3(Inst, Operands, OptionalIdx);
6743 }
6744 
6745 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
6746                                const OperandVector &Operands) {
6747   OptionalImmIndexMap OptIdx;
6748   const int Opc = Inst.getOpcode();
6749   const MCInstrDesc &Desc = MII.get(Opc);
6750 
6751   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
6752 
6753   cvtVOP3(Inst, Operands, OptIdx);
6754 
6755   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
6756     assert(!IsPacked);
6757     Inst.addOperand(Inst.getOperand(0));
6758   }
6759 
6760   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
6761   // instruction, and then figure out where to actually put the modifiers
6762 
6763   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
6764 
6765   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
6766   if (OpSelHiIdx != -1) {
6767     int DefaultVal = IsPacked ? -1 : 0;
6768     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
6769                           DefaultVal);
6770   }
6771 
6772   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
6773   if (NegLoIdx != -1) {
6774     assert(IsPacked);
6775     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
6776     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
6777   }
6778 
6779   const int Ops[] = { AMDGPU::OpName::src0,
6780                       AMDGPU::OpName::src1,
6781                       AMDGPU::OpName::src2 };
6782   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
6783                          AMDGPU::OpName::src1_modifiers,
6784                          AMDGPU::OpName::src2_modifiers };
6785 
6786   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6787 
6788   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6789   unsigned OpSelHi = 0;
6790   unsigned NegLo = 0;
6791   unsigned NegHi = 0;
6792 
6793   if (OpSelHiIdx != -1) {
6794     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
6795   }
6796 
6797   if (NegLoIdx != -1) {
6798     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
6799     NegLo = Inst.getOperand(NegLoIdx).getImm();
6800     NegHi = Inst.getOperand(NegHiIdx).getImm();
6801   }
6802 
6803   for (int J = 0; J < 3; ++J) {
6804     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
6805     if (OpIdx == -1)
6806       break;
6807 
6808     uint32_t ModVal = 0;
6809 
6810     if ((OpSel & (1 << J)) != 0)
6811       ModVal |= SISrcMods::OP_SEL_0;
6812 
6813     if ((OpSelHi & (1 << J)) != 0)
6814       ModVal |= SISrcMods::OP_SEL_1;
6815 
6816     if ((NegLo & (1 << J)) != 0)
6817       ModVal |= SISrcMods::NEG;
6818 
6819     if ((NegHi & (1 << J)) != 0)
6820       ModVal |= SISrcMods::NEG_HI;
6821 
6822     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
6823 
6824     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
6825   }
6826 }
6827 
6828 //===----------------------------------------------------------------------===//
6829 // dpp
6830 //===----------------------------------------------------------------------===//
6831 
6832 bool AMDGPUOperand::isDPP8() const {
6833   return isImmTy(ImmTyDPP8);
6834 }
6835 
6836 bool AMDGPUOperand::isDPPCtrl() const {
6837   using namespace AMDGPU::DPP;
6838 
6839   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
6840   if (result) {
6841     int64_t Imm = getImm();
6842     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
6843            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
6844            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
6845            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
6846            (Imm == DppCtrl::WAVE_SHL1) ||
6847            (Imm == DppCtrl::WAVE_ROL1) ||
6848            (Imm == DppCtrl::WAVE_SHR1) ||
6849            (Imm == DppCtrl::WAVE_ROR1) ||
6850            (Imm == DppCtrl::ROW_MIRROR) ||
6851            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
6852            (Imm == DppCtrl::BCAST15) ||
6853            (Imm == DppCtrl::BCAST31) ||
6854            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
6855            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
6856   }
6857   return false;
6858 }
6859 
6860 //===----------------------------------------------------------------------===//
6861 // mAI
6862 //===----------------------------------------------------------------------===//
6863 
6864 bool AMDGPUOperand::isBLGP() const {
6865   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
6866 }
6867 
6868 bool AMDGPUOperand::isCBSZ() const {
6869   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
6870 }
6871 
6872 bool AMDGPUOperand::isABID() const {
6873   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
6874 }
6875 
6876 bool AMDGPUOperand::isS16Imm() const {
6877   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
6878 }
6879 
6880 bool AMDGPUOperand::isU16Imm() const {
6881   return isImm() && isUInt<16>(getImm());
6882 }
6883 
6884 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
6885   if (!isGFX10())
6886     return MatchOperand_NoMatch;
6887 
6888   SMLoc S = Parser.getTok().getLoc();
6889 
6890   if (getLexer().isNot(AsmToken::Identifier))
6891     return MatchOperand_NoMatch;
6892   if (getLexer().getTok().getString() != "dim")
6893     return MatchOperand_NoMatch;
6894 
6895   Parser.Lex();
6896   if (getLexer().isNot(AsmToken::Colon))
6897     return MatchOperand_ParseFail;
6898 
6899   Parser.Lex();
6900 
6901   // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
6902   // integer.
6903   std::string Token;
6904   if (getLexer().is(AsmToken::Integer)) {
6905     SMLoc Loc = getLexer().getTok().getEndLoc();
6906     Token = std::string(getLexer().getTok().getString());
6907     Parser.Lex();
6908     if (getLexer().getTok().getLoc() != Loc)
6909       return MatchOperand_ParseFail;
6910   }
6911   if (getLexer().isNot(AsmToken::Identifier))
6912     return MatchOperand_ParseFail;
6913   Token += getLexer().getTok().getString();
6914 
6915   StringRef DimId = Token;
6916   if (DimId.startswith("SQ_RSRC_IMG_"))
6917     DimId = DimId.substr(12);
6918 
6919   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
6920   if (!DimInfo)
6921     return MatchOperand_ParseFail;
6922 
6923   Parser.Lex();
6924 
6925   Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
6926                                               AMDGPUOperand::ImmTyDim));
6927   return MatchOperand_Success;
6928 }
6929 
6930 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
6931   SMLoc S = Parser.getTok().getLoc();
6932   StringRef Prefix;
6933 
6934   if (getLexer().getKind() == AsmToken::Identifier) {
6935     Prefix = Parser.getTok().getString();
6936   } else {
6937     return MatchOperand_NoMatch;
6938   }
6939 
6940   if (Prefix != "dpp8")
6941     return parseDPPCtrl(Operands);
6942   if (!isGFX10())
6943     return MatchOperand_NoMatch;
6944 
6945   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
6946 
6947   int64_t Sels[8];
6948 
6949   Parser.Lex();
6950   if (getLexer().isNot(AsmToken::Colon))
6951     return MatchOperand_ParseFail;
6952 
6953   Parser.Lex();
6954   if (getLexer().isNot(AsmToken::LBrac))
6955     return MatchOperand_ParseFail;
6956 
6957   Parser.Lex();
6958   if (getParser().parseAbsoluteExpression(Sels[0]))
6959     return MatchOperand_ParseFail;
6960   if (0 > Sels[0] || 7 < Sels[0])
6961     return MatchOperand_ParseFail;
6962 
6963   for (size_t i = 1; i < 8; ++i) {
6964     if (getLexer().isNot(AsmToken::Comma))
6965       return MatchOperand_ParseFail;
6966 
6967     Parser.Lex();
6968     if (getParser().parseAbsoluteExpression(Sels[i]))
6969       return MatchOperand_ParseFail;
6970     if (0 > Sels[i] || 7 < Sels[i])
6971       return MatchOperand_ParseFail;
6972   }
6973 
6974   if (getLexer().isNot(AsmToken::RBrac))
6975     return MatchOperand_ParseFail;
6976   Parser.Lex();
6977 
6978   unsigned DPP8 = 0;
6979   for (size_t i = 0; i < 8; ++i)
6980     DPP8 |= (Sels[i] << (i * 3));
6981 
6982   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
6983   return MatchOperand_Success;
6984 }
6985 
6986 OperandMatchResultTy
6987 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
6988   using namespace AMDGPU::DPP;
6989 
6990   SMLoc S = Parser.getTok().getLoc();
6991   StringRef Prefix;
6992   int64_t Int;
6993 
6994   if (getLexer().getKind() == AsmToken::Identifier) {
6995     Prefix = Parser.getTok().getString();
6996   } else {
6997     return MatchOperand_NoMatch;
6998   }
6999 
7000   if (Prefix == "row_mirror") {
7001     Int = DppCtrl::ROW_MIRROR;
7002     Parser.Lex();
7003   } else if (Prefix == "row_half_mirror") {
7004     Int = DppCtrl::ROW_HALF_MIRROR;
7005     Parser.Lex();
7006   } else {
7007     // Check to prevent parseDPPCtrlOps from eating invalid tokens
7008     if (Prefix != "quad_perm"
7009         && Prefix != "row_shl"
7010         && Prefix != "row_shr"
7011         && Prefix != "row_ror"
7012         && Prefix != "wave_shl"
7013         && Prefix != "wave_rol"
7014         && Prefix != "wave_shr"
7015         && Prefix != "wave_ror"
7016         && Prefix != "row_bcast"
7017         && Prefix != "row_share"
7018         && Prefix != "row_xmask") {
7019       return MatchOperand_NoMatch;
7020     }
7021 
7022     if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
7023       return MatchOperand_NoMatch;
7024 
7025     if (!isVI() && !isGFX9() &&
7026         (Prefix == "wave_shl" || Prefix == "wave_shr" ||
7027          Prefix == "wave_rol" || Prefix == "wave_ror" ||
7028          Prefix == "row_bcast"))
7029       return MatchOperand_NoMatch;
7030 
7031     Parser.Lex();
7032     if (getLexer().isNot(AsmToken::Colon))
7033       return MatchOperand_ParseFail;
7034 
7035     if (Prefix == "quad_perm") {
7036       // quad_perm:[%d,%d,%d,%d]
7037       Parser.Lex();
7038       if (getLexer().isNot(AsmToken::LBrac))
7039         return MatchOperand_ParseFail;
7040       Parser.Lex();
7041 
7042       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
7043         return MatchOperand_ParseFail;
7044 
7045       for (int i = 0; i < 3; ++i) {
7046         if (getLexer().isNot(AsmToken::Comma))
7047           return MatchOperand_ParseFail;
7048         Parser.Lex();
7049 
7050         int64_t Temp;
7051         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
7052           return MatchOperand_ParseFail;
7053         const int shift = i*2 + 2;
7054         Int += (Temp << shift);
7055       }
7056 
7057       if (getLexer().isNot(AsmToken::RBrac))
7058         return MatchOperand_ParseFail;
7059       Parser.Lex();
7060     } else {
7061       // sel:%d
7062       Parser.Lex();
7063       if (getParser().parseAbsoluteExpression(Int))
7064         return MatchOperand_ParseFail;
7065 
7066       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
7067         Int |= DppCtrl::ROW_SHL0;
7068       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
7069         Int |= DppCtrl::ROW_SHR0;
7070       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
7071         Int |= DppCtrl::ROW_ROR0;
7072       } else if (Prefix == "wave_shl" && 1 == Int) {
7073         Int = DppCtrl::WAVE_SHL1;
7074       } else if (Prefix == "wave_rol" && 1 == Int) {
7075         Int = DppCtrl::WAVE_ROL1;
7076       } else if (Prefix == "wave_shr" && 1 == Int) {
7077         Int = DppCtrl::WAVE_SHR1;
7078       } else if (Prefix == "wave_ror" && 1 == Int) {
7079         Int = DppCtrl::WAVE_ROR1;
7080       } else if (Prefix == "row_bcast") {
7081         if (Int == 15) {
7082           Int = DppCtrl::BCAST15;
7083         } else if (Int == 31) {
7084           Int = DppCtrl::BCAST31;
7085         } else {
7086           return MatchOperand_ParseFail;
7087         }
7088       } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
7089         Int |= DppCtrl::ROW_SHARE_FIRST;
7090       } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
7091         Int |= DppCtrl::ROW_XMASK_FIRST;
7092       } else {
7093         return MatchOperand_ParseFail;
7094       }
7095     }
7096   }
7097 
7098   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
7099   return MatchOperand_Success;
7100 }
7101 
7102 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
7103   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
7104 }
7105 
7106 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
7107   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
7108 }
7109 
7110 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
7111   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
7112 }
7113 
7114 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
7115   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
7116 }
7117 
7118 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
7119   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
7120 }
7121 
7122 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
7123   OptionalImmIndexMap OptionalIdx;
7124 
7125   unsigned I = 1;
7126   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7127   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7128     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7129   }
7130 
7131   int Fi = 0;
7132   for (unsigned E = Operands.size(); I != E; ++I) {
7133     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
7134                                             MCOI::TIED_TO);
7135     if (TiedTo != -1) {
7136       assert((unsigned)TiedTo < Inst.getNumOperands());
7137       // handle tied old or src2 for MAC instructions
7138       Inst.addOperand(Inst.getOperand(TiedTo));
7139     }
7140     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7141     // Add the register arguments
7142     if (Op.isReg() && validateVccOperand(Op.getReg())) {
7143       // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
7144       // Skip it.
7145       continue;
7146     }
7147 
7148     if (IsDPP8) {
7149       if (Op.isDPP8()) {
7150         Op.addImmOperands(Inst, 1);
7151       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7152         Op.addRegWithFPInputModsOperands(Inst, 2);
7153       } else if (Op.isFI()) {
7154         Fi = Op.getImm();
7155       } else if (Op.isReg()) {
7156         Op.addRegOperands(Inst, 1);
7157       } else {
7158         llvm_unreachable("Invalid operand type");
7159       }
7160     } else {
7161       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7162         Op.addRegWithFPInputModsOperands(Inst, 2);
7163       } else if (Op.isDPPCtrl()) {
7164         Op.addImmOperands(Inst, 1);
7165       } else if (Op.isImm()) {
7166         // Handle optional arguments
7167         OptionalIdx[Op.getImmTy()] = I;
7168       } else {
7169         llvm_unreachable("Invalid operand type");
7170       }
7171     }
7172   }
7173 
7174   if (IsDPP8) {
7175     using namespace llvm::AMDGPU::DPP;
7176     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
7177   } else {
7178     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
7179     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
7180     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
7181     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
7182       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
7183     }
7184   }
7185 }
7186 
7187 //===----------------------------------------------------------------------===//
7188 // sdwa
7189 //===----------------------------------------------------------------------===//
7190 
7191 OperandMatchResultTy
7192 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
7193                               AMDGPUOperand::ImmTy Type) {
7194   using namespace llvm::AMDGPU::SDWA;
7195 
7196   SMLoc S = Parser.getTok().getLoc();
7197   StringRef Value;
7198   OperandMatchResultTy res;
7199 
7200   res = parseStringWithPrefix(Prefix, Value);
7201   if (res != MatchOperand_Success) {
7202     return res;
7203   }
7204 
7205   int64_t Int;
7206   Int = StringSwitch<int64_t>(Value)
7207         .Case("BYTE_0", SdwaSel::BYTE_0)
7208         .Case("BYTE_1", SdwaSel::BYTE_1)
7209         .Case("BYTE_2", SdwaSel::BYTE_2)
7210         .Case("BYTE_3", SdwaSel::BYTE_3)
7211         .Case("WORD_0", SdwaSel::WORD_0)
7212         .Case("WORD_1", SdwaSel::WORD_1)
7213         .Case("DWORD", SdwaSel::DWORD)
7214         .Default(0xffffffff);
7215   Parser.Lex(); // eat last token
7216 
7217   if (Int == 0xffffffff) {
7218     return MatchOperand_ParseFail;
7219   }
7220 
7221   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
7222   return MatchOperand_Success;
7223 }
7224 
7225 OperandMatchResultTy
7226 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
7227   using namespace llvm::AMDGPU::SDWA;
7228 
7229   SMLoc S = Parser.getTok().getLoc();
7230   StringRef Value;
7231   OperandMatchResultTy res;
7232 
7233   res = parseStringWithPrefix("dst_unused", Value);
7234   if (res != MatchOperand_Success) {
7235     return res;
7236   }
7237 
7238   int64_t Int;
7239   Int = StringSwitch<int64_t>(Value)
7240         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
7241         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
7242         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
7243         .Default(0xffffffff);
7244   Parser.Lex(); // eat last token
7245 
7246   if (Int == 0xffffffff) {
7247     return MatchOperand_ParseFail;
7248   }
7249 
7250   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
7251   return MatchOperand_Success;
7252 }
7253 
7254 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
7255   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
7256 }
7257 
7258 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
7259   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
7260 }
7261 
7262 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
7263   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
7264 }
7265 
7266 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
7267   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
7268 }
7269 
7270 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
7271   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
7272 }
7273 
7274 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
7275                               uint64_t BasicInstType,
7276                               bool SkipDstVcc,
7277                               bool SkipSrcVcc) {
7278   using namespace llvm::AMDGPU::SDWA;
7279 
7280   OptionalImmIndexMap OptionalIdx;
7281   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
7282   bool SkippedVcc = false;
7283 
7284   unsigned I = 1;
7285   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7286   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7287     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7288   }
7289 
7290   for (unsigned E = Operands.size(); I != E; ++I) {
7291     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7292     if (SkipVcc && !SkippedVcc && Op.isReg() &&
7293         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
7294       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
7295       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
7296       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
7297       // Skip VCC only if we didn't skip it on previous iteration.
7298       // Note that src0 and src1 occupy 2 slots each because of modifiers.
7299       if (BasicInstType == SIInstrFlags::VOP2 &&
7300           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
7301            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
7302         SkippedVcc = true;
7303         continue;
7304       } else if (BasicInstType == SIInstrFlags::VOPC &&
7305                  Inst.getNumOperands() == 0) {
7306         SkippedVcc = true;
7307         continue;
7308       }
7309     }
7310     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7311       Op.addRegOrImmWithInputModsOperands(Inst, 2);
7312     } else if (Op.isImm()) {
7313       // Handle optional arguments
7314       OptionalIdx[Op.getImmTy()] = I;
7315     } else {
7316       llvm_unreachable("Invalid operand type");
7317     }
7318     SkippedVcc = false;
7319   }
7320 
7321   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
7322       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
7323       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
7324     // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
7325     switch (BasicInstType) {
7326     case SIInstrFlags::VOP1:
7327       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7328       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7329         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7330       }
7331       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7332       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7333       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7334       break;
7335 
7336     case SIInstrFlags::VOP2:
7337       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7338       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7339         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7340       }
7341       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7342       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7343       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7344       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7345       break;
7346 
7347     case SIInstrFlags::VOPC:
7348       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
7349         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7350       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7351       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7352       break;
7353 
7354     default:
7355       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
7356     }
7357   }
7358 
7359   // special case v_mac_{f16, f32}:
7360   // it has src2 register operand that is tied to dst operand
7361   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
7362       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
7363     auto it = Inst.begin();
7364     std::advance(
7365       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
7366     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
7367   }
7368 }
7369 
7370 //===----------------------------------------------------------------------===//
7371 // mAI
7372 //===----------------------------------------------------------------------===//
7373 
7374 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
7375   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
7376 }
7377 
7378 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
7379   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
7380 }
7381 
7382 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
7383   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
7384 }
7385 
7386 /// Force static initialization.
7387 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
7388   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
7389   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
7390 }
7391 
7392 #define GET_REGISTER_MATCHER
7393 #define GET_MATCHER_IMPLEMENTATION
7394 #define GET_MNEMONIC_SPELL_CHECKER
7395 #include "AMDGPUGenAsmMatcher.inc"
7396 
7397 // This fuction should be defined after auto-generated include so that we have
7398 // MatchClassKind enum defined
7399 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
7400                                                      unsigned Kind) {
7401   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
7402   // But MatchInstructionImpl() expects to meet token and fails to validate
7403   // operand. This method checks if we are given immediate operand but expect to
7404   // get corresponding token.
7405   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
7406   switch (Kind) {
7407   case MCK_addr64:
7408     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
7409   case MCK_gds:
7410     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
7411   case MCK_lds:
7412     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
7413   case MCK_glc:
7414     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
7415   case MCK_idxen:
7416     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
7417   case MCK_offen:
7418     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
7419   case MCK_SSrcB32:
7420     // When operands have expression values, they will return true for isToken,
7421     // because it is not possible to distinguish between a token and an
7422     // expression at parse time. MatchInstructionImpl() will always try to
7423     // match an operand as a token, when isToken returns true, and when the
7424     // name of the expression is not a valid token, the match will fail,
7425     // so we need to handle it here.
7426     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
7427   case MCK_SSrcF32:
7428     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
7429   case MCK_SoppBrTarget:
7430     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
7431   case MCK_VReg32OrOff:
7432     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
7433   case MCK_InterpSlot:
7434     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
7435   case MCK_Attr:
7436     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
7437   case MCK_AttrChan:
7438     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
7439   case MCK_ImmSMEMOffset:
7440     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
7441   case MCK_SReg_64:
7442   case MCK_SReg_64_XEXEC:
7443     // Null is defined as a 32-bit register but
7444     // it should also be enabled with 64-bit operands.
7445     // The following code enables it for SReg_64 operands
7446     // used as source and destination. Remaining source
7447     // operands are handled in isInlinableImm.
7448     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
7449   default:
7450     return Match_InvalidOperand;
7451   }
7452 }
7453 
7454 //===----------------------------------------------------------------------===//
7455 // endpgm
7456 //===----------------------------------------------------------------------===//
7457 
7458 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
7459   SMLoc S = Parser.getTok().getLoc();
7460   int64_t Imm = 0;
7461 
7462   if (!parseExpr(Imm)) {
7463     // The operand is optional, if not present default to 0
7464     Imm = 0;
7465   }
7466 
7467   if (!isUInt<16>(Imm)) {
7468     Error(S, "expected a 16-bit value");
7469     return MatchOperand_ParseFail;
7470   }
7471 
7472   Operands.push_back(
7473       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
7474   return MatchOperand_Success;
7475 }
7476 
7477 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
7478