1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/Error.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
63 
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
67 
68 namespace {
69 
70 class AMDGPUAsmParser;
71 
72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
73 
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
77 
78 class AMDGPUOperand : public MCParsedAsmOperand {
79   enum KindTy {
80     Token,
81     Immediate,
82     Register,
83     Expression
84   } Kind;
85 
86   SMLoc StartLoc, EndLoc;
87   const AMDGPUAsmParser *AsmParser;
88 
89 public:
90   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
92 
93   using Ptr = std::unique_ptr<AMDGPUOperand>;
94 
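  // Source operand modifiers parsed from the assembly text: "|...|" / "abs(...)"
  // and "-" / "neg(...)" are the floating-point modifiers, "sext(...)" is the
  // integer modifier. They are later folded into SISrcMods operand bits.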
95   struct Modifiers {
96     bool Abs = false;
97     bool Neg = false;
98     bool Sext = false;
99 
100     bool hasFPModifiers() const { return Abs || Neg; }
101     bool hasIntModifiers() const { return Sext; }
102     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103 
104     int64_t getFPModifiersOperand() const {
105       int64_t Operand = 0;
106       Operand |= Abs ? SISrcMods::ABS : 0u;
107       Operand |= Neg ? SISrcMods::NEG : 0u;
108       return Operand;
109     }
110 
111     int64_t getIntModifiersOperand() const {
112       int64_t Operand = 0;
113       Operand |= Sext ? SISrcMods::SEXT : 0u;
114       return Operand;
115     }
116 
117     int64_t getModifiersOperand() const {
118       assert(!(hasFPModifiers() && hasIntModifiers())
119            && "fp and int modifiers should not be used simultaneously");
120       if (hasFPModifiers()) {
121         return getFPModifiersOperand();
122       } else if (hasIntModifiers()) {
123         return getIntModifiersOperand();
124       } else {
125         return 0;
126       }
127     }
128 
129     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
130   };
131 
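  // Identifies which named or optional operand an immediate represents,
  // e.g. "offset:4095", "gds", "clamp" or "row_mask:0xf", so a single
  // Immediate kind can carry many different operand flavors.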
132   enum ImmTy {
133     ImmTyNone,
134     ImmTyGDS,
135     ImmTyLDS,
136     ImmTyOffen,
137     ImmTyIdxen,
138     ImmTyAddr64,
139     ImmTyOffset,
140     ImmTyInstOffset,
141     ImmTyOffset0,
142     ImmTyOffset1,
143     ImmTyDLC,
144     ImmTyGLC,
145     ImmTySLC,
146     ImmTySWZ,
147     ImmTyTFE,
148     ImmTyD16,
149     ImmTyClampSI,
150     ImmTyOModSI,
151     ImmTyDPP8,
152     ImmTyDppCtrl,
153     ImmTyDppRowMask,
154     ImmTyDppBankMask,
155     ImmTyDppBoundCtrl,
156     ImmTyDppFi,
157     ImmTySdwaDstSel,
158     ImmTySdwaSrc0Sel,
159     ImmTySdwaSrc1Sel,
160     ImmTySdwaDstUnused,
161     ImmTyDMask,
162     ImmTyDim,
163     ImmTyUNorm,
164     ImmTyDA,
165     ImmTyR128A16,
166     ImmTyA16,
167     ImmTyLWE,
168     ImmTyExpTgt,
169     ImmTyExpCompr,
170     ImmTyExpVM,
171     ImmTyFORMAT,
172     ImmTyHwreg,
173     ImmTyOff,
174     ImmTySendMsg,
175     ImmTyInterpSlot,
176     ImmTyInterpAttr,
177     ImmTyAttrChan,
178     ImmTyOpSel,
179     ImmTyOpSelHi,
180     ImmTyNegLo,
181     ImmTyNegHi,
182     ImmTySwizzle,
183     ImmTyGprIdxMode,
184     ImmTyHigh,
185     ImmTyBLGP,
186     ImmTyCBSZ,
187     ImmTyABID,
188     ImmTyEndpgm,
189   };
190 
191 private:
192   struct TokOp {
193     const char *Data;
194     unsigned Length;
195   };
196 
197   struct ImmOp {
198     int64_t Val;
199     ImmTy Type;
200     bool IsFPImm;
201     Modifiers Mods;
202   };
203 
204   struct RegOp {
205     unsigned RegNo;
206     Modifiers Mods;
207   };
208 
209   union {
210     TokOp Tok;
211     ImmOp Imm;
212     RegOp Reg;
213     const MCExpr *Expr;
214   };
215 
216 public:
217   bool isToken() const override {
218     if (Kind == Token)
219       return true;
220 
221     // When parsing operands, we can't always tell if something was meant to be
222     // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
225     return isSymbolRefExpr();
226   }
227 
228   bool isSymbolRefExpr() const {
229     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
230   }
231 
232   bool isImm() const override {
233     return Kind == Immediate;
234   }
235 
236   bool isInlinableImm(MVT type) const;
237   bool isLiteralImm(MVT type) const;
238 
239   bool isRegKind() const {
240     return Kind == Register;
241   }
242 
243   bool isReg() const override {
244     return isRegKind() && !hasModifiers();
245   }
246 
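  // The is*() predicates below are queried by the auto-generated matcher
  // (AMDGPUGenAsmMatcher.inc) to check whether a parsed operand satisfies
  // a particular instruction operand class.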
247   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
248     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
249   }
250 
251   bool isRegOrImmWithInt16InputMods() const {
252     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
253   }
254 
255   bool isRegOrImmWithInt32InputMods() const {
256     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
257   }
258 
259   bool isRegOrImmWithInt64InputMods() const {
260     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
261   }
262 
263   bool isRegOrImmWithFP16InputMods() const {
264     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
265   }
266 
267   bool isRegOrImmWithFP32InputMods() const {
268     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
269   }
270 
271   bool isRegOrImmWithFP64InputMods() const {
272     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
273   }
274 
275   bool isVReg() const {
276     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
277            isRegClass(AMDGPU::VReg_64RegClassID) ||
278            isRegClass(AMDGPU::VReg_96RegClassID) ||
279            isRegClass(AMDGPU::VReg_128RegClassID) ||
280            isRegClass(AMDGPU::VReg_160RegClassID) ||
281            isRegClass(AMDGPU::VReg_192RegClassID) ||
282            isRegClass(AMDGPU::VReg_256RegClassID) ||
283            isRegClass(AMDGPU::VReg_512RegClassID) ||
284            isRegClass(AMDGPU::VReg_1024RegClassID);
285   }
286 
287   bool isVReg32() const {
288     return isRegClass(AMDGPU::VGPR_32RegClassID);
289   }
290 
291   bool isVReg32OrOff() const {
292     return isOff() || isVReg32();
293   }
294 
295   bool isNull() const {
296     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
297   }
298 
299   bool isSDWAOperand(MVT type) const;
300   bool isSDWAFP16Operand() const;
301   bool isSDWAFP32Operand() const;
302   bool isSDWAInt16Operand() const;
303   bool isSDWAInt32Operand() const;
304 
305   bool isImmTy(ImmTy ImmT) const {
306     return isImm() && Imm.Type == ImmT;
307   }
308 
309   bool isImmModifier() const {
310     return isImm() && Imm.Type != ImmTyNone;
311   }
312 
313   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
314   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
315   bool isDMask() const { return isImmTy(ImmTyDMask); }
316   bool isDim() const { return isImmTy(ImmTyDim); }
317   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
318   bool isDA() const { return isImmTy(ImmTyDA); }
319   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
320   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
321   bool isLWE() const { return isImmTy(ImmTyLWE); }
322   bool isOff() const { return isImmTy(ImmTyOff); }
323   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
324   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
325   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
326   bool isOffen() const { return isImmTy(ImmTyOffen); }
327   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
328   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
329   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
330   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
331   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
332 
333   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
334   bool isGDS() const { return isImmTy(ImmTyGDS); }
335   bool isLDS() const { return isImmTy(ImmTyLDS); }
336   bool isDLC() const { return isImmTy(ImmTyDLC); }
337   bool isGLC() const { return isImmTy(ImmTyGLC); }
338   bool isSLC() const { return isImmTy(ImmTySLC); }
339   bool isSWZ() const { return isImmTy(ImmTySWZ); }
340   bool isTFE() const { return isImmTy(ImmTyTFE); }
341   bool isD16() const { return isImmTy(ImmTyD16); }
342   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
343   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
344   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
345   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
346   bool isFI() const { return isImmTy(ImmTyDppFi); }
347   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
348   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
349   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
350   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
351   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
352   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
353   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
354   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
355   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
356   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
357   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
358   bool isHigh() const { return isImmTy(ImmTyHigh); }
359 
360   bool isMod() const {
361     return isClampSI() || isOModSI();
362   }
363 
364   bool isRegOrImm() const {
365     return isReg() || isImm();
366   }
367 
368   bool isRegClass(unsigned RCID) const;
369 
370   bool isInlineValue() const;
371 
372   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
373     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
374   }
375 
376   bool isSCSrcB16() const {
377     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
378   }
379 
380   bool isSCSrcV2B16() const {
381     return isSCSrcB16();
382   }
383 
384   bool isSCSrcB32() const {
385     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
386   }
387 
388   bool isSCSrcB64() const {
389     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
390   }
391 
392   bool isBoolReg() const;
393 
394   bool isSCSrcF16() const {
395     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
396   }
397 
398   bool isSCSrcV2F16() const {
399     return isSCSrcF16();
400   }
401 
402   bool isSCSrcF32() const {
403     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
404   }
405 
406   bool isSCSrcF64() const {
407     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
408   }
409 
410   bool isSSrcB32() const {
411     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
412   }
413 
414   bool isSSrcB16() const {
415     return isSCSrcB16() || isLiteralImm(MVT::i16);
416   }
417 
418   bool isSSrcV2B16() const {
419     llvm_unreachable("cannot happen");
420     return isSSrcB16();
421   }
422 
423   bool isSSrcB64() const {
424     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
425     // See isVSrc64().
426     return isSCSrcB64() || isLiteralImm(MVT::i64);
427   }
428 
429   bool isSSrcF32() const {
430     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
431   }
432 
433   bool isSSrcF64() const {
434     return isSCSrcB64() || isLiteralImm(MVT::f64);
435   }
436 
437   bool isSSrcF16() const {
438     return isSCSrcB16() || isLiteralImm(MVT::f16);
439   }
440 
441   bool isSSrcV2F16() const {
442     llvm_unreachable("cannot happen");
443     return isSSrcF16();
444   }
445 
446   bool isSSrcOrLdsB32() const {
447     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
448            isLiteralImm(MVT::i32) || isExpr();
449   }
450 
451   bool isVCSrcB32() const {
452     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
453   }
454 
455   bool isVCSrcB64() const {
456     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
457   }
458 
459   bool isVCSrcB16() const {
460     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
461   }
462 
463   bool isVCSrcV2B16() const {
464     return isVCSrcB16();
465   }
466 
467   bool isVCSrcF32() const {
468     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
469   }
470 
471   bool isVCSrcF64() const {
472     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
473   }
474 
475   bool isVCSrcF16() const {
476     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
477   }
478 
479   bool isVCSrcV2F16() const {
480     return isVCSrcF16();
481   }
482 
483   bool isVSrcB32() const {
484     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
485   }
486 
487   bool isVSrcB64() const {
488     return isVCSrcF64() || isLiteralImm(MVT::i64);
489   }
490 
491   bool isVSrcB16() const {
492     return isVCSrcB16() || isLiteralImm(MVT::i16);
493   }
494 
495   bool isVSrcV2B16() const {
496     return isVSrcB16() || isLiteralImm(MVT::v2i16);
497   }
498 
499   bool isVSrcF32() const {
500     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
501   }
502 
503   bool isVSrcF64() const {
504     return isVCSrcF64() || isLiteralImm(MVT::f64);
505   }
506 
507   bool isVSrcF16() const {
508     return isVCSrcF16() || isLiteralImm(MVT::f16);
509   }
510 
511   bool isVSrcV2F16() const {
512     return isVSrcF16() || isLiteralImm(MVT::v2f16);
513   }
514 
515   bool isVISrcB32() const {
516     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
517   }
518 
519   bool isVISrcB16() const {
520     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
521   }
522 
523   bool isVISrcV2B16() const {
524     return isVISrcB16();
525   }
526 
527   bool isVISrcF32() const {
528     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
529   }
530 
531   bool isVISrcF16() const {
532     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
533   }
534 
535   bool isVISrcV2F16() const {
536     return isVISrcF16() || isVISrcB32();
537   }
538 
539   bool isAISrcB32() const {
540     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
541   }
542 
543   bool isAISrcB16() const {
544     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
545   }
546 
547   bool isAISrcV2B16() const {
548     return isAISrcB16();
549   }
550 
551   bool isAISrcF32() const {
552     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
553   }
554 
555   bool isAISrcF16() const {
556     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
557   }
558 
559   bool isAISrcV2F16() const {
560     return isAISrcF16() || isAISrcB32();
561   }
562 
563   bool isAISrc_128B32() const {
564     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
565   }
566 
567   bool isAISrc_128B16() const {
568     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
569   }
570 
571   bool isAISrc_128V2B16() const {
572     return isAISrc_128B16();
573   }
574 
575   bool isAISrc_128F32() const {
576     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
577   }
578 
579   bool isAISrc_128F16() const {
580     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
581   }
582 
583   bool isAISrc_128V2F16() const {
584     return isAISrc_128F16() || isAISrc_128B32();
585   }
586 
587   bool isAISrc_512B32() const {
588     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
589   }
590 
591   bool isAISrc_512B16() const {
592     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
593   }
594 
595   bool isAISrc_512V2B16() const {
596     return isAISrc_512B16();
597   }
598 
599   bool isAISrc_512F32() const {
600     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
601   }
602 
603   bool isAISrc_512F16() const {
604     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
605   }
606 
607   bool isAISrc_512V2F16() const {
608     return isAISrc_512F16() || isAISrc_512B32();
609   }
610 
611   bool isAISrc_1024B32() const {
612     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
613   }
614 
615   bool isAISrc_1024B16() const {
616     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
617   }
618 
619   bool isAISrc_1024V2B16() const {
620     return isAISrc_1024B16();
621   }
622 
623   bool isAISrc_1024F32() const {
624     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
625   }
626 
627   bool isAISrc_1024F16() const {
628     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
629   }
630 
631   bool isAISrc_1024V2F16() const {
632     return isAISrc_1024F16() || isAISrc_1024B32();
633   }
634 
635   bool isKImmFP32() const {
636     return isLiteralImm(MVT::f32);
637   }
638 
639   bool isKImmFP16() const {
640     return isLiteralImm(MVT::f16);
641   }
642 
643   bool isMem() const override {
644     return false;
645   }
646 
647   bool isExpr() const {
648     return Kind == Expression;
649   }
650 
651   bool isSoppBrTarget() const {
652     return isExpr() || isImm();
653   }
654 
655   bool isSWaitCnt() const;
656   bool isHwreg() const;
657   bool isSendMsg() const;
658   bool isSwizzle() const;
659   bool isSMRDOffset8() const;
660   bool isSMEMOffset() const;
661   bool isSMRDLiteralOffset() const;
662   bool isDPP8() const;
663   bool isDPPCtrl() const;
664   bool isBLGP() const;
665   bool isCBSZ() const;
666   bool isABID() const;
667   bool isGPRIdxMode() const;
668   bool isS16Imm() const;
669   bool isU16Imm() const;
670   bool isEndpgm() const;
671 
672   StringRef getExpressionAsToken() const {
673     assert(isExpr());
674     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
675     return S->getSymbol().getName();
676   }
677 
678   StringRef getToken() const {
679     assert(isToken());
680 
681     if (Kind == Expression)
682       return getExpressionAsToken();
683 
684     return StringRef(Tok.Data, Tok.Length);
685   }
686 
687   int64_t getImm() const {
688     assert(isImm());
689     return Imm.Val;
690   }
691 
692   void setImm(int64_t Val) {
693     assert(isImm());
694     Imm.Val = Val;
695   }
696 
697   ImmTy getImmTy() const {
698     assert(isImm());
699     return Imm.Type;
700   }
701 
702   unsigned getReg() const override {
703     assert(isRegKind());
704     return Reg.RegNo;
705   }
706 
707   SMLoc getStartLoc() const override {
708     return StartLoc;
709   }
710 
711   SMLoc getEndLoc() const override {
712     return EndLoc;
713   }
714 
715   SMRange getLocRange() const {
716     return SMRange(StartLoc, EndLoc);
717   }
718 
719   Modifiers getModifiers() const {
720     assert(isRegKind() || isImmTy(ImmTyNone));
721     return isRegKind() ? Reg.Mods : Imm.Mods;
722   }
723 
724   void setModifiers(Modifiers Mods) {
725     assert(isRegKind() || isImmTy(ImmTyNone));
726     if (isRegKind())
727       Reg.Mods = Mods;
728     else
729       Imm.Mods = Mods;
730   }
731 
732   bool hasModifiers() const {
733     return getModifiers().hasModifiers();
734   }
735 
736   bool hasFPModifiers() const {
737     return getModifiers().hasFPModifiers();
738   }
739 
740   bool hasIntModifiers() const {
741     return getModifiers().hasIntModifiers();
742   }
743 
744   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
745 
746   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
747 
748   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
749 
750   template <unsigned Bitwidth>
751   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
752 
753   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
754     addKImmFPOperands<16>(Inst, N);
755   }
756 
757   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
758     addKImmFPOperands<32>(Inst, N);
759   }
760 
761   void addRegOperands(MCInst &Inst, unsigned N) const;
762 
763   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
764     addRegOperands(Inst, N);
765   }
766 
767   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
768     if (isRegKind())
769       addRegOperands(Inst, N);
770     else if (isExpr())
771       Inst.addOperand(MCOperand::createExpr(Expr));
772     else
773       addImmOperands(Inst, N);
774   }
775 
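  // VOP3-style sources are emitted as two MCInst operands: the SISrcMods
  // bits first, followed by the register or immediate they apply to.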
776   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
777     Modifiers Mods = getModifiers();
778     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
779     if (isRegKind()) {
780       addRegOperands(Inst, N);
781     } else {
782       addImmOperands(Inst, N, false);
783     }
784   }
785 
786   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
787     assert(!hasIntModifiers());
788     addRegOrImmWithInputModsOperands(Inst, N);
789   }
790 
791   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
792     assert(!hasFPModifiers());
793     addRegOrImmWithInputModsOperands(Inst, N);
794   }
795 
796   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
797     Modifiers Mods = getModifiers();
798     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
799     assert(isRegKind());
800     addRegOperands(Inst, N);
801   }
802 
803   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
804     assert(!hasIntModifiers());
805     addRegWithInputModsOperands(Inst, N);
806   }
807 
808   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
809     assert(!hasFPModifiers());
810     addRegWithInputModsOperands(Inst, N);
811   }
812 
813   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
814     if (isImm())
815       addImmOperands(Inst, N);
816     else {
817       assert(isExpr());
818       Inst.addOperand(MCOperand::createExpr(Expr));
819     }
820   }
821 
822   static void printImmTy(raw_ostream& OS, ImmTy Type) {
823     switch (Type) {
824     case ImmTyNone: OS << "None"; break;
825     case ImmTyGDS: OS << "GDS"; break;
826     case ImmTyLDS: OS << "LDS"; break;
827     case ImmTyOffen: OS << "Offen"; break;
828     case ImmTyIdxen: OS << "Idxen"; break;
829     case ImmTyAddr64: OS << "Addr64"; break;
830     case ImmTyOffset: OS << "Offset"; break;
831     case ImmTyInstOffset: OS << "InstOffset"; break;
832     case ImmTyOffset0: OS << "Offset0"; break;
833     case ImmTyOffset1: OS << "Offset1"; break;
834     case ImmTyDLC: OS << "DLC"; break;
835     case ImmTyGLC: OS << "GLC"; break;
836     case ImmTySLC: OS << "SLC"; break;
837     case ImmTySWZ: OS << "SWZ"; break;
838     case ImmTyTFE: OS << "TFE"; break;
839     case ImmTyD16: OS << "D16"; break;
840     case ImmTyFORMAT: OS << "FORMAT"; break;
841     case ImmTyClampSI: OS << "ClampSI"; break;
842     case ImmTyOModSI: OS << "OModSI"; break;
843     case ImmTyDPP8: OS << "DPP8"; break;
844     case ImmTyDppCtrl: OS << "DppCtrl"; break;
845     case ImmTyDppRowMask: OS << "DppRowMask"; break;
846     case ImmTyDppBankMask: OS << "DppBankMask"; break;
847     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
848     case ImmTyDppFi: OS << "FI"; break;
849     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
850     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
851     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
852     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
853     case ImmTyDMask: OS << "DMask"; break;
854     case ImmTyDim: OS << "Dim"; break;
855     case ImmTyUNorm: OS << "UNorm"; break;
856     case ImmTyDA: OS << "DA"; break;
857     case ImmTyR128A16: OS << "R128A16"; break;
858     case ImmTyA16: OS << "A16"; break;
859     case ImmTyLWE: OS << "LWE"; break;
860     case ImmTyOff: OS << "Off"; break;
861     case ImmTyExpTgt: OS << "ExpTgt"; break;
862     case ImmTyExpCompr: OS << "ExpCompr"; break;
863     case ImmTyExpVM: OS << "ExpVM"; break;
864     case ImmTyHwreg: OS << "Hwreg"; break;
865     case ImmTySendMsg: OS << "SendMsg"; break;
866     case ImmTyInterpSlot: OS << "InterpSlot"; break;
867     case ImmTyInterpAttr: OS << "InterpAttr"; break;
868     case ImmTyAttrChan: OS << "AttrChan"; break;
869     case ImmTyOpSel: OS << "OpSel"; break;
870     case ImmTyOpSelHi: OS << "OpSelHi"; break;
871     case ImmTyNegLo: OS << "NegLo"; break;
872     case ImmTyNegHi: OS << "NegHi"; break;
873     case ImmTySwizzle: OS << "Swizzle"; break;
874     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
875     case ImmTyHigh: OS << "High"; break;
876     case ImmTyBLGP: OS << "BLGP"; break;
877     case ImmTyCBSZ: OS << "CBSZ"; break;
878     case ImmTyABID: OS << "ABID"; break;
879     case ImmTyEndpgm: OS << "Endpgm"; break;
880     }
881   }
882 
883   void print(raw_ostream &OS) const override {
884     switch (Kind) {
885     case Register:
886       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
887       break;
888     case Immediate:
889       OS << '<' << getImm();
890       if (getImmTy() != ImmTyNone) {
891         OS << " type: "; printImmTy(OS, getImmTy());
892       }
893       OS << " mods: " << Imm.Mods << '>';
894       break;
895     case Token:
896       OS << '\'' << getToken() << '\'';
897       break;
898     case Expression:
899       OS << "<expr " << *Expr << '>';
900       break;
901     }
902   }
903 
904   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
905                                       int64_t Val, SMLoc Loc,
906                                       ImmTy Type = ImmTyNone,
907                                       bool IsFPImm = false) {
908     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
909     Op->Imm.Val = Val;
910     Op->Imm.IsFPImm = IsFPImm;
911     Op->Imm.Type = Type;
912     Op->Imm.Mods = Modifiers();
913     Op->StartLoc = Loc;
914     Op->EndLoc = Loc;
915     return Op;
916   }
917 
918   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
919                                         StringRef Str, SMLoc Loc,
920                                         bool HasExplicitEncodingSize = true) {
921     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
922     Res->Tok.Data = Str.data();
923     Res->Tok.Length = Str.size();
924     Res->StartLoc = Loc;
925     Res->EndLoc = Loc;
926     return Res;
927   }
928 
929   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
930                                       unsigned RegNo, SMLoc S,
931                                       SMLoc E) {
932     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
933     Op->Reg.RegNo = RegNo;
934     Op->Reg.Mods = Modifiers();
935     Op->StartLoc = S;
936     Op->EndLoc = E;
937     return Op;
938   }
939 
940   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
941                                        const class MCExpr *Expr, SMLoc S) {
942     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
943     Op->Expr = Expr;
944     Op->StartLoc = S;
945     Op->EndLoc = S;
946     return Op;
947   }
948 };
949 
950 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
951   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
952   return OS;
953 }
954 
955 //===----------------------------------------------------------------------===//
956 // AsmParser
957 //===----------------------------------------------------------------------===//
958 
959 // Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the
// next .amdgpu_hsa_kernel directive or at EOF.
962 class KernelScopeInfo {
963   int SgprIndexUnusedMin = -1;
964   int VgprIndexUnusedMin = -1;
965   MCContext *Ctx = nullptr;
966 
967   void usesSgprAt(int i) {
968     if (i >= SgprIndexUnusedMin) {
969       SgprIndexUnusedMin = ++i;
970       if (Ctx) {
971         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
972         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
973       }
974     }
975   }
976 
977   void usesVgprAt(int i) {
978     if (i >= VgprIndexUnusedMin) {
979       VgprIndexUnusedMin = ++i;
980       if (Ctx) {
981         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
982         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
983       }
984     }
985   }
986 
987 public:
988   KernelScopeInfo() = default;
989 
990   void initialize(MCContext &Context) {
991     Ctx = &Context;
992     usesSgprAt(SgprIndexUnusedMin = -1);
993     usesVgprAt(VgprIndexUnusedMin = -1);
994   }
995 
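  // Record a register reference so that the *_count symbols stay up to date.
  // For example, a use of s[10:11] (DwordRegIndex = 10, RegWidth = 2) raises
  // .kernel.sgpr_count to 12.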
996   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
997     switch (RegKind) {
998       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
999       case IS_AGPR: // fall through
1000       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1001       default: break;
1002     }
1003   }
1004 };
1005 
1006 class AMDGPUAsmParser : public MCTargetAsmParser {
1007   MCAsmParser &Parser;
1008 
1009   // Number of extra operands parsed after the first optional operand.
1010   // This may be necessary to skip hardcoded mandatory operands.
1011   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1012 
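  // Encoding forced by an explicit mnemonic suffix such as "_e32", "_e64",
  // "_sdwa" or "_dpp"; see parseMnemonicSuffix().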
1013   unsigned ForcedEncodingSize = 0;
1014   bool ForcedDPP = false;
1015   bool ForcedSDWA = false;
1016   KernelScopeInfo KernelScope;
1017 
1018   /// @name Auto-generated Match Functions
1019   /// {
1020 
1021 #define GET_ASSEMBLER_HEADER
1022 #include "AMDGPUGenAsmMatcher.inc"
1023 
1024   /// }
1025 
1026 private:
1027   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1028   bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for the given target, reserved
1030   /// registers, and user-specified NextFreeXGPR values.
1031   ///
1032   /// \param Features [in] Target features, used for bug corrections.
1033   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1034   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1035   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1036   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1037   /// descriptor field, if valid.
1038   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1039   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1040   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1041   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1042   /// \param VGPRBlocks [out] Result VGPR block count.
1043   /// \param SGPRBlocks [out] Result SGPR block count.
1044   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1045                           bool FlatScrUsed, bool XNACKUsed,
1046                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1047                           SMRange VGPRRange, unsigned NextFreeSGPR,
1048                           SMRange SGPRRange, unsigned &VGPRBlocks,
1049                           unsigned &SGPRBlocks);
1050   bool ParseDirectiveAMDGCNTarget();
1051   bool ParseDirectiveAMDHSAKernel();
1052   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1053   bool ParseDirectiveHSACodeObjectVersion();
1054   bool ParseDirectiveHSACodeObjectISA();
1055   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1056   bool ParseDirectiveAMDKernelCodeT();
1057   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1058   bool ParseDirectiveAMDGPUHsaKernel();
1059 
1060   bool ParseDirectiveISAVersion();
1061   bool ParseDirectiveHSAMetadata();
1062   bool ParseDirectivePALMetadataBegin();
1063   bool ParseDirectivePALMetadata();
1064   bool ParseDirectiveAMDGPULDS();
1065 
1066   /// Common code to parse out a block of text (typically YAML) between start and
1067   /// end directives.
1068   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1069                            const char *AssemblerDirectiveEnd,
1070                            std::string &CollectString);
1071 
1072   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1073                              RegisterKind RegKind, unsigned Reg1);
1074   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1075                            unsigned &RegNum, unsigned &RegWidth,
1076                            bool RestoreOnFailure = false);
1077   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1078                            unsigned &RegNum, unsigned &RegWidth,
1079                            SmallVectorImpl<AsmToken> &Tokens);
1080   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1081                            unsigned &RegWidth,
1082                            SmallVectorImpl<AsmToken> &Tokens);
1083   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1084                            unsigned &RegWidth,
1085                            SmallVectorImpl<AsmToken> &Tokens);
1086   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1087                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1088   bool ParseRegRange(unsigned& Num, unsigned& Width);
1089   unsigned getRegularReg(RegisterKind RegKind,
1090                          unsigned RegNum,
1091                          unsigned RegWidth);
1092 
1093   bool isRegister();
1094   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1095   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1096   void initializeGprCountSymbol(RegisterKind RegKind);
1097   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1098                              unsigned RegWidth);
1099   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1100                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1101   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1102                  bool IsGdsHardcoded);
1103 
1104 public:
1105   enum AMDGPUMatchResultTy {
1106     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1107   };
1108   enum OperandMode {
1109     OperandMode_Default,
1110     OperandMode_NSA,
1111   };
1112 
1113   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1114 
1115   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1116                const MCInstrInfo &MII,
1117                const MCTargetOptions &Options)
1118       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1119     MCAsmParserExtension::Initialize(Parser);
1120 
1121     if (getFeatureBits().none()) {
1122       // Set default features.
1123       copySTI().ToggleFeature("southern-islands");
1124     }
1125 
1126     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1127 
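    // Pre-define symbols describing the target ISA version so that assembly
    // source can test them, e.g. ".if .amdgcn.gfx_generation_number >= 10".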
1128     {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1133       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1134       MCContext &Ctx = getContext();
1135       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1136         MCSymbol *Sym =
1137             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1138         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1139         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1140         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1141         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1142         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1143       } else {
1144         MCSymbol *Sym =
1145             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1146         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1147         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1148         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1149         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1150         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1151       }
1152       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1153         initializeGprCountSymbol(IS_VGPR);
1154         initializeGprCountSymbol(IS_SGPR);
1155       } else
1156         KernelScope.initialize(getContext());
1157     }
1158   }
1159 
1160   bool hasXNACK() const {
1161     return AMDGPU::hasXNACK(getSTI());
1162   }
1163 
1164   bool hasMIMG_R128() const {
1165     return AMDGPU::hasMIMG_R128(getSTI());
1166   }
1167 
1168   bool hasPackedD16() const {
1169     return AMDGPU::hasPackedD16(getSTI());
1170   }
1171 
1172   bool hasGFX10A16() const {
1173     return AMDGPU::hasGFX10A16(getSTI());
1174   }
1175 
1176   bool isSI() const {
1177     return AMDGPU::isSI(getSTI());
1178   }
1179 
1180   bool isCI() const {
1181     return AMDGPU::isCI(getSTI());
1182   }
1183 
1184   bool isVI() const {
1185     return AMDGPU::isVI(getSTI());
1186   }
1187 
1188   bool isGFX9() const {
1189     return AMDGPU::isGFX9(getSTI());
1190   }
1191 
1192   bool isGFX10() const {
1193     return AMDGPU::isGFX10(getSTI());
1194   }
1195 
1196   bool isGFX10_BEncoding() const {
1197     return AMDGPU::isGFX10_BEncoding(getSTI());
1198   }
1199 
1200   bool hasInv2PiInlineImm() const {
1201     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1202   }
1203 
1204   bool hasFlatOffsets() const {
1205     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1206   }
1207 
1208   bool hasSGPR102_SGPR103() const {
1209     return !isVI() && !isGFX9();
1210   }
1211 
1212   bool hasSGPR104_SGPR105() const {
1213     return isGFX10();
1214   }
1215 
1216   bool hasIntClamp() const {
1217     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1218   }
1219 
1220   AMDGPUTargetStreamer &getTargetStreamer() {
1221     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1222     return static_cast<AMDGPUTargetStreamer &>(TS);
1223   }
1224 
1225   const MCRegisterInfo *getMRI() const {
1226     // We need this const_cast because for some reason getContext() is not const
1227     // in MCAsmParser.
1228     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1229   }
1230 
1231   const MCInstrInfo *getMII() const {
1232     return &MII;
1233   }
1234 
1235   const FeatureBitset &getFeatureBits() const {
1236     return getSTI().getFeatureBits();
1237   }
1238 
1239   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1240   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1241   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1242 
1243   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1244   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1245   bool isForcedDPP() const { return ForcedDPP; }
1246   bool isForcedSDWA() const { return ForcedSDWA; }
1247   ArrayRef<unsigned> getMatchedVariants() const;
1248 
1249   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1250   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1251                      bool RestoreOnFailure);
1252   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1253   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1254                                         SMLoc &EndLoc) override;
1255   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1256   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1257                                       unsigned Kind) override;
1258   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1259                                OperandVector &Operands, MCStreamer &Out,
1260                                uint64_t &ErrorInfo,
1261                                bool MatchingInlineAsm) override;
1262   bool ParseDirective(AsmToken DirectiveID) override;
1263   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1264                                     OperandMode Mode = OperandMode_Default);
1265   StringRef parseMnemonicSuffix(StringRef Name);
1266   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1267                         SMLoc NameLoc, OperandVector &Operands) override;
1268   //bool ProcessInstruction(MCInst &Inst);
1269 
1270   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1271 
1272   OperandMatchResultTy
1273   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1274                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1275                      bool (*ConvertResult)(int64_t &) = nullptr);
1276 
1277   OperandMatchResultTy
1278   parseOperandArrayWithPrefix(const char *Prefix,
1279                               OperandVector &Operands,
1280                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1281                               bool (*ConvertResult)(int64_t&) = nullptr);
1282 
1283   OperandMatchResultTy
1284   parseNamedBit(const char *Name, OperandVector &Operands,
1285                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1286   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1287                                              StringRef &Value);
1288 
1289   bool isModifier();
1290   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1291   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1292   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1293   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1294   bool parseSP3NegModifier();
1295   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1296   OperandMatchResultTy parseReg(OperandVector &Operands);
1297   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1298   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1299   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1300   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1301   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1302   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1303   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1304   OperandMatchResultTy parseUfmt(int64_t &Format);
1305   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1306   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1307   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1308   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1309   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1310   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1311   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1312 
1313   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1314   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1315   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1316   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1317 
1318   bool parseCnt(int64_t &IntVal);
1319   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1320   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1321 
1322 private:
1323   struct OperandInfoTy {
1324     int64_t Id;
1325     bool IsSymbolic = false;
1326     bool IsDefined = false;
1327 
1328     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1329   };
1330 
1331   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1332   bool validateSendMsg(const OperandInfoTy &Msg,
1333                        const OperandInfoTy &Op,
1334                        const OperandInfoTy &Stream,
1335                        const SMLoc Loc);
1336 
1337   bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1338   bool validateHwreg(const OperandInfoTy &HwReg,
1339                      const int64_t Offset,
1340                      const int64_t Width,
1341                      const SMLoc Loc);
1342 
1343   void errorExpTgt();
1344   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1345   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1346   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1347 
1348   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1349   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1350   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1351   bool validateSOPLiteral(const MCInst &Inst) const;
1352   bool validateConstantBusLimitations(const MCInst &Inst);
1353   bool validateEarlyClobberLimitations(const MCInst &Inst);
1354   bool validateIntClampSupported(const MCInst &Inst);
1355   bool validateMIMGAtomicDMask(const MCInst &Inst);
1356   bool validateMIMGGatherDMask(const MCInst &Inst);
1357   bool validateMovrels(const MCInst &Inst);
1358   bool validateMIMGDataSize(const MCInst &Inst);
1359   bool validateMIMGAddrSize(const MCInst &Inst);
1360   bool validateMIMGD16(const MCInst &Inst);
1361   bool validateMIMGDim(const MCInst &Inst);
1362   bool validateLdsDirect(const MCInst &Inst);
1363   bool validateOpSel(const MCInst &Inst);
1364   bool validateVccOperand(unsigned Reg) const;
1365   bool validateVOP3Literal(const MCInst &Inst) const;
1366   bool validateMAIAccWrite(const MCInst &Inst);
1367   unsigned getConstantBusLimit(unsigned Opcode) const;
1368   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1369   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1370   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1371 
1372   bool isId(const StringRef Id) const;
1373   bool isId(const AsmToken &Token, const StringRef Id) const;
1374   bool isToken(const AsmToken::TokenKind Kind) const;
1375   bool trySkipId(const StringRef Id);
1376   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1377   bool trySkipToken(const AsmToken::TokenKind Kind);
1378   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1379   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1380   bool parseId(StringRef &Val, const StringRef ErrMsg);
1381 
1382   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1383   AsmToken::TokenKind getTokenKind() const;
1384   bool parseExpr(int64_t &Imm);
1385   bool parseExpr(OperandVector &Operands);
1386   StringRef getTokenStr() const;
1387   AsmToken peekToken();
1388   AsmToken getToken() const;
1389   SMLoc getLoc() const;
1390   void lex();
1391 
1392 public:
1393   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1394   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1395 
1396   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1397   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1398   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1399   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1400   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1401   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1402 
1403   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1404                             const unsigned MinVal,
1405                             const unsigned MaxVal,
1406                             const StringRef ErrMsg);
1407   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1408   bool parseSwizzleOffset(int64_t &Imm);
1409   bool parseSwizzleMacro(int64_t &Imm);
1410   bool parseSwizzleQuadPerm(int64_t &Imm);
1411   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1412   bool parseSwizzleBroadcast(int64_t &Imm);
1413   bool parseSwizzleSwap(int64_t &Imm);
1414   bool parseSwizzleReverse(int64_t &Imm);
1415 
1416   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1417   int64_t parseGPRIdxMacro();
1418 
1419   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1420   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1421   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1422   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1423   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1424 
1425   AMDGPUOperand::Ptr defaultDLC() const;
1426   AMDGPUOperand::Ptr defaultGLC() const;
1427   AMDGPUOperand::Ptr defaultSLC() const;
1428 
1429   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1430   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1431   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1432   AMDGPUOperand::Ptr defaultFlatOffset() const;
1433 
1434   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1435 
1436   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1437                OptionalImmIndexMap &OptionalIdx);
1438   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1439   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1440   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1441 
1442   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1443 
1444   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1445                bool IsAtomic = false);
1446   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1447 
1448   OperandMatchResultTy parseDim(OperandVector &Operands);
1449   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1450   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1451   AMDGPUOperand::Ptr defaultRowMask() const;
1452   AMDGPUOperand::Ptr defaultBankMask() const;
1453   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1454   AMDGPUOperand::Ptr defaultFI() const;
1455   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1456   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1457 
1458   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1459                                     AMDGPUOperand::ImmTy Type);
1460   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1461   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1462   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1463   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1464   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1465   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1466   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1467                uint64_t BasicInstType,
1468                bool SkipDstVcc = false,
1469                bool SkipSrcVcc = false);
1470 
1471   AMDGPUOperand::Ptr defaultBLGP() const;
1472   AMDGPUOperand::Ptr defaultCBSZ() const;
1473   AMDGPUOperand::Ptr defaultABID() const;
1474 
1475   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1476   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1477 };
1478 
1479 struct OptionalOperand {
1480   const char *Name;
1481   AMDGPUOperand::ImmTy Type;
1482   bool IsBit;
1483   bool (*ConvertResult)(int64_t&);
1484 };
1485 
1486 } // end anonymous namespace
1487 
// May be called with an integer type of equivalent bitwidth.
1489 static const fltSemantics *getFltSemantics(unsigned Size) {
1490   switch (Size) {
1491   case 4:
1492     return &APFloat::IEEEsingle();
1493   case 8:
1494     return &APFloat::IEEEdouble();
1495   case 2:
1496     return &APFloat::IEEEhalf();
1497   default:
1498     llvm_unreachable("unsupported fp type");
1499   }
1500 }
1501 
1502 static const fltSemantics *getFltSemantics(MVT VT) {
1503   return getFltSemantics(VT.getSizeInBits() / 8);
1504 }
1505 
1506 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1507   switch (OperandType) {
1508   case AMDGPU::OPERAND_REG_IMM_INT32:
1509   case AMDGPU::OPERAND_REG_IMM_FP32:
1510   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1511   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1512   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1513   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1514     return &APFloat::IEEEsingle();
1515   case AMDGPU::OPERAND_REG_IMM_INT64:
1516   case AMDGPU::OPERAND_REG_IMM_FP64:
1517   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1518   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1519     return &APFloat::IEEEdouble();
1520   case AMDGPU::OPERAND_REG_IMM_INT16:
1521   case AMDGPU::OPERAND_REG_IMM_FP16:
1522   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1523   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1524   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1525   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1526   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1527   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1528   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1529   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1530   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1531   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1532     return &APFloat::IEEEhalf();
1533   default:
1534     llvm_unreachable("unsupported fp type");
1535   }
1536 }
1537 
1538 //===----------------------------------------------------------------------===//
1539 // Operand
1540 //===----------------------------------------------------------------------===//
1541 
1542 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1543   bool Lost;
1544 
  // Convert the literal to the semantics of the target type (f16/f32/f64).
1546   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1547                                                APFloat::rmNearestTiesToEven,
1548                                                &Lost);
  // We allow precision loss but not overflow or underflow
1550   if (Status != APFloat::opOK &&
1551       Lost &&
1552       ((Status & APFloat::opOverflow)  != 0 ||
1553        (Status & APFloat::opUnderflow) != 0)) {
1554     return false;
1555   }
1556 
1557   return true;
1558 }
1559 
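// Check that Val can be truncated to Size bits without losing information,
// i.e. it fits as either a Size-bit unsigned or a Size-bit signed value.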
1560 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1561   return isUIntN(Size, Val) || isIntN(Size, Val);
1562 }
1563 
1564 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1565   if (VT.getScalarType() == MVT::i16) {
    // FP immediate values are currently broken for i16 operands; only accept
    // inlinable integer literals here.
1567     return isInlinableIntLiteral(Val);
1568   }
1569 
1570   // f16/v2f16 operands work correctly for all values.
1571   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1572 }
1573 
1574 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1575 
1576   // This is a hack to enable named inline values like
1577   // shared_base with both 32-bit and 64-bit operands.
1578   // Note that these values are defined as
1579   // 32-bit operands only.
1580   if (isInlineValue()) {
1581     return true;
1582   }
1583 
1584   if (!isImmTy(ImmTyNone)) {
1585     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1586     return false;
1587   }
1588   // TODO: We should avoid using host float here. It would be better to
1589   // check the float bit values which is what a few other places do.
1590   // We've had bot failures before due to weird NaN support on mips hosts.
1591 
1592   APInt Literal(64, Imm.Val);
1593 
1594   if (Imm.IsFPImm) { // We got fp literal token
1595     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1596       return AMDGPU::isInlinableLiteral64(Imm.Val,
1597                                           AsmParser->hasInv2PiInlineImm());
1598     }
1599 
1600     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1601     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1602       return false;
1603 
1604     if (type.getScalarSizeInBits() == 16) {
1605       return isInlineableLiteralOp16(
1606         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1607         type, AsmParser->hasInv2PiInlineImm());
1608     }
1609 
1610     // Check if single precision literal is inlinable
1611     return AMDGPU::isInlinableLiteral32(
1612       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1613       AsmParser->hasInv2PiInlineImm());
1614   }
1615 
1616   // We got int literal token.
1617   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1618     return AMDGPU::isInlinableLiteral64(Imm.Val,
1619                                         AsmParser->hasInv2PiInlineImm());
1620   }
1621 
1622   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1623     return false;
1624   }
1625 
1626   if (type.getScalarSizeInBits() == 16) {
1627     return isInlineableLiteralOp16(
1628       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1629       type, AsmParser->hasInv2PiInlineImm());
1630   }
1631 
1632   return AMDGPU::isInlinableLiteral32(
1633     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1634     AsmParser->hasInv2PiInlineImm());
1635 }
1636 
1637 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1638   // Check that this immediate can be added as literal
1639   if (!isImmTy(ImmTyNone)) {
1640     return false;
1641   }
1642 
1643   if (!Imm.IsFPImm) {
1644     // We got int literal token.
1645 
1646     if (type == MVT::f64 && hasFPModifiers()) {
      // FP modifiers cannot be applied to int literals while preserving the
      // same semantics for VOP1/2/C and VOP3, because of integer truncation.
      // To avoid ambiguity, disable these cases.
1650       return false;
1651     }
1652 
1653     unsigned Size = type.getSizeInBits();
1654     if (Size == 64)
1655       Size = 32;
1656 
1657     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1658     // types.
1659     return isSafeTruncation(Imm.Val, Size);
1660   }
1661 
1662   // We got fp literal token
1663   if (type == MVT::f64) { // Expected 64-bit fp operand
    // The low 32 bits of the encoded literal would be set to zeroes, but we
    // accept such literals anyway.
1665     return true;
1666   }
1667 
1668   if (type == MVT::i64) { // Expected 64-bit int operand
1669     // We don't allow fp literals in 64-bit integer instructions. It is
1670     // unclear how we should encode them.
1671     return false;
1672   }
1673 
1674   // We allow fp literals with f16x2 operands assuming that the specified
1675   // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
1677   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1678                      (type == MVT::v2i16)? MVT::i16 : type;
1679 
1680   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1681   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1682 }
1683 
1684 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1685   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1686 }
1687 
1688 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1689   if (AsmParser->isVI())
1690     return isVReg32();
1691   else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1692     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1693   else
1694     return false;
1695 }
1696 
1697 bool AMDGPUOperand::isSDWAFP16Operand() const {
1698   return isSDWAOperand(MVT::f16);
1699 }
1700 
1701 bool AMDGPUOperand::isSDWAFP32Operand() const {
1702   return isSDWAOperand(MVT::f32);
1703 }
1704 
1705 bool AMDGPUOperand::isSDWAInt16Operand() const {
1706   return isSDWAOperand(MVT::i16);
1707 }
1708 
1709 bool AMDGPUOperand::isSDWAInt32Operand() const {
1710   return isSDWAOperand(MVT::i32);
1711 }
1712 
1713 bool AMDGPUOperand::isBoolReg() const {
1714   return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1715          (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1716 }
1717 
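// Apply 'abs'/'neg' input modifiers directly to the bit pattern of an FP
// literal: 'abs' clears the sign bit, 'neg' flips it.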
1718 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1719 {
1720   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1721   assert(Size == 2 || Size == 4 || Size == 8);
1722 
1723   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1724 
1725   if (Imm.Mods.Abs) {
1726     Val &= ~FpSignMask;
1727   }
1728   if (Imm.Mods.Neg) {
1729     Val ^= FpSignMask;
1730   }
1731 
1732   return Val;
1733 }
1734 
1735 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1736   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1737                              Inst.getNumOperands())) {
1738     addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &&
1740                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1741   } else {
1742     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1743     Inst.addOperand(MCOperand::createImm(Imm.Val));
1744   }
1745 }
1746 
1747 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1748   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1749   auto OpNum = Inst.getNumOperands();
1750   // Check that this operand accepts literals
1751   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1752 
1753   if (ApplyModifiers) {
1754     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1755     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1756     Val = applyInputFPModifiers(Val, Size);
1757   }
1758 
1759   APInt Literal(64, Val);
1760   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1761 
1762   if (Imm.IsFPImm) { // We got fp literal token
1763     switch (OpTy) {
1764     case AMDGPU::OPERAND_REG_IMM_INT64:
1765     case AMDGPU::OPERAND_REG_IMM_FP64:
1766     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1767     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1768       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1769                                        AsmParser->hasInv2PiInlineImm())) {
1770         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1771         return;
1772       }
1773 
1774       // Non-inlineable
1775       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1776         // For fp operands we check if low 32 bits are zeros
1777         if (Literal.getLoBits(32) != 0) {
1778           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1779           "Can't encode literal as exact 64-bit floating-point operand. "
1780           "Low 32-bits will be set to zero");
1781         }
1782 
1783         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1784         return;
1785       }
1786 
1787       // We don't allow fp literals in 64-bit integer instructions. It is
1788       // unclear how we should encode them. This case should be checked earlier
1789       // in predicate methods (isLiteralImm())
1790       llvm_unreachable("fp literal in 64-bit integer instruction.");
1791 
1792     case AMDGPU::OPERAND_REG_IMM_INT32:
1793     case AMDGPU::OPERAND_REG_IMM_FP32:
1794     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1795     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1796     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1797     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1798     case AMDGPU::OPERAND_REG_IMM_INT16:
1799     case AMDGPU::OPERAND_REG_IMM_FP16:
1800     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1801     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1802     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1803     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1804     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1805     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1806     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1807     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1808     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1809     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1810       bool lost;
1811       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1812       // Convert literal to single precision
1813       FPLiteral.convert(*getOpFltSemantics(OpTy),
1814                         APFloat::rmNearestTiesToEven, &lost);
      // We allow precision loss but not overflow or underflow. This should be
1816       // checked earlier in isLiteralImm()
1817 
1818       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1819       Inst.addOperand(MCOperand::createImm(ImmVal));
1820       return;
1821     }
1822     default:
1823       llvm_unreachable("invalid operand size");
1824     }
1825 
1826     return;
1827   }
1828 
1829   // We got int literal token.
1830   // Only sign extend inline immediates.
1831   switch (OpTy) {
1832   case AMDGPU::OPERAND_REG_IMM_INT32:
1833   case AMDGPU::OPERAND_REG_IMM_FP32:
1834   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1835   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1836   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1837   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1838   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1839   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1840     if (isSafeTruncation(Val, 32) &&
1841         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1842                                      AsmParser->hasInv2PiInlineImm())) {
1843       Inst.addOperand(MCOperand::createImm(Val));
1844       return;
1845     }
1846 
1847     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1848     return;
1849 
1850   case AMDGPU::OPERAND_REG_IMM_INT64:
1851   case AMDGPU::OPERAND_REG_IMM_FP64:
1852   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1853   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1854     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1855       Inst.addOperand(MCOperand::createImm(Val));
1856       return;
1857     }
1858 
1859     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1860     return;
1861 
1862   case AMDGPU::OPERAND_REG_IMM_INT16:
1863   case AMDGPU::OPERAND_REG_IMM_FP16:
1864   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1865   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1866   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1867   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1868     if (isSafeTruncation(Val, 16) &&
1869         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1870                                      AsmParser->hasInv2PiInlineImm())) {
1871       Inst.addOperand(MCOperand::createImm(Val));
1872       return;
1873     }
1874 
1875     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1876     return;
1877 
1878   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1879   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1880   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1881   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
1882     assert(isSafeTruncation(Val, 16));
1883     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1884                                         AsmParser->hasInv2PiInlineImm()));
1885 
1886     Inst.addOperand(MCOperand::createImm(Val));
1887     return;
1888   }
1889   default:
1890     llvm_unreachable("invalid operand size");
1891   }
1892 }
1893 
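// Add a KImm literal operand: integer tokens are truncated to Bitwidth bits,
// FP tokens are converted to the FP format of the matching width.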
1894 template <unsigned Bitwidth>
1895 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1896   APInt Literal(64, Imm.Val);
1897 
1898   if (!Imm.IsFPImm) {
1899     // We got int literal token.
1900     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1901     return;
1902   }
1903 
1904   bool Lost;
1905   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1906   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1907                     APFloat::rmNearestTiesToEven, &Lost);
1908   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1909 }
1910 
1911 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1912   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1913 }
1914 
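// Named inline values (e.g. src_shared_base, src_scc, null) are special
// registers that may be used wherever an inline constant is accepted.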
1915 static bool isInlineValue(unsigned Reg) {
1916   switch (Reg) {
1917   case AMDGPU::SRC_SHARED_BASE:
1918   case AMDGPU::SRC_SHARED_LIMIT:
1919   case AMDGPU::SRC_PRIVATE_BASE:
1920   case AMDGPU::SRC_PRIVATE_LIMIT:
1921   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1922     return true;
1923   case AMDGPU::SRC_VCCZ:
1924   case AMDGPU::SRC_EXECZ:
1925   case AMDGPU::SRC_SCC:
1926     return true;
1927   case AMDGPU::SGPR_NULL:
1928     return true;
1929   default:
1930     return false;
1931   }
1932 }
1933 
1934 bool AMDGPUOperand::isInlineValue() const {
1935   return isRegKind() && ::isInlineValue(getReg());
1936 }
1937 
1938 //===----------------------------------------------------------------------===//
1939 // AsmParser
1940 //===----------------------------------------------------------------------===//
1941 
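// Map a register kind and width (in 32-bit registers) to the corresponding
// register class ID, or -1 if no class of that width exists.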
1942 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1943   if (Is == IS_VGPR) {
1944     switch (RegWidth) {
1945       default: return -1;
1946       case 1: return AMDGPU::VGPR_32RegClassID;
1947       case 2: return AMDGPU::VReg_64RegClassID;
1948       case 3: return AMDGPU::VReg_96RegClassID;
1949       case 4: return AMDGPU::VReg_128RegClassID;
1950       case 5: return AMDGPU::VReg_160RegClassID;
1951       case 6: return AMDGPU::VReg_192RegClassID;
1952       case 8: return AMDGPU::VReg_256RegClassID;
1953       case 16: return AMDGPU::VReg_512RegClassID;
1954       case 32: return AMDGPU::VReg_1024RegClassID;
1955     }
1956   } else if (Is == IS_TTMP) {
1957     switch (RegWidth) {
1958       default: return -1;
1959       case 1: return AMDGPU::TTMP_32RegClassID;
1960       case 2: return AMDGPU::TTMP_64RegClassID;
1961       case 4: return AMDGPU::TTMP_128RegClassID;
1962       case 8: return AMDGPU::TTMP_256RegClassID;
1963       case 16: return AMDGPU::TTMP_512RegClassID;
1964     }
1965   } else if (Is == IS_SGPR) {
1966     switch (RegWidth) {
1967       default: return -1;
1968       case 1: return AMDGPU::SGPR_32RegClassID;
1969       case 2: return AMDGPU::SGPR_64RegClassID;
1970       case 3: return AMDGPU::SGPR_96RegClassID;
1971       case 4: return AMDGPU::SGPR_128RegClassID;
1972       case 5: return AMDGPU::SGPR_160RegClassID;
1973       case 6: return AMDGPU::SGPR_192RegClassID;
1974       case 8: return AMDGPU::SGPR_256RegClassID;
1975       case 16: return AMDGPU::SGPR_512RegClassID;
1976     }
1977   } else if (Is == IS_AGPR) {
1978     switch (RegWidth) {
1979       default: return -1;
1980       case 1: return AMDGPU::AGPR_32RegClassID;
1981       case 2: return AMDGPU::AReg_64RegClassID;
1982       case 3: return AMDGPU::AReg_96RegClassID;
1983       case 4: return AMDGPU::AReg_128RegClassID;
1984       case 5: return AMDGPU::AReg_160RegClassID;
1985       case 6: return AMDGPU::AReg_192RegClassID;
1986       case 8: return AMDGPU::AReg_256RegClassID;
1987       case 16: return AMDGPU::AReg_512RegClassID;
1988       case 32: return AMDGPU::AReg_1024RegClassID;
1989     }
1990   }
1991   return -1;
1992 }
1993 
1994 static unsigned getSpecialRegForName(StringRef RegName) {
1995   return StringSwitch<unsigned>(RegName)
1996     .Case("exec", AMDGPU::EXEC)
1997     .Case("vcc", AMDGPU::VCC)
1998     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1999     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2000     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2001     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2002     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2003     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2004     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2005     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2006     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2007     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2008     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2009     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2010     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2011     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2012     .Case("m0", AMDGPU::M0)
2013     .Case("vccz", AMDGPU::SRC_VCCZ)
2014     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2015     .Case("execz", AMDGPU::SRC_EXECZ)
2016     .Case("src_execz", AMDGPU::SRC_EXECZ)
2017     .Case("scc", AMDGPU::SRC_SCC)
2018     .Case("src_scc", AMDGPU::SRC_SCC)
2019     .Case("tba", AMDGPU::TBA)
2020     .Case("tma", AMDGPU::TMA)
2021     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2022     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2023     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2024     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2025     .Case("vcc_lo", AMDGPU::VCC_LO)
2026     .Case("vcc_hi", AMDGPU::VCC_HI)
2027     .Case("exec_lo", AMDGPU::EXEC_LO)
2028     .Case("exec_hi", AMDGPU::EXEC_HI)
2029     .Case("tma_lo", AMDGPU::TMA_LO)
2030     .Case("tma_hi", AMDGPU::TMA_HI)
2031     .Case("tba_lo", AMDGPU::TBA_LO)
2032     .Case("tba_hi", AMDGPU::TBA_HI)
2033     .Case("pc", AMDGPU::PC_REG)
2034     .Case("null", AMDGPU::SGPR_NULL)
2035     .Default(AMDGPU::NoRegister);
2036 }
2037 
2038 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2039                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2040   auto R = parseRegister();
2041   if (!R) return true;
2042   assert(R->isReg());
2043   RegNo = R->getReg();
2044   StartLoc = R->getStartLoc();
2045   EndLoc = R->getEndLoc();
2046   return false;
2047 }
2048 
2049 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2050                                     SMLoc &EndLoc) {
2051   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2052 }
2053 
2054 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2055                                                        SMLoc &StartLoc,
2056                                                        SMLoc &EndLoc) {
2057   bool Result =
2058       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2059   bool PendingErrors = getParser().hasPendingError();
2060   getParser().clearPendingErrors();
2061   if (PendingErrors)
2062     return MatchOperand_ParseFail;
2063   if (Result)
2064     return MatchOperand_NoMatch;
2065   return MatchOperand_Success;
2066 }
2067 
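// Extend a register list with Reg1: either merge a LO/HI pair of a special
// register (e.g. [exec_lo, exec_hi] -> exec) or require Reg1 to be the next
// consecutive register and grow RegWidth by one.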
2068 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2069                                             RegisterKind RegKind, unsigned Reg1) {
2070   switch (RegKind) {
2071   case IS_SPECIAL:
2072     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2073       Reg = AMDGPU::EXEC;
2074       RegWidth = 2;
2075       return true;
2076     }
2077     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2078       Reg = AMDGPU::FLAT_SCR;
2079       RegWidth = 2;
2080       return true;
2081     }
2082     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2083       Reg = AMDGPU::XNACK_MASK;
2084       RegWidth = 2;
2085       return true;
2086     }
2087     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2088       Reg = AMDGPU::VCC;
2089       RegWidth = 2;
2090       return true;
2091     }
2092     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2093       Reg = AMDGPU::TBA;
2094       RegWidth = 2;
2095       return true;
2096     }
2097     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2098       Reg = AMDGPU::TMA;
2099       RegWidth = 2;
2100       return true;
2101     }
2102     return false;
2103   case IS_VGPR:
2104   case IS_SGPR:
2105   case IS_AGPR:
2106   case IS_TTMP:
2107     if (Reg1 != Reg + RegWidth) {
2108       return false;
2109     }
2110     RegWidth++;
2111     return true;
2112   default:
2113     llvm_unreachable("unexpected register kind");
2114   }
2115 }
2116 
2117 struct RegInfo {
2118   StringLiteral Name;
2119   RegisterKind Kind;
2120 };
2121 
2122 static constexpr RegInfo RegularRegisters[] = {
2123   {{"v"},    IS_VGPR},
2124   {{"s"},    IS_SGPR},
2125   {{"ttmp"}, IS_TTMP},
2126   {{"acc"},  IS_AGPR},
2127   {{"a"},    IS_AGPR},
2128 };
2129 
2130 static bool isRegularReg(RegisterKind Kind) {
2131   return Kind == IS_VGPR ||
2132          Kind == IS_SGPR ||
2133          Kind == IS_TTMP ||
2134          Kind == IS_AGPR;
2135 }
2136 
2137 static const RegInfo* getRegularRegInfo(StringRef Str) {
2138   for (const RegInfo &Reg : RegularRegisters)
2139     if (Str.startswith(Reg.Name))
2140       return &Reg;
2141   return nullptr;
2142 }
2143 
2144 static bool getRegNum(StringRef Str, unsigned& Num) {
2145   return !Str.getAsInteger(10, Num);
2146 }
2147 
2148 bool
2149 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2150                             const AsmToken &NextToken) const {
2151 
2152   // A list of consecutive registers: [s0,s1,s2,s3]
2153   if (Token.is(AsmToken::LBrac))
2154     return true;
2155 
2156   if (!Token.is(AsmToken::Identifier))
2157     return false;
2158 
2159   // A single register like s0 or a range of registers like s[0:1]
2160 
2161   StringRef Str = Token.getString();
2162   const RegInfo *Reg = getRegularRegInfo(Str);
2163   if (Reg) {
2164     StringRef RegName = Reg->Name;
2165     StringRef RegSuffix = Str.substr(RegName.size());
2166     if (!RegSuffix.empty()) {
2167       unsigned Num;
2168       // A single register with an index: rXX
2169       if (getRegNum(RegSuffix, Num))
2170         return true;
2171     } else {
2172       // A range of registers: r[XX:YY].
2173       if (NextToken.is(AsmToken::LBrac))
2174         return true;
2175     }
2176   }
2177 
2178   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2179 }
2180 
2181 bool
2182 AMDGPUAsmParser::isRegister()
2183 {
2184   return isRegister(getToken(), peekToken());
2185 }
2186 
2187 unsigned
2188 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2189                                unsigned RegNum,
2190                                unsigned RegWidth) {
2191 
2192   assert(isRegularReg(RegKind));
2193 
2194   unsigned AlignSize = 1;
2195   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2196     // SGPR and TTMP registers must be aligned.
2197     // Max required alignment is 4 dwords.
2198     AlignSize = std::min(RegWidth, 4u);
2199   }
2200 
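  // For example, an SGPR pair (RegWidth == 2) must start at an even index:
  // s[2:3] is accepted while s[3:4] is not.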
2201   if (RegNum % AlignSize != 0)
2202     return AMDGPU::NoRegister;
2203 
2204   unsigned RegIdx = RegNum / AlignSize;
2205   int RCID = getRegClass(RegKind, RegWidth);
2206   if (RCID == -1)
2207     return AMDGPU::NoRegister;
2208 
2209   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2210   const MCRegisterClass RC = TRI->getRegClass(RCID);
2211   if (RegIdx >= RC.getNumRegs())
2212     return AMDGPU::NoRegister;
2213 
2214   return RC.getRegister(RegIdx);
2215 }
2216 
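// Parse a register index or range in brackets, e.g. "[0]" or "[0:3]".
// On success, Num is the first register index and Width the register count.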
2217 bool
2218 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2219   int64_t RegLo, RegHi;
2220   if (!trySkipToken(AsmToken::LBrac))
2221     return false;
2222 
2223   if (!parseExpr(RegLo))
2224     return false;
2225 
2226   if (trySkipToken(AsmToken::Colon)) {
2227     if (!parseExpr(RegHi))
2228       return false;
2229   } else {
2230     RegHi = RegLo;
2231   }
2232 
2233   if (!trySkipToken(AsmToken::RBrac))
2234     return false;
2235 
2236   if (!isUInt<32>(RegLo) || !isUInt<32>(RegHi) || RegLo > RegHi)
2237     return false;
2238 
2239   Num = static_cast<unsigned>(RegLo);
2240   Width = (RegHi - RegLo) + 1;
2241   return true;
2242 }
2243 
2244 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2245                                           unsigned &RegNum, unsigned &RegWidth,
2246                                           SmallVectorImpl<AsmToken> &Tokens) {
2247   assert(isToken(AsmToken::Identifier));
2248   unsigned Reg = getSpecialRegForName(getTokenStr());
2249   if (Reg) {
2250     RegNum = 0;
2251     RegWidth = 1;
2252     RegKind = IS_SPECIAL;
2253     Tokens.push_back(getToken());
2254     lex(); // skip register name
2255   }
2256   return Reg;
2257 }
2258 
2259 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2260                                           unsigned &RegNum, unsigned &RegWidth,
2261                                           SmallVectorImpl<AsmToken> &Tokens) {
2262   assert(isToken(AsmToken::Identifier));
2263   StringRef RegName = getTokenStr();
2264 
2265   const RegInfo *RI = getRegularRegInfo(RegName);
2266   if (!RI)
2267     return AMDGPU::NoRegister;
2268   Tokens.push_back(getToken());
2269   lex(); // skip register name
2270 
2271   RegKind = RI->Kind;
2272   StringRef RegSuffix = RegName.substr(RI->Name.size());
2273   if (!RegSuffix.empty()) {
2274     // Single 32-bit register: vXX.
2275     if (!getRegNum(RegSuffix, RegNum))
2276       return AMDGPU::NoRegister;
2277     RegWidth = 1;
2278   } else {
2279     // Range of registers: v[XX:YY]. ":YY" is optional.
2280     if (!ParseRegRange(RegNum, RegWidth))
2281       return AMDGPU::NoRegister;
2282   }
2283 
2284   return getRegularReg(RegKind, RegNum, RegWidth);
2285 }
2286 
2287 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2288                                        unsigned &RegWidth,
2289                                        SmallVectorImpl<AsmToken> &Tokens) {
2290   unsigned Reg = AMDGPU::NoRegister;
2291 
2292   if (!trySkipToken(AsmToken::LBrac))
2293     return AMDGPU::NoRegister;
2294 
2295   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2296 
2297   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2298     return AMDGPU::NoRegister;
2299   if (RegWidth != 1)
2300     return AMDGPU::NoRegister;
2301 
2302   for (; trySkipToken(AsmToken::Comma); ) {
2303     RegisterKind NextRegKind;
2304     unsigned NextReg, NextRegNum, NextRegWidth;
2305 
2306     if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth,
2307                              Tokens))
2308       return AMDGPU::NoRegister;
2309     if (NextRegWidth != 1)
2310       return AMDGPU::NoRegister;
2311     if (NextRegKind != RegKind)
2312       return AMDGPU::NoRegister;
2313     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg))
2314       return AMDGPU::NoRegister;
2315   }
2316 
2317   if (!trySkipToken(AsmToken::RBrac))
2318     return AMDGPU::NoRegister;
2319 
2320   if (isRegularReg(RegKind))
2321     Reg = getRegularReg(RegKind, RegNum, RegWidth);
2322 
2323   return Reg;
2324 }
2325 
2326 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2327                                           unsigned &RegNum, unsigned &RegWidth,
2328                                           SmallVectorImpl<AsmToken> &Tokens) {
2329   Reg = AMDGPU::NoRegister;
2330 
2331   if (isToken(AsmToken::Identifier)) {
2332     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2333     if (Reg == AMDGPU::NoRegister)
2334       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2335   } else {
2336     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2337   }
2338 
2339   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2340   return Reg != AMDGPU::NoRegister && subtargetHasRegister(*TRI, Reg);
2341 }
2342 
2343 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2344                                           unsigned &RegNum, unsigned &RegWidth,
2345                                           bool RestoreOnFailure) {
2346   Reg = AMDGPU::NoRegister;
2347 
2348   SmallVector<AsmToken, 1> Tokens;
2349   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2350     if (RestoreOnFailure) {
2351       while (!Tokens.empty()) {
2352         getLexer().UnLex(Tokens.pop_back_val());
2353       }
2354     }
2355     return true;
2356   }
2357   return false;
2358 }
2359 
2360 Optional<StringRef>
2361 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2362   switch (RegKind) {
2363   case IS_VGPR:
2364     return StringRef(".amdgcn.next_free_vgpr");
2365   case IS_SGPR:
2366     return StringRef(".amdgcn.next_free_sgpr");
2367   default:
2368     return None;
2369   }
2370 }
2371 
2372 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2373   auto SymbolName = getGprCountSymbolName(RegKind);
2374   assert(SymbolName && "initializing invalid register kind");
2375   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2376   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2377 }
2378 
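// Bump the .amdgcn.next_free_{v,s}gpr symbol so that it stays one past the
// highest register index referenced so far.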
2379 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2380                                             unsigned DwordRegIndex,
2381                                             unsigned RegWidth) {
2382   // Symbols are only defined for GCN targets
2383   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2384     return true;
2385 
2386   auto SymbolName = getGprCountSymbolName(RegKind);
2387   if (!SymbolName)
2388     return true;
2389   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2390 
2391   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2392   int64_t OldCount;
2393 
2394   if (!Sym->isVariable())
2395     return !Error(getParser().getTok().getLoc(),
2396                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2397   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2398     return !Error(
2399         getParser().getTok().getLoc(),
2400         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2401 
2402   if (OldCount <= NewMax)
2403     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2404 
2405   return true;
2406 }
2407 
2408 std::unique_ptr<AMDGPUOperand>
2409 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2410   const auto &Tok = Parser.getTok();
2411   SMLoc StartLoc = Tok.getLoc();
2412   SMLoc EndLoc = Tok.getEndLoc();
2413   RegisterKind RegKind;
2414   unsigned Reg, RegNum, RegWidth;
2415 
2416   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2417     //FIXME: improve error messages (bug 41303).
2418     Error(StartLoc, "not a valid operand.");
2419     return nullptr;
2420   }
2421   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2422     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2423       return nullptr;
2424   } else
2425     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2426   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2427 }
2428 
2429 OperandMatchResultTy
2430 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2431   // TODO: add syntactic sugar for 1/(2*PI)
2432 
2433   assert(!isRegister());
2434   assert(!isModifier());
2435 
2436   const auto& Tok = getToken();
2437   const auto& NextTok = peekToken();
2438   bool IsReal = Tok.is(AsmToken::Real);
2439   SMLoc S = getLoc();
2440   bool Negate = false;
2441 
2442   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2443     lex();
2444     IsReal = true;
2445     Negate = true;
2446   }
2447 
2448   if (IsReal) {
2449     // Floating-point expressions are not supported.
2450     // Can only allow floating-point literals with an
2451     // optional sign.
2452 
2453     StringRef Num = getTokenStr();
2454     lex();
2455 
2456     APFloat RealVal(APFloat::IEEEdouble());
2457     auto roundMode = APFloat::rmNearestTiesToEven;
2458     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2459       return MatchOperand_ParseFail;
2460     }
2461     if (Negate)
2462       RealVal.changeSign();
2463 
2464     Operands.push_back(
2465       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2466                                AMDGPUOperand::ImmTyNone, true));
2467 
2468     return MatchOperand_Success;
2469 
2470   } else {
2471     int64_t IntVal;
2472     const MCExpr *Expr;
2473     SMLoc S = getLoc();
2474 
2475     if (HasSP3AbsModifier) {
2476       // This is a workaround for handling expressions
2477       // as arguments of SP3 'abs' modifier, for example:
2478       //     |1.0|
2479       //     |-1|
2480       //     |1+x|
2481       // This syntax is not compatible with syntax of standard
2482       // MC expressions (due to the trailing '|').
2483       SMLoc EndLoc;
2484       if (getParser().parsePrimaryExpr(Expr, EndLoc))
2485         return MatchOperand_ParseFail;
2486     } else {
2487       if (Parser.parseExpression(Expr))
2488         return MatchOperand_ParseFail;
2489     }
2490 
2491     if (Expr->evaluateAsAbsolute(IntVal)) {
2492       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2493     } else {
2494       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2495     }
2496 
2497     return MatchOperand_Success;
2498   }
2499 
2500   return MatchOperand_NoMatch;
2501 }
2502 
2503 OperandMatchResultTy
2504 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2505   if (!isRegister())
2506     return MatchOperand_NoMatch;
2507 
2508   if (auto R = parseRegister()) {
2509     assert(R->isReg());
2510     Operands.push_back(std::move(R));
2511     return MatchOperand_Success;
2512   }
2513   return MatchOperand_ParseFail;
2514 }
2515 
2516 OperandMatchResultTy
2517 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2518   auto res = parseReg(Operands);
2519   if (res != MatchOperand_NoMatch) {
2520     return res;
2521   } else if (isModifier()) {
2522     return MatchOperand_NoMatch;
2523   } else {
2524     return parseImm(Operands, HasSP3AbsMod);
2525   }
2526 }
2527 
2528 bool
2529 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2530   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2531     const auto &str = Token.getString();
2532     return str == "abs" || str == "neg" || str == "sext";
2533   }
2534   return false;
2535 }
2536 
2537 bool
2538 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2539   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2540 }
2541 
2542 bool
2543 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2544   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2545 }
2546 
2547 bool
2548 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2549   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2550 }
2551 
2552 // Check if this is an operand modifier or an opcode modifier
// which may look like an expression but is not. We should
2554 // avoid parsing these modifiers as expressions. Currently
2555 // recognized sequences are:
2556 //   |...|
2557 //   abs(...)
2558 //   neg(...)
2559 //   sext(...)
2560 //   -reg
2561 //   -|...|
2562 //   -abs(...)
2563 //   name:...
2564 // Note that simple opcode modifiers like 'gds' may be parsed as
2565 // expressions; this is a special case. See getExpressionAsToken.
2566 //
2567 bool
2568 AMDGPUAsmParser::isModifier() {
2569 
2570   AsmToken Tok = getToken();
2571   AsmToken NextToken[2];
2572   peekTokens(NextToken);
2573 
2574   return isOperandModifier(Tok, NextToken[0]) ||
2575          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2576          isOpcodeModifierWithVal(Tok, NextToken[0]);
2577 }
2578 
2579 // Check if the current token is an SP3 'neg' modifier.
2580 // Currently this modifier is allowed in the following context:
2581 //
2582 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2583 // 2. Before an 'abs' modifier: -abs(...)
2584 // 3. Before an SP3 'abs' modifier: -|...|
2585 //
// In all other cases "-" is handled as part
// of an expression that follows the sign.
//
// Note: When "-" is followed by an integer literal,
// it is interpreted as integer negation rather than
// a floating-point NEG modifier applied to the literal.
// Besides being counter-intuitive, such use of a floating-point
// NEG modifier would have resulted in different meanings
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise for uniformity.
2600 //
2601 bool
2602 AMDGPUAsmParser::parseSP3NegModifier() {
2603 
2604   AsmToken NextToken[2];
2605   peekTokens(NextToken);
2606 
2607   if (isToken(AsmToken::Minus) &&
2608       (isRegister(NextToken[0], NextToken[1]) ||
2609        NextToken[0].is(AsmToken::Pipe) ||
2610        isId(NextToken[0], "abs"))) {
2611     lex();
2612     return true;
2613   }
2614 
2615   return false;
2616 }
2617 
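// Parse an operand with optional FP input modifiers. Examples of accepted
// syntax: v0, -v0, |v0|, abs(v0), neg(v0), -|v0|, -abs(v0).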
2618 OperandMatchResultTy
2619 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2620                                               bool AllowImm) {
2621   bool Neg, SP3Neg;
2622   bool Abs, SP3Abs;
2623   SMLoc Loc;
2624 
2625   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2626   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2627     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2628     return MatchOperand_ParseFail;
2629   }
2630 
2631   SP3Neg = parseSP3NegModifier();
2632 
2633   Loc = getLoc();
2634   Neg = trySkipId("neg");
2635   if (Neg && SP3Neg) {
2636     Error(Loc, "expected register or immediate");
2637     return MatchOperand_ParseFail;
2638   }
2639   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2640     return MatchOperand_ParseFail;
2641 
2642   Abs = trySkipId("abs");
2643   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2644     return MatchOperand_ParseFail;
2645 
2646   Loc = getLoc();
2647   SP3Abs = trySkipToken(AsmToken::Pipe);
2648   if (Abs && SP3Abs) {
2649     Error(Loc, "expected register or immediate");
2650     return MatchOperand_ParseFail;
2651   }
2652 
2653   OperandMatchResultTy Res;
2654   if (AllowImm) {
2655     Res = parseRegOrImm(Operands, SP3Abs);
2656   } else {
2657     Res = parseReg(Operands);
2658   }
2659   if (Res != MatchOperand_Success) {
2660     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2661   }
2662 
2663   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2664     return MatchOperand_ParseFail;
2665   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2666     return MatchOperand_ParseFail;
2667   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2668     return MatchOperand_ParseFail;
2669 
2670   AMDGPUOperand::Modifiers Mods;
2671   Mods.Abs = Abs || SP3Abs;
2672   Mods.Neg = Neg || SP3Neg;
2673 
2674   if (Mods.hasFPModifiers()) {
2675     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2676     if (Op.isExpr()) {
2677       Error(Op.getStartLoc(), "expected an absolute expression");
2678       return MatchOperand_ParseFail;
2679     }
2680     Op.setModifiers(Mods);
2681   }
2682   return MatchOperand_Success;
2683 }
2684 
2685 OperandMatchResultTy
2686 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2687                                                bool AllowImm) {
2688   bool Sext = trySkipId("sext");
2689   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2690     return MatchOperand_ParseFail;
2691 
2692   OperandMatchResultTy Res;
2693   if (AllowImm) {
2694     Res = parseRegOrImm(Operands);
2695   } else {
2696     Res = parseReg(Operands);
2697   }
2698   if (Res != MatchOperand_Success) {
2699     return Sext? MatchOperand_ParseFail : Res;
2700   }
2701 
2702   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2703     return MatchOperand_ParseFail;
2704 
2705   AMDGPUOperand::Modifiers Mods;
2706   Mods.Sext = Sext;
2707 
2708   if (Mods.hasIntModifiers()) {
2709     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2710     if (Op.isExpr()) {
2711       Error(Op.getStartLoc(), "expected an absolute expression");
2712       return MatchOperand_ParseFail;
2713     }
2714     Op.setModifiers(Mods);
2715   }
2716 
2717   return MatchOperand_Success;
2718 }
2719 
2720 OperandMatchResultTy
2721 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2722   return parseRegOrImmWithFPInputMods(Operands, false);
2723 }
2724 
2725 OperandMatchResultTy
2726 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2727   return parseRegOrImmWithIntInputMods(Operands, false);
2728 }
2729 
2730 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2731   auto Loc = getLoc();
2732   if (trySkipId("off")) {
2733     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2734                                                 AMDGPUOperand::ImmTyOff, false));
2735     return MatchOperand_Success;
2736   }
2737 
2738   if (!isRegister())
2739     return MatchOperand_NoMatch;
2740 
2741   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2742   if (Reg) {
2743     Operands.push_back(std::move(Reg));
2744     return MatchOperand_Success;
2745   }
2746 
2747   return MatchOperand_ParseFail;
2748 
2749 }
2750 
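// Reject instruction variants that do not match an encoding explicitly forced
// by the user (32/64-bit, DPP or SDWA), and handle a few special cases such
// as the dst_sel restriction of v_mac_f32/f16 in SDWA form.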
2751 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2752   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2753 
2754   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2755       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2756       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2757       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2758     return Match_InvalidOperand;
2759 
2760   if ((TSFlags & SIInstrFlags::VOP3) &&
2761       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2762       getForcedEncodingSize() != 64)
2763     return Match_PreferE32;
2764 
2765   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2766       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2767     // v_mac_f32/16 allow only dst_sel == DWORD;
2768     auto OpNum =
2769         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2770     const auto &Op = Inst.getOperand(OpNum);
2771     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2772       return Match_InvalidOperand;
2773     }
2774   }
2775 
2776   return Match_Success;
2777 }
2778 
2779 // What asm variants we should check
2780 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2781   if (getForcedEncodingSize() == 32) {
2782     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2783     return makeArrayRef(Variants);
2784   }
2785 
2786   if (isForcedVOP3()) {
2787     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2788     return makeArrayRef(Variants);
2789   }
2790 
2791   if (isForcedSDWA()) {
2792     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2793                                         AMDGPUAsmVariants::SDWA9};
2794     return makeArrayRef(Variants);
2795   }
2796 
2797   if (isForcedDPP()) {
2798     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2799     return makeArrayRef(Variants);
2800   }
2801 
2802   static const unsigned Variants[] = {
2803     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2804     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2805   };
2806 
2807   return makeArrayRef(Variants);
2808 }
2809 
2810 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2811   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2812   const unsigned Num = Desc.getNumImplicitUses();
2813   for (unsigned i = 0; i < Num; ++i) {
2814     unsigned Reg = Desc.ImplicitUses[i];
2815     switch (Reg) {
2816     case AMDGPU::FLAT_SCR:
2817     case AMDGPU::VCC:
2818     case AMDGPU::VCC_LO:
2819     case AMDGPU::VCC_HI:
2820     case AMDGPU::M0:
2821       return Reg;
2822     default:
2823       break;
2824     }
2825   }
2826   return AMDGPU::NoRegister;
2827 }
2828 
// NB: This code is correct only when used to check constant
// bus limitations because GFX7 has no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
2833 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2834                                        unsigned OpIdx) const {
2835   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2836 
2837   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2838     return false;
2839   }
2840 
2841   const MCOperand &MO = Inst.getOperand(OpIdx);
2842 
2843   int64_t Val = MO.getImm();
2844   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2845 
2846   switch (OpSize) { // expected operand size
2847   case 8:
2848     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2849   case 4:
2850     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2851   case 2: {
2852     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2853     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
2854         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
2855         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
2856       return AMDGPU::isInlinableIntLiteral(Val);
2857 
2858     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2859         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2860         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
2861       return AMDGPU::isInlinableIntLiteralV216(Val);
2862 
2863     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2864         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2865         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
2866       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2867 
2868     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2869   }
2870   default:
2871     llvm_unreachable("invalid operand size");
2872   }
2873 }
2874 
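// Return the number of scalar (constant bus) sources the instruction may use:
// GFX10 generally allows two, while earlier targets and 64-bit shifts allow
// only one.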
2875 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
2876   if (!isGFX10())
2877     return 1;
2878 
2879   switch (Opcode) {
2880   // 64-bit shift instructions can use only one scalar value input
2881   case AMDGPU::V_LSHLREV_B64:
2882   case AMDGPU::V_LSHLREV_B64_gfx10:
2883   case AMDGPU::V_LSHL_B64:
2884   case AMDGPU::V_LSHRREV_B64:
2885   case AMDGPU::V_LSHRREV_B64_gfx10:
2886   case AMDGPU::V_LSHR_B64:
2887   case AMDGPU::V_ASHRREV_I64:
2888   case AMDGPU::V_ASHRREV_I64_gfx10:
2889   case AMDGPU::V_ASHR_I64:
2890     return 1;
2891   default:
2892     return 2;
2893   }
2894 }
2895 
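// An operand occupies the constant bus if it is a non-inline immediate, an
// expression, or a scalar register other than null.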
2896 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2897   const MCOperand &MO = Inst.getOperand(OpIdx);
2898   if (MO.isImm()) {
2899     return !isInlineConstant(Inst, OpIdx);
2900   } else if (MO.isReg()) {
2901     auto Reg = MO.getReg();
2902     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2903     return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL;
2904   } else {
2905     return true;
2906   }
2907 }
2908 
2909 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2910   const unsigned Opcode = Inst.getOpcode();
2911   const MCInstrDesc &Desc = MII.get(Opcode);
2912   unsigned ConstantBusUseCount = 0;
2913   unsigned NumLiterals = 0;
2914   unsigned LiteralSize;
2915 
2916   if (Desc.TSFlags &
2917       (SIInstrFlags::VOPC |
2918        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2919        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2920        SIInstrFlags::SDWA)) {
2921     // Check special imm operands (used by madmk, etc)
2922     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2923       ++ConstantBusUseCount;
2924     }
2925 
2926     SmallDenseSet<unsigned> SGPRsUsed;
2927     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2928     if (SGPRUsed != AMDGPU::NoRegister) {
2929       SGPRsUsed.insert(SGPRUsed);
2930       ++ConstantBusUseCount;
2931     }
2932 
2933     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2934     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2935     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2936 
2937     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2938 
2939     for (int OpIdx : OpIndices) {
2940       if (OpIdx == -1) break;
2941 
2942       const MCOperand &MO = Inst.getOperand(OpIdx);
2943       if (usesConstantBus(Inst, OpIdx)) {
2944         if (MO.isReg()) {
2945           const unsigned Reg = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersection like these:
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but are disabled anyway.
2951           // Note that this code mimics SIInstrInfo::verifyInstruction
2952           if (!SGPRsUsed.count(Reg)) {
2953             SGPRsUsed.insert(Reg);
2954             ++ConstantBusUseCount;
2955           }
2956         } else { // Expression or a literal
2957 
2958           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
2959             continue; // special operand like VINTERP attr_chan
2960 
          // An instruction may use only one literal.
          // This has been validated in the previous step
          // (see validateVOP3Literal).
          // The same literal may be used by more than one operand.
2965           // If all these operands are of the same size,
2966           // this literal counts as one scalar value.
2967           // Otherwise it counts as 2 scalar values.
2968           // See "GFX10 Shader Programming", section 3.6.2.3.
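          // For example, a 32-bit literal shared by src0 and src1 counts
          // once; the same bits used by operands of different sizes count
          // twice.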
2969 
2970           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
2971           if (Size < 4) Size = 4;
2972 
2973           if (NumLiterals == 0) {
2974             NumLiterals = 1;
2975             LiteralSize = Size;
2976           } else if (LiteralSize != Size) {
2977             NumLiterals = 2;
2978           }
2979         }
2980       }
2981     }
2982   }
2983   ConstantBusUseCount += NumLiterals;
2984 
2985   return ConstantBusUseCount <= getConstantBusLimit(Opcode);
2986 }
2987 
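// For instructions whose vdst is marked early-clobber, the destination
// register must not overlap any of the source registers.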
2988 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2989   const unsigned Opcode = Inst.getOpcode();
2990   const MCInstrDesc &Desc = MII.get(Opcode);
2991 
2992   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2993   if (DstIdx == -1 ||
2994       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2995     return true;
2996   }
2997 
2998   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2999 
3000   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3001   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3002   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3003 
3004   assert(DstIdx != -1);
3005   const MCOperand &Dst = Inst.getOperand(DstIdx);
3006   assert(Dst.isReg());
3007   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3008 
3009   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3010 
3011   for (int SrcIdx : SrcIndices) {
3012     if (SrcIdx == -1) break;
3013     const MCOperand &Src = Inst.getOperand(SrcIdx);
3014     if (Src.isReg()) {
3015       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3016       if (isRegIntersect(DstReg, SrcReg, TRI)) {
3017         return false;
3018       }
3019     }
3020   }
3021 
3022   return true;
3023 }
3024 
3025 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3026 
3027   const unsigned Opc = Inst.getOpcode();
3028   const MCInstrDesc &Desc = MII.get(Opc);
3029 
3030   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3031     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3032     assert(ClampIdx != -1);
3033     return Inst.getOperand(ClampIdx).getImm() == 0;
3034   }
3035 
3036   return true;
3037 }
3038 
3039 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3040 
3041   const unsigned Opc = Inst.getOpcode();
3042   const MCInstrDesc &Desc = MII.get(Opc);
3043 
3044   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3045     return true;
3046 
3047   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3048   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3049   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3050 
3051   assert(VDataIdx != -1);
3052   assert(DMaskIdx != -1);
3053   assert(TFEIdx != -1);
3054 
3055   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3056   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
3057   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3058   if (DMask == 0)
3059     DMask = 1;
3060 
3061   unsigned DataSize =
3062     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3063   if (hasPackedD16()) {
3064     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3065     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3066       DataSize = (DataSize + 1) / 2;
3067   }
3068 
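  // The data register must hold one dword per enabled dmask channel (halved
  // for packed D16) plus one extra dword when tfe is set.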
3069   return (VDataSize / 4) == DataSize + TFESize;
3070 }
3071 
3072 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3073   const unsigned Opc = Inst.getOpcode();
3074   const MCInstrDesc &Desc = MII.get(Opc);
3075 
3076   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
3077     return true;
3078 
3079   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3080   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3081       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3082   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3083   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3084   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3085 
3086   assert(VAddr0Idx != -1);
3087   assert(SrsrcIdx != -1);
3088   assert(DimIdx != -1);
3089   assert(SrsrcIdx > VAddr0Idx);
3090 
3091   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3092   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3093   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3094   unsigned VAddrSize =
3095       IsNSA ? SrsrcIdx - VAddr0Idx
3096             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3097 
3098   unsigned AddrSize = BaseOpcode->NumExtraArgs +
3099                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
3100                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
3101                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
3102   if (!IsNSA) {
3103     if (AddrSize > 8)
3104       AddrSize = 16;
3105     else if (AddrSize > 4)
3106       AddrSize = 8;
3107   }
3108 
3109   return VAddrSize == AddrSize;
3110 }
3111 
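// Image atomics (MIMG instructions that both load and store) only allow a
// limited set of dmask values.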
3112 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3113 
3114   const unsigned Opc = Inst.getOpcode();
3115   const MCInstrDesc &Desc = MII.get(Opc);
3116 
3117   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3118     return true;
3119   if (!Desc.mayLoad() || !Desc.mayStore())
3120     return true; // Not atomic
3121 
3122   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3123   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3124 
3125   // This is an incomplete check because image_atomic_cmpswap
3126   // may only use 0x3 and 0xf while other atomic operations
3127   // may use 0x1 and 0x3. However, these limitations are
3128   // verified when we check that dmask matches dst size.
3129   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3130 }
3131 
3132 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3133 
3134   const unsigned Opc = Inst.getOpcode();
3135   const MCInstrDesc &Desc = MII.get(Opc);
3136 
3137   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3138     return true;
3139 
3140   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3141   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3142 
3143   // GATHER4 instructions use dmask in a different fashion compared to
3144   // other MIMG instructions. The only useful DMASK values are
3145   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3146   // (red,red,red,red) etc.) The ISA document doesn't mention
3147   // this.
3148   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3149 }
3150 
3151 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3152 {
3153   switch (Opcode) {
3154   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3155   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3156   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3157     return true;
3158   default:
3159     return false;
3160   }
3161 }
3162 
3163 // movrels* opcodes should only allow VGPRs as src0.
3164 // This is specified in the .td description for vop1/vop3,
3165 // but sdwa is handled differently. See isSDWAOperand.
3166 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) {
3167 
3168   const unsigned Opc = Inst.getOpcode();
3169   const MCInstrDesc &Desc = MII.get(Opc);
3170 
3171   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3172     return true;
3173 
3174   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3175   assert(Src0Idx != -1);
3176 
3177   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3178   if (!Src0.isReg())
3179     return false;
3180 
3181   auto Reg = Src0.getReg();
3182   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3183   return !isSGPR(mc2PseudoReg(Reg), TRI);
3184 }
3185 
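// v_accvgpr_write does not accept an SGPR as src0; it must be a VGPR or an
// inline constant.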
3186 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst) {
3187 
3188   const unsigned Opc = Inst.getOpcode();
3189 
3190   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3191     return true;
3192 
3193   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3194   assert(Src0Idx != -1);
3195 
3196   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3197   if (!Src0.isReg())
3198     return true;
3199 
3200   auto Reg = Src0.getReg();
3201   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3202   if (isSGPR(mc2PseudoReg(Reg), TRI)) {
3203     Error(getLoc(), "source operand must be either a VGPR or an inline constant");
3204     return false;
3205   }
3206 
3207   return true;
3208 }
3209 
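// The d16 image modifier is not supported on SI/CI.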
3210 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3211 
3212   const unsigned Opc = Inst.getOpcode();
3213   const MCInstrDesc &Desc = MII.get(Opc);
3214 
3215   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3216     return true;
3217 
3218   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3219   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3220     if (isCI() || isSI())
3221       return false;
3222   }
3223 
3224   return true;
3225 }
3226 
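// The MIMG dim operand only encodes values in the range [0, 7].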
3227 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3228   const unsigned Opc = Inst.getOpcode();
3229   const MCInstrDesc &Desc = MII.get(Opc);
3230 
3231   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3232     return true;
3233 
3234   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3235   if (DimIdx < 0)
3236     return true;
3237 
3238   long Imm = Inst.getOperand(DimIdx).getImm();
3239   if (Imm < 0 || Imm >= 8)
3240     return false;
3241 
3242   return true;
3243 }
3244 
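// Return true for "rev" opcodes, i.e. instructions whose source operands are
// swapped relative to the non-rev form. These have extra restrictions when
// lds_direct is used as src0 (see validateLdsDirect).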
3245 static bool IsRevOpcode(const unsigned Opcode)
3246 {
3247   switch (Opcode) {
3248   case AMDGPU::V_SUBREV_F32_e32:
3249   case AMDGPU::V_SUBREV_F32_e64:
3250   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3251   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3252   case AMDGPU::V_SUBREV_F32_e32_vi:
3253   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3254   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3255   case AMDGPU::V_SUBREV_F32_e64_vi:
3256 
3257   case AMDGPU::V_SUBREV_CO_U32_e32:
3258   case AMDGPU::V_SUBREV_CO_U32_e64:
3259   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3260   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3261 
3262   case AMDGPU::V_SUBBREV_U32_e32:
3263   case AMDGPU::V_SUBBREV_U32_e64:
3264   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3265   case AMDGPU::V_SUBBREV_U32_e32_vi:
3266   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3267   case AMDGPU::V_SUBBREV_U32_e64_vi:
3268 
3269   case AMDGPU::V_SUBREV_U32_e32:
3270   case AMDGPU::V_SUBREV_U32_e64:
3271   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3272   case AMDGPU::V_SUBREV_U32_e32_vi:
3273   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3274   case AMDGPU::V_SUBREV_U32_e64_vi:
3275 
3276   case AMDGPU::V_SUBREV_F16_e32:
3277   case AMDGPU::V_SUBREV_F16_e64:
3278   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3279   case AMDGPU::V_SUBREV_F16_e32_vi:
3280   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3281   case AMDGPU::V_SUBREV_F16_e64_vi:
3282 
3283   case AMDGPU::V_SUBREV_U16_e32:
3284   case AMDGPU::V_SUBREV_U16_e64:
3285   case AMDGPU::V_SUBREV_U16_e32_vi:
3286   case AMDGPU::V_SUBREV_U16_e64_vi:
3287 
3288   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3289   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3290   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3291 
3292   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3293   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3294 
3295   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3296   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3297 
3298   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3299   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3300 
3301   case AMDGPU::V_LSHRREV_B32_e32:
3302   case AMDGPU::V_LSHRREV_B32_e64:
3303   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3304   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3305   case AMDGPU::V_LSHRREV_B32_e32_vi:
3306   case AMDGPU::V_LSHRREV_B32_e64_vi:
3307   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3308   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3309 
3310   case AMDGPU::V_ASHRREV_I32_e32:
3311   case AMDGPU::V_ASHRREV_I32_e64:
3312   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3313   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3314   case AMDGPU::V_ASHRREV_I32_e32_vi:
3315   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3316   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3317   case AMDGPU::V_ASHRREV_I32_e64_vi:
3318 
3319   case AMDGPU::V_LSHLREV_B32_e32:
3320   case AMDGPU::V_LSHLREV_B32_e64:
3321   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3322   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3323   case AMDGPU::V_LSHLREV_B32_e32_vi:
3324   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3325   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3326   case AMDGPU::V_LSHLREV_B32_e64_vi:
3327 
3328   case AMDGPU::V_LSHLREV_B16_e32:
3329   case AMDGPU::V_LSHLREV_B16_e64:
3330   case AMDGPU::V_LSHLREV_B16_e32_vi:
3331   case AMDGPU::V_LSHLREV_B16_e64_vi:
3332   case AMDGPU::V_LSHLREV_B16_gfx10:
3333 
3334   case AMDGPU::V_LSHRREV_B16_e32:
3335   case AMDGPU::V_LSHRREV_B16_e64:
3336   case AMDGPU::V_LSHRREV_B16_e32_vi:
3337   case AMDGPU::V_LSHRREV_B16_e64_vi:
3338   case AMDGPU::V_LSHRREV_B16_gfx10:
3339 
3340   case AMDGPU::V_ASHRREV_I16_e32:
3341   case AMDGPU::V_ASHRREV_I16_e64:
3342   case AMDGPU::V_ASHRREV_I16_e32_vi:
3343   case AMDGPU::V_ASHRREV_I16_e64_vi:
3344   case AMDGPU::V_ASHRREV_I16_gfx10:
3345 
3346   case AMDGPU::V_LSHLREV_B64:
3347   case AMDGPU::V_LSHLREV_B64_gfx10:
3348   case AMDGPU::V_LSHLREV_B64_vi:
3349 
3350   case AMDGPU::V_LSHRREV_B64:
3351   case AMDGPU::V_LSHRREV_B64_gfx10:
3352   case AMDGPU::V_LSHRREV_B64_vi:
3353 
3354   case AMDGPU::V_ASHRREV_I64:
3355   case AMDGPU::V_ASHRREV_I64_gfx10:
3356   case AMDGPU::V_ASHRREV_I64_vi:
3357 
3358   case AMDGPU::V_PK_LSHLREV_B16:
3359   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3360   case AMDGPU::V_PK_LSHLREV_B16_vi:
3361 
3362   case AMDGPU::V_PK_LSHRREV_B16:
3363   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3364   case AMDGPU::V_PK_LSHRREV_B16_vi:
3365   case AMDGPU::V_PK_ASHRREV_I16:
3366   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3367   case AMDGPU::V_PK_ASHRREV_I16_vi:
3368     return true;
3369   default:
3370     return false;
3371   }
3372 }
3373 
3374 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3375 
3376   using namespace SIInstrFlags;
3377   const unsigned Opcode = Inst.getOpcode();
3378   const MCInstrDesc &Desc = MII.get(Opcode);
3379 
3380   // The lds_direct register is defined so that it can be used
3381   // with 9-bit operands only. Ignore encodings that do not accept these.
3382   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3383     return true;
3384 
3385   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3386   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3387   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3388 
3389   const int SrcIndices[] = { Src1Idx, Src2Idx };
3390 
3391   // lds_direct cannot be specified as either src1 or src2.
3392   for (int SrcIdx : SrcIndices) {
3393     if (SrcIdx == -1) break;
3394     const MCOperand &Src = Inst.getOperand(SrcIdx);
3395     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3396       return false;
3397     }
3398   }
3399 
3400   if (Src0Idx == -1)
3401     return true;
3402 
3403   const MCOperand &Src = Inst.getOperand(Src0Idx);
3404   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3405     return true;
3406 
3407   // lds_direct is specified as src0. Check additional limitations.
3408   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3409 }
3410 
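// Return the source location of the flat offset operand if one was parsed,
// otherwise the current location.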
3411 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3412   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3413     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3414     if (Op.isFlatOffset())
3415       return Op.getStartLoc();
3416   }
3417   return getLoc();
3418 }
3419 
3420 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3421                                          const OperandVector &Operands) {
3422   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3423   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3424     return true;
3425 
3426   auto Opcode = Inst.getOpcode();
3427   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3428   assert(OpNum != -1);
3429 
3430   const auto &Op = Inst.getOperand(OpNum);
3431   if (!hasFlatOffsets() && Op.getImm() != 0) {
3432     Error(getFlatOffsetLoc(Operands),
3433           "flat offset modifier is not supported on this GPU");
3434     return false;
3435   }
3436 
3437   // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
3438   // For FLAT segment the offset must be positive;
3439   // MSB is ignored and forced to zero.
3440   unsigned OffsetSize = isGFX9() ? 13 : 12;
3441   if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
3442     if (!isIntN(OffsetSize, Op.getImm())) {
3443       Error(getFlatOffsetLoc(Operands),
3444             isGFX9() ? "expected a 13-bit signed offset" :
3445                        "expected a 12-bit signed offset");
3446       return false;
3447     }
3448   } else {
3449     if (!isUIntN(OffsetSize - 1, Op.getImm())) {
3450       Error(getFlatOffsetLoc(Operands),
3451             isGFX9() ? "expected a 12-bit unsigned offset" :
3452                        "expected an 11-bit unsigned offset");
3453       return false;
3454     }
3455   }
3456 
3457   return true;
3458 }
3459 
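// Return the source location of the SMEM offset operand if one was parsed,
// otherwise the current location.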
3460 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3461   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3462     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3463     if (Op.isSMEMOffset())
3464       return Op.getStartLoc();
3465   }
3466   return getLoc();
3467 }
3468 
3469 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3470                                          const OperandVector &Operands) {
3471   if (isCI() || isSI())
3472     return true;
3473 
3474   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3475   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3476     return true;
3477 
3478   auto Opcode = Inst.getOpcode();
3479   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3480   if (OpNum == -1)
3481     return true;
3482 
3483   const auto &Op = Inst.getOperand(OpNum);
3484   if (!Op.isImm())
3485     return true;
3486 
3487   uint64_t Offset = Op.getImm();
3488   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3489   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3490       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3491     return true;
3492 
3493   Error(getSMEMOffsetLoc(Operands),
3494         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3495                                "expected a 21-bit signed offset");
3496 
3497   return false;
3498 }
3499 
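// SOP2/SOPC instructions may use at most one literal constant or
// relocatable expression across their source operands.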
3500 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3501   unsigned Opcode = Inst.getOpcode();
3502   const MCInstrDesc &Desc = MII.get(Opcode);
3503   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3504     return true;
3505 
3506   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3507   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3508 
3509   const int OpIndices[] = { Src0Idx, Src1Idx };
3510 
3511   unsigned NumExprs = 0;
3512   unsigned NumLiterals = 0;
3513   uint32_t LiteralValue;
3514 
3515   for (int OpIdx : OpIndices) {
3516     if (OpIdx == -1) break;
3517 
3518     const MCOperand &MO = Inst.getOperand(OpIdx);
3519     // Exclude special imm operands (such as the one used by s_set_gpr_idx_on).
3520     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3521       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3522         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3523         if (NumLiterals == 0 || LiteralValue != Value) {
3524           LiteralValue = Value;
3525           ++NumLiterals;
3526         }
3527       } else if (MO.isExpr()) {
3528         ++NumExprs;
3529       }
3530     }
3531   }
3532 
3533   return NumLiterals + NumExprs <= 1;
3534 }
3535 
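// v_permlane16/v_permlanex16 only use the low two op_sel bits.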
3536 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3537   const unsigned Opc = Inst.getOpcode();
3538   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3539       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3540     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3541     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3542 
3543     if (OpSel & ~3)
3544       return false;
3545   }
3546   return true;
3547 }
3548 
3549 // Check if VCC register matches wavefront size
3550 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3551   auto FB = getFeatureBits();
3552   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3553     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3554 }
3555 
3556 // A VOP3 literal is only allowed on GFX10+, and only one can be used.
3557 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3558   unsigned Opcode = Inst.getOpcode();
3559   const MCInstrDesc &Desc = MII.get(Opcode);
3560   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3561     return true;
3562 
3563   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3564   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3565   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3566 
3567   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3568 
3569   unsigned NumExprs = 0;
3570   unsigned NumLiterals = 0;
3571   uint32_t LiteralValue;
3572 
3573   for (int OpIdx : OpIndices) {
3574     if (OpIdx == -1) break;
3575 
3576     const MCOperand &MO = Inst.getOperand(OpIdx);
3577     if (!MO.isImm() && !MO.isExpr())
3578       continue;
3579     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3580       continue;
3581 
3582     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3583         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug])
3584       return false;
3585 
3586     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3587       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3588       if (NumLiterals == 0 || LiteralValue != Value) {
3589         LiteralValue = Value;
3590         ++NumLiterals;
3591       }
3592     } else if (MO.isExpr()) {
3593       ++NumExprs;
3594     }
3595   }
3596   NumLiterals += NumExprs;
3597 
3598   return !NumLiterals ||
3599          (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3600 }
3601 
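// Run target-specific semantic checks on a successfully matched instruction.
// Reports a diagnostic and returns false on the first failed check.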
3602 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3603                                           const SMLoc &IDLoc,
3604                                           const OperandVector &Operands) {
3605   if (!validateLdsDirect(Inst)) {
3606     Error(IDLoc,
3607       "invalid use of lds_direct");
3608     return false;
3609   }
3610   if (!validateSOPLiteral(Inst)) {
3611     Error(IDLoc,
3612       "only one literal operand is allowed");
3613     return false;
3614   }
3615   if (!validateVOP3Literal(Inst)) {
3616     Error(IDLoc,
3617       "invalid literal operand");
3618     return false;
3619   }
3620   if (!validateConstantBusLimitations(Inst)) {
3621     Error(IDLoc,
3622       "invalid operand (violates constant bus restrictions)");
3623     return false;
3624   }
3625   if (!validateEarlyClobberLimitations(Inst)) {
3626     Error(IDLoc,
3627       "destination must be different than all sources");
3628     return false;
3629   }
3630   if (!validateIntClampSupported(Inst)) {
3631     Error(IDLoc,
3632       "integer clamping is not supported on this GPU");
3633     return false;
3634   }
3635   if (!validateOpSel(Inst)) {
3636     Error(IDLoc,
3637       "invalid op_sel operand");
3638     return false;
3639   }
3640   // For MUBUF/MTBUF, d16 is a part of the opcode, so there is nothing to validate.
3641   if (!validateMIMGD16(Inst)) {
3642     Error(IDLoc,
3643       "d16 modifier is not supported on this GPU");
3644     return false;
3645   }
3646   if (!validateMIMGDim(Inst)) {
3647     Error(IDLoc, "dim modifier is required on this GPU");
3648     return false;
3649   }
3650   if (!validateMIMGDataSize(Inst)) {
3651     Error(IDLoc,
3652       "image data size does not match dmask and tfe");
3653     return false;
3654   }
3655   if (!validateMIMGAddrSize(Inst)) {
3656     Error(IDLoc,
3657       "image address size does not match dim and a16");
3658     return false;
3659   }
3660   if (!validateMIMGAtomicDMask(Inst)) {
3661     Error(IDLoc,
3662       "invalid atomic image dmask");
3663     return false;
3664   }
3665   if (!validateMIMGGatherDMask(Inst)) {
3666     Error(IDLoc,
3667       "invalid image_gather dmask: only one bit must be set");
3668     return false;
3669   }
3670   if (!validateMovrels(Inst)) {
3671     Error(IDLoc, "source operand must be a VGPR");
3672     return false;
3673   }
3674   if (!validateFlatOffset(Inst, Operands)) {
3675     return false;
3676   }
3677   if (!validateSMEMOffset(Inst, Operands)) {
3678     return false;
3679   }
3680   if (!validateMAIAccWrite(Inst)) {
3681     return false;
3682   }
3683 
3684   return true;
3685 }
3686 
3687 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3688                                             const FeatureBitset &FBS,
3689                                             unsigned VariantID = 0);
3690 
3691 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3692                                               OperandVector &Operands,
3693                                               MCStreamer &Out,
3694                                               uint64_t &ErrorInfo,
3695                                               bool MatchingInlineAsm) {
3696   MCInst Inst;
3697   unsigned Result = Match_Success;
3698   for (auto Variant : getMatchedVariants()) {
3699     uint64_t EI;
3700     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3701                                   Variant);
3702     // Match statuses are ordered from least to most specific, and we keep the
3703     // most specific status seen so far as the result:
3704     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3705     if ((R == Match_Success) ||
3706         (R == Match_PreferE32) ||
3707         (R == Match_MissingFeature && Result != Match_PreferE32) ||
3708         (R == Match_InvalidOperand && Result != Match_MissingFeature
3709                                    && Result != Match_PreferE32) ||
3710         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
3711                                    && Result != Match_MissingFeature
3712                                    && Result != Match_PreferE32)) {
3713       Result = R;
3714       ErrorInfo = EI;
3715     }
3716     if (R == Match_Success)
3717       break;
3718   }
3719 
3720   switch (Result) {
3721   default: break;
3722   case Match_Success:
3723     if (!validateInstruction(Inst, IDLoc, Operands)) {
3724       return true;
3725     }
3726     Inst.setLoc(IDLoc);
3727     Out.emitInstruction(Inst, getSTI());
3728     return false;
3729 
3730   case Match_MissingFeature:
3731     return Error(IDLoc, "instruction not supported on this GPU");
3732 
3733   case Match_MnemonicFail: {
3734     FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3735     std::string Suggestion = AMDGPUMnemonicSpellCheck(
3736         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
3737     return Error(IDLoc, "invalid instruction" + Suggestion,
3738                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
3739   }
3740 
3741   case Match_InvalidOperand: {
3742     SMLoc ErrorLoc = IDLoc;
3743     if (ErrorInfo != ~0ULL) {
3744       if (ErrorInfo >= Operands.size()) {
3745         return Error(IDLoc, "too few operands for instruction");
3746       }
3747       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3748       if (ErrorLoc == SMLoc())
3749         ErrorLoc = IDLoc;
3750     }
3751     return Error(ErrorLoc, "invalid operand for instruction");
3752   }
3753 
3754   case Match_PreferE32:
3755     return Error(IDLoc, "internal error: instruction without _e64 suffix "
3756                         "should be encoded as e32");
3757   }
3758   llvm_unreachable("Implement any new match types added!");
3759 }
3760 
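// Parse an absolute expression and truncate it to 32 bits.
// Returns true on failure, following the usual MC parser convention.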
3761 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3762   int64_t Tmp = -1;
3763   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3764     return true;
3765   }
3766   if (getParser().parseAbsoluteExpression(Tmp)) {
3767     return true;
3768   }
3769   Ret = static_cast<uint32_t>(Tmp);
3770   return false;
3771 }
3772 
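// Parse a "major, minor" version pair used by the HSA code object directives.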
3773 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3774                                                uint32_t &Minor) {
3775   if (ParseAsAbsoluteExpression(Major))
3776     return TokError("invalid major version");
3777 
3778   if (getLexer().isNot(AsmToken::Comma))
3779     return TokError("minor version number required, comma expected");
3780   Lex();
3781 
3782   if (ParseAsAbsoluteExpression(Minor))
3783     return TokError("invalid minor version");
3784 
3785   return false;
3786 }
3787 
3788 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3789   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3790     return TokError("directive only supported for amdgcn architecture");
3791 
3792   std::string Target;
3793 
3794   SMLoc TargetStart = getTok().getLoc();
3795   if (getParser().parseEscapedString(Target))
3796     return true;
3797   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3798 
3799   std::string ExpectedTarget;
3800   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3801   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3802 
3803   if (Target != ExpectedTargetOS.str())
3804     return getParser().Error(TargetRange.Start, "target must match options",
3805                              TargetRange);
3806 
3807   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3808   return false;
3809 }
3810 
3811 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3812   return getParser().Error(Range.Start, "value out of range", Range);
3813 }
3814 
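// Convert the next free VGPR/SGPR numbers into the granulated register block
// counts stored in the kernel descriptor, checking subtarget limits.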
3815 bool AMDGPUAsmParser::calculateGPRBlocks(
3816     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3817     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
3818     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
3819     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
3820   // TODO(scott.linder): These calculations are duplicated from
3821   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3822   IsaVersion Version = getIsaVersion(getSTI().getCPU());
3823 
3824   unsigned NumVGPRs = NextFreeVGPR;
3825   unsigned NumSGPRs = NextFreeSGPR;
3826 
3827   if (Version.Major >= 10)
3828     NumSGPRs = 0;
3829   else {
3830     unsigned MaxAddressableNumSGPRs =
3831         IsaInfo::getAddressableNumSGPRs(&getSTI());
3832 
3833     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3834         NumSGPRs > MaxAddressableNumSGPRs)
3835       return OutOfRangeError(SGPRRange);
3836 
3837     NumSGPRs +=
3838         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3839 
3840     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3841         NumSGPRs > MaxAddressableNumSGPRs)
3842       return OutOfRangeError(SGPRRange);
3843 
3844     if (Features.test(FeatureSGPRInitBug))
3845       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3846   }
3847 
3848   VGPRBlocks =
3849       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
3850   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3851 
3852   return false;
3853 }
3854 
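// Parse the body of a .amdhsa_kernel directive: collect .amdhsa_* fields into
// a kernel descriptor and emit it when .end_amdhsa_kernel is reached.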
3855 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3856   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3857     return TokError("directive only supported for amdgcn architecture");
3858 
3859   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3860     return TokError("directive only supported for amdhsa OS");
3861 
3862   StringRef KernelName;
3863   if (getParser().parseIdentifier(KernelName))
3864     return true;
3865 
3866   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3867 
3868   StringSet<> Seen;
3869 
3870   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3871 
3872   SMRange VGPRRange;
3873   uint64_t NextFreeVGPR = 0;
3874   SMRange SGPRRange;
3875   uint64_t NextFreeSGPR = 0;
3876   unsigned UserSGPRCount = 0;
3877   bool ReserveVCC = true;
3878   bool ReserveFlatScr = true;
3879   bool ReserveXNACK = hasXNACK();
3880   Optional<bool> EnableWavefrontSize32;
3881 
3882   while (true) {
3883     while (getLexer().is(AsmToken::EndOfStatement))
3884       Lex();
3885 
3886     if (getLexer().isNot(AsmToken::Identifier))
3887       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3888 
3889     StringRef ID = getTok().getIdentifier();
3890     SMRange IDRange = getTok().getLocRange();
3891     Lex();
3892 
3893     if (ID == ".end_amdhsa_kernel")
3894       break;
3895 
3896     if (Seen.find(ID) != Seen.end())
3897       return TokError(".amdhsa_ directives cannot be repeated");
3898     Seen.insert(ID);
3899 
3900     SMLoc ValStart = getTok().getLoc();
3901     int64_t IVal;
3902     if (getParser().parseAbsoluteExpression(IVal))
3903       return true;
3904     SMLoc ValEnd = getTok().getLoc();
3905     SMRange ValRange = SMRange(ValStart, ValEnd);
3906 
3907     if (IVal < 0)
3908       return OutOfRangeError(ValRange);
3909 
3910     uint64_t Val = IVal;
3911 
3912 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
3913   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
3914     return OutOfRangeError(RANGE);                                             \
3915   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3916 
3917     if (ID == ".amdhsa_group_segment_fixed_size") {
3918       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3919         return OutOfRangeError(ValRange);
3920       KD.group_segment_fixed_size = Val;
3921     } else if (ID == ".amdhsa_private_segment_fixed_size") {
3922       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3923         return OutOfRangeError(ValRange);
3924       KD.private_segment_fixed_size = Val;
3925     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3926       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3927                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3928                        Val, ValRange);
3929       if (Val)
3930         UserSGPRCount += 4;
3931     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3932       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3933                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3934                        ValRange);
3935       if (Val)
3936         UserSGPRCount += 2;
3937     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3938       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3939                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3940                        ValRange);
3941       if (Val)
3942         UserSGPRCount += 2;
3943     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3944       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3945                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3946                        Val, ValRange);
3947       if (Val)
3948         UserSGPRCount += 2;
3949     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3950       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3951                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3952                        ValRange);
3953       if (Val)
3954         UserSGPRCount += 2;
3955     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3956       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3957                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3958                        ValRange);
3959       if (Val)
3960         UserSGPRCount += 2;
3961     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3962       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3963                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3964                        Val, ValRange);
3965       if (Val)
3966         UserSGPRCount += 1;
3967     } else if (ID == ".amdhsa_wavefront_size32") {
3968       if (IVersion.Major < 10)
3969         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3970                                  IDRange);
3971       EnableWavefrontSize32 = Val;
3972       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3973                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
3974                        Val, ValRange);
3975     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3976       PARSE_BITS_ENTRY(
3977           KD.compute_pgm_rsrc2,
3978           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3979           ValRange);
3980     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3981       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3982                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3983                        ValRange);
3984     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3985       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3986                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3987                        ValRange);
3988     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3989       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3990                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3991                        ValRange);
3992     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3993       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3994                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3995                        ValRange);
3996     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3997       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3998                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3999                        ValRange);
4000     } else if (ID == ".amdhsa_next_free_vgpr") {
4001       VGPRRange = ValRange;
4002       NextFreeVGPR = Val;
4003     } else if (ID == ".amdhsa_next_free_sgpr") {
4004       SGPRRange = ValRange;
4005       NextFreeSGPR = Val;
4006     } else if (ID == ".amdhsa_reserve_vcc") {
4007       if (!isUInt<1>(Val))
4008         return OutOfRangeError(ValRange);
4009       ReserveVCC = Val;
4010     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4011       if (IVersion.Major < 7)
4012         return getParser().Error(IDRange.Start, "directive requires gfx7+",
4013                                  IDRange);
4014       if (!isUInt<1>(Val))
4015         return OutOfRangeError(ValRange);
4016       ReserveFlatScr = Val;
4017     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4018       if (IVersion.Major < 8)
4019         return getParser().Error(IDRange.Start, "directive requires gfx8+",
4020                                  IDRange);
4021       if (!isUInt<1>(Val))
4022         return OutOfRangeError(ValRange);
4023       ReserveXNACK = Val;
4024     } else if (ID == ".amdhsa_float_round_mode_32") {
4025       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4026                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4027     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4028       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4029                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4030     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4031       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4032                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4033     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4034       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4035                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4036                        ValRange);
4037     } else if (ID == ".amdhsa_dx10_clamp") {
4038       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4039                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4040     } else if (ID == ".amdhsa_ieee_mode") {
4041       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4042                        Val, ValRange);
4043     } else if (ID == ".amdhsa_fp16_overflow") {
4044       if (IVersion.Major < 9)
4045         return getParser().Error(IDRange.Start, "directive requires gfx9+",
4046                                  IDRange);
4047       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4048                        ValRange);
4049     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4050       if (IVersion.Major < 10)
4051         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4052                                  IDRange);
4053       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4054                        ValRange);
4055     } else if (ID == ".amdhsa_memory_ordered") {
4056       if (IVersion.Major < 10)
4057         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4058                                  IDRange);
4059       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4060                        ValRange);
4061     } else if (ID == ".amdhsa_forward_progress") {
4062       if (IVersion.Major < 10)
4063         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4064                                  IDRange);
4065       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4066                        ValRange);
4067     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4068       PARSE_BITS_ENTRY(
4069           KD.compute_pgm_rsrc2,
4070           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4071           ValRange);
4072     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4073       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4074                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4075                        Val, ValRange);
4076     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4077       PARSE_BITS_ENTRY(
4078           KD.compute_pgm_rsrc2,
4079           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4080           ValRange);
4081     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4082       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4083                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4084                        Val, ValRange);
4085     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4086       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4087                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4088                        Val, ValRange);
4089     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4090       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4091                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4092                        Val, ValRange);
4093     } else if (ID == ".amdhsa_exception_int_div_zero") {
4094       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4095                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4096                        Val, ValRange);
4097     } else {
4098       return getParser().Error(IDRange.Start,
4099                                "unknown .amdhsa_kernel directive", IDRange);
4100     }
4101 
4102 #undef PARSE_BITS_ENTRY
4103   }
4104 
4105   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4106     return TokError(".amdhsa_next_free_vgpr directive is required");
4107 
4108   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4109     return TokError(".amdhsa_next_free_sgpr directive is required");
4110 
4111   unsigned VGPRBlocks;
4112   unsigned SGPRBlocks;
4113   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4114                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
4115                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4116                          SGPRBlocks))
4117     return true;
4118 
4119   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4120           VGPRBlocks))
4121     return OutOfRangeError(VGPRRange);
4122   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4123                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4124 
4125   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4126           SGPRBlocks))
4127     return OutOfRangeError(SGPRRange);
4128   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4129                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4130                   SGPRBlocks);
4131 
4132   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4133     return TokError("too many user SGPRs enabled");
4134   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4135                   UserSGPRCount);
4136 
4137   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4138       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4139       ReserveFlatScr, ReserveXNACK);
4140   return false;
4141 }
4142 
4143 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4144   uint32_t Major;
4145   uint32_t Minor;
4146 
4147   if (ParseDirectiveMajorMinor(Major, Minor))
4148     return true;
4149 
4150   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4151   return false;
4152 }
4153 
4154 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4155   uint32_t Major;
4156   uint32_t Minor;
4157   uint32_t Stepping;
4158   StringRef VendorName;
4159   StringRef ArchName;
4160 
4161   // If this directive has no arguments, then use the ISA version for the
4162   // targeted GPU.
4163   if (getLexer().is(AsmToken::EndOfStatement)) {
4164     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4165     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
4166                                                       ISA.Stepping,
4167                                                       "AMD", "AMDGPU");
4168     return false;
4169   }
4170 
4171   if (ParseDirectiveMajorMinor(Major, Minor))
4172     return true;
4173 
4174   if (getLexer().isNot(AsmToken::Comma))
4175     return TokError("stepping version number required, comma expected");
4176   Lex();
4177 
4178   if (ParseAsAbsoluteExpression(Stepping))
4179     return TokError("invalid stepping version");
4180 
4181   if (getLexer().isNot(AsmToken::Comma))
4182     return TokError("vendor name required, comma expected");
4183   Lex();
4184 
4185   if (getLexer().isNot(AsmToken::String))
4186     return TokError("invalid vendor name");
4187 
4188   VendorName = getLexer().getTok().getStringContents();
4189   Lex();
4190 
4191   if (getLexer().isNot(AsmToken::Comma))
4192     return TokError("arch name required, comma expected");
4193   Lex();
4194 
4195   if (getLexer().isNot(AsmToken::String))
4196     return TokError("invalid arch name");
4197 
4198   ArchName = getLexer().getTok().getStringContents();
4199   Lex();
4200 
4201   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
4202                                                     VendorName, ArchName);
4203   return false;
4204 }
4205 
4206 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4207                                                amd_kernel_code_t &Header) {
4208   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4209   // assembly for backwards compatibility.
4210   if (ID == "max_scratch_backing_memory_byte_size") {
4211     Parser.eatToEndOfStatement();
4212     return false;
4213   }
4214 
4215   SmallString<40> ErrStr;
4216   raw_svector_ostream Err(ErrStr);
4217   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4218     return TokError(Err.str());
4219   }
4220   Lex();
4221 
4222   if (ID == "enable_wavefront_size32") {
4223     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4224       if (!isGFX10())
4225         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4226       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4227         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4228     } else {
4229       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4230         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4231     }
4232   }
4233 
4234   if (ID == "wavefront_size") {
4235     if (Header.wavefront_size == 5) {
4236       if (!isGFX10())
4237         return TokError("wavefront_size=5 is only allowed on GFX10+");
4238       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4239         return TokError("wavefront_size=5 requires +WavefrontSize32");
4240     } else if (Header.wavefront_size == 6) {
4241       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4242         return TokError("wavefront_size=6 requires +WavefrontSize64");
4243     }
4244   }
4245 
4246   if (ID == "enable_wgp_mode") {
4247     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
4248       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4249   }
4250 
4251   if (ID == "enable_mem_ordered") {
4252     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
4253       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4254   }
4255 
4256   if (ID == "enable_fwd_progress") {
4257     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
4258       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4259   }
4260 
4261   return false;
4262 }
4263 
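// Parse the body of a .amd_kernel_code_t directive up to
// .end_amd_kernel_code_t and emit the resulting amd_kernel_code_t header.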
4264 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4265   amd_kernel_code_t Header;
4266   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4267 
4268   while (true) {
4269     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4270     // will set the current token to EndOfStatement.
4271     while(getLexer().is(AsmToken::EndOfStatement))
4272       Lex();
4273 
4274     if (getLexer().isNot(AsmToken::Identifier))
4275       return TokError("expected value identifier or .end_amd_kernel_code_t");
4276 
4277     StringRef ID = getLexer().getTok().getIdentifier();
4278     Lex();
4279 
4280     if (ID == ".end_amd_kernel_code_t")
4281       break;
4282 
4283     if (ParseAMDKernelCodeTValue(ID, Header))
4284       return true;
4285   }
4286 
4287   getTargetStreamer().EmitAMDKernelCodeT(Header);
4288 
4289   return false;
4290 }
4291 
4292 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4293   if (getLexer().isNot(AsmToken::Identifier))
4294     return TokError("expected symbol name");
4295 
4296   StringRef KernelName = Parser.getTok().getString();
4297 
4298   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4299                                            ELF::STT_AMDGPU_HSA_KERNEL);
4300   Lex();
4301   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
4302     KernelScope.initialize(getContext());
4303   return false;
4304 }
4305 
4306 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4307   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4308     return Error(getParser().getTok().getLoc(),
4309                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4310                  "architectures");
4311   }
4312 
4313   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
4314 
4315   std::string ISAVersionStringFromSTI;
4316   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4317   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4318 
4319   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4320     return Error(getParser().getTok().getLoc(),
4321                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4322                  "arguments specified through the command line");
4323   }
4324 
4325   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4326   Lex();
4327 
4328   return false;
4329 }
4330 
4331 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4332   const char *AssemblerDirectiveBegin;
4333   const char *AssemblerDirectiveEnd;
4334   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4335       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
4336           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4337                             HSAMD::V3::AssemblerDirectiveEnd)
4338           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4339                             HSAMD::AssemblerDirectiveEnd);
4340 
4341   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4342     return Error(getParser().getTok().getLoc(),
4343                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4344                  "not available on non-amdhsa OSes")).str());
4345   }
4346 
4347   std::string HSAMetadataString;
4348   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4349                           HSAMetadataString))
4350     return true;
4351 
4352   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
4353     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4354       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4355   } else {
4356     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4357       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4358   }
4359 
4360   return false;
4361 }
4362 
4363 /// Common code to parse out a block of text (typically YAML) between start and
4364 /// end directives.
4365 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4366                                           const char *AssemblerDirectiveEnd,
4367                                           std::string &CollectString) {
4368 
4369   raw_string_ostream CollectStream(CollectString);
4370 
4371   getLexer().setSkipSpace(false);
4372 
4373   bool FoundEnd = false;
4374   while (!getLexer().is(AsmToken::Eof)) {
4375     while (getLexer().is(AsmToken::Space)) {
4376       CollectStream << getLexer().getTok().getString();
4377       Lex();
4378     }
4379 
4380     if (getLexer().is(AsmToken::Identifier)) {
4381       StringRef ID = getLexer().getTok().getIdentifier();
4382       if (ID == AssemblerDirectiveEnd) {
4383         Lex();
4384         FoundEnd = true;
4385         break;
4386       }
4387     }
4388 
4389     CollectStream << Parser.parseStringToEndOfStatement()
4390                   << getContext().getAsmInfo()->getSeparatorString();
4391 
4392     Parser.eatToEndOfStatement();
4393   }
4394 
4395   getLexer().setSkipSpace(true);
4396 
4397   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
4398     return TokError(Twine("expected directive ") +
4399                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4400   }
4401 
4402   CollectStream.flush();
4403   return false;
4404 }
4405 
4406 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4407 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4408   std::string String;
4409   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4410                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4411     return true;
4412 
4413   auto PALMetadata = getTargetStreamer().getPALMetadata();
4414   if (!PALMetadata->setFromString(String))
4415     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4416   return false;
4417 }
4418 
4419 /// Parse the assembler directive for old linear-format PAL metadata.
4420 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4421   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4422     return Error(getParser().getTok().getLoc(),
4423                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4424                  "not available on non-amdpal OSes")).str());
4425   }
4426 
4427   auto PALMetadata = getTargetStreamer().getPALMetadata();
4428   PALMetadata->setLegacy();
4429   for (;;) {
4430     uint32_t Key, Value;
4431     if (ParseAsAbsoluteExpression(Key)) {
4432       return TokError(Twine("invalid value in ") +
4433                       Twine(PALMD::AssemblerDirective));
4434     }
4435     if (getLexer().isNot(AsmToken::Comma)) {
4436       return TokError(Twine("expected an even number of values in ") +
4437                       Twine(PALMD::AssemblerDirective));
4438     }
4439     Lex();
4440     if (ParseAsAbsoluteExpression(Value)) {
4441       return TokError(Twine("invalid value in ") +
4442                       Twine(PALMD::AssemblerDirective));
4443     }
4444     PALMetadata->setRegister(Key, Value);
4445     if (getLexer().isNot(AsmToken::Comma))
4446       break;
4447     Lex();
4448   }
4449   return false;
4450 }
4451 
4452 /// ParseDirectiveAMDGPULDS
4453 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
4454 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4455   if (getParser().checkForValidSection())
4456     return true;
4457 
4458   StringRef Name;
4459   SMLoc NameLoc = getLexer().getLoc();
4460   if (getParser().parseIdentifier(Name))
4461     return TokError("expected identifier in directive");
4462 
4463   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4464   if (parseToken(AsmToken::Comma, "expected ','"))
4465     return true;
4466 
4467   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4468 
4469   int64_t Size;
4470   SMLoc SizeLoc = getLexer().getLoc();
4471   if (getParser().parseAbsoluteExpression(Size))
4472     return true;
4473   if (Size < 0)
4474     return Error(SizeLoc, "size must be non-negative");
4475   if (Size > LocalMemorySize)
4476     return Error(SizeLoc, "size is too large");
4477 
4478   int64_t Alignment = 4;
4479   if (getLexer().is(AsmToken::Comma)) {
4480     Lex();
4481     SMLoc AlignLoc = getLexer().getLoc();
4482     if (getParser().parseAbsoluteExpression(Alignment))
4483       return true;
4484     if (Alignment < 0 || !isPowerOf2_64(Alignment))
4485       return Error(AlignLoc, "alignment must be a power of two");
4486 
4487     // Alignment larger than the size of LDS is possible in theory, as long
4488     // as the linker manages to place the symbol at address 0, but we do want
4489     // to make sure the alignment fits nicely into a 32-bit integer.
4490     if (Alignment >= 1u << 31)
4491       return Error(AlignLoc, "alignment is too large");
4492   }
4493 
4494   if (parseToken(AsmToken::EndOfStatement,
4495                  "unexpected token in '.amdgpu_lds' directive"))
4496     return true;
4497 
4498   Symbol->redefineIfPossible();
4499   if (!Symbol->isUndefined())
4500     return Error(NameLoc, "invalid symbol redefinition");
4501 
4502   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
4503   return false;
4504 }
4505 
4506 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4507   StringRef IDVal = DirectiveID.getString();
4508 
4509   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
4510     if (IDVal == ".amdgcn_target")
4511       return ParseDirectiveAMDGCNTarget();
4512 
4513     if (IDVal == ".amdhsa_kernel")
4514       return ParseDirectiveAMDHSAKernel();
4515 
4516     // TODO: Restructure/combine with PAL metadata directive.
4517     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4518       return ParseDirectiveHSAMetadata();
4519   } else {
4520     if (IDVal == ".hsa_code_object_version")
4521       return ParseDirectiveHSACodeObjectVersion();
4522 
4523     if (IDVal == ".hsa_code_object_isa")
4524       return ParseDirectiveHSACodeObjectISA();
4525 
4526     if (IDVal == ".amd_kernel_code_t")
4527       return ParseDirectiveAMDKernelCodeT();
4528 
4529     if (IDVal == ".amdgpu_hsa_kernel")
4530       return ParseDirectiveAMDGPUHsaKernel();
4531 
4532     if (IDVal == ".amd_amdgpu_isa")
4533       return ParseDirectiveISAVersion();
4534 
4535     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4536       return ParseDirectiveHSAMetadata();
4537   }
4538 
4539   if (IDVal == ".amdgpu_lds")
4540     return ParseDirectiveAMDGPULDS();
4541 
4542   if (IDVal == PALMD::AssemblerDirectiveBegin)
4543     return ParseDirectivePALMetadataBegin();
4544 
4545   if (IDVal == PALMD::AssemblerDirective)
4546     return ParseDirectivePALMetadata();
4547 
4548   return true;
4549 }
4550 
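// Check whether RegNo is usable as a register operand on the current
// subtarget. For example, ttmp12-ttmp15 and sgpr104/sgpr105 are only accepted
// on targets that have them, and flat_scratch is rejected as a register
// operand on SI and GFX10 (see the per-case checks below).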
4551 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4552                                            unsigned RegNo) const {
4553 
4554   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4555        R.isValid(); ++R) {
4556     if (*R == RegNo)
4557       return isGFX9() || isGFX10();
4558   }
4559 
4560   // GFX10 has 2 more SGPRs 104 and 105.
4561   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4562        R.isValid(); ++R) {
4563     if (*R == RegNo)
4564       return hasSGPR104_SGPR105();
4565   }
4566 
4567   switch (RegNo) {
4568   case AMDGPU::SRC_SHARED_BASE:
4569   case AMDGPU::SRC_SHARED_LIMIT:
4570   case AMDGPU::SRC_PRIVATE_BASE:
4571   case AMDGPU::SRC_PRIVATE_LIMIT:
4572   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4573     return !isCI() && !isSI() && !isVI();
4574   case AMDGPU::TBA:
4575   case AMDGPU::TBA_LO:
4576   case AMDGPU::TBA_HI:
4577   case AMDGPU::TMA:
4578   case AMDGPU::TMA_LO:
4579   case AMDGPU::TMA_HI:
4580     return !isGFX9() && !isGFX10();
4581   case AMDGPU::XNACK_MASK:
4582   case AMDGPU::XNACK_MASK_LO:
4583   case AMDGPU::XNACK_MASK_HI:
4584     return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4585   case AMDGPU::SGPR_NULL:
4586     return isGFX10();
4587   default:
4588     break;
4589   }
4590 
4591   if (isCI())
4592     return true;
4593 
4594   if (isSI() || isGFX10()) {
4595     // No flat_scr on SI.
4596     // On GFX10 flat scratch is not a valid register operand and can only be
4597     // accessed with s_setreg/s_getreg.
4598     switch (RegNo) {
4599     case AMDGPU::FLAT_SCR:
4600     case AMDGPU::FLAT_SCR_LO:
4601     case AMDGPU::FLAT_SCR_HI:
4602       return false;
4603     default:
4604       return true;
4605     }
4606   }
4607 
4608   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4609   // SI/CI have.
4610   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4611        R.isValid(); ++R) {
4612     if (*R == RegNo)
4613       return hasSGPR102_SGPR103();
4614   }
4615 
4616   return true;
4617 }
4618 
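// Parse one instruction operand. For GFX10 MIMG instructions the address may
// be written in NSA form as a bracketed register list, e.g. [v0, v2, v4]
// (register names are illustrative); when the list has more than one element
// the brackets are kept as explicit tokens so the matcher can recognize the
// NSA variant.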
4619 OperandMatchResultTy
4620 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4621                               OperandMode Mode) {
4622   // Try to parse with a custom parser
4623   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4624 
4625   // If we successfully parsed the operand or if there was an error parsing,
4626   // we are done.
4627   //
4628   // If we are parsing after we reach EndOfStatement then this means we
4629   // are appending default values to the Operands list.  This is only done
4630   // by custom parser, so we shouldn't continue on to the generic parsing.
4631   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4632       getLexer().is(AsmToken::EndOfStatement))
4633     return ResTy;
4634 
4635   if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4636     unsigned Prefix = Operands.size();
4637     SMLoc LBraceLoc = getTok().getLoc();
4638     Parser.Lex(); // eat the '['
4639 
4640     for (;;) {
4641       ResTy = parseReg(Operands);
4642       if (ResTy != MatchOperand_Success)
4643         return ResTy;
4644 
4645       if (getLexer().is(AsmToken::RBrac))
4646         break;
4647 
4648       if (getLexer().isNot(AsmToken::Comma))
4649         return MatchOperand_ParseFail;
4650       Parser.Lex();
4651     }
4652 
4653     if (Operands.size() - Prefix > 1) {
4654       Operands.insert(Operands.begin() + Prefix,
4655                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4656       Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4657                                                     getTok().getLoc()));
4658     }
4659 
4660     Parser.Lex(); // eat the ']'
4661     return MatchOperand_Success;
4662   }
4663 
4664   return parseRegOrImm(Operands);
4665 }
4666 
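// Strip a trailing encoding-forcing suffix from the mnemonic and remember the
// requested encoding: _e64 forces the 64-bit encoding, _e32 the 32-bit one,
// and _dpp/_sdwa force the DPP/SDWA variants. For example (mnemonic is
// illustrative), "v_add_f32_e64" is parsed as "v_add_f32" with a forced
// 64-bit encoding.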
4667 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4668   // Clear any forced encodings from the previous instruction.
4669   setForcedEncodingSize(0);
4670   setForcedDPP(false);
4671   setForcedSDWA(false);
4672 
4673   if (Name.endswith("_e64")) {
4674     setForcedEncodingSize(64);
4675     return Name.substr(0, Name.size() - 4);
4676   } else if (Name.endswith("_e32")) {
4677     setForcedEncodingSize(32);
4678     return Name.substr(0, Name.size() - 4);
4679   } else if (Name.endswith("_dpp")) {
4680     setForcedDPP(true);
4681     return Name.substr(0, Name.size() - 4);
4682   } else if (Name.endswith("_sdwa")) {
4683     setForcedSDWA(true);
4684     return Name.substr(0, Name.size() - 5);
4685   }
4686   return Name;
4687 }
4688 
4689 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4690                                        StringRef Name,
4691                                        SMLoc NameLoc, OperandVector &Operands) {
4692   // Add the instruction mnemonic
4693   Name = parseMnemonicSuffix(Name);
4694   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4695 
4696   bool IsMIMG = Name.startswith("image_");
4697 
4698   while (!getLexer().is(AsmToken::EndOfStatement)) {
4699     OperandMode Mode = OperandMode_Default;
4700     if (IsMIMG && isGFX10() && Operands.size() == 2)
4701       Mode = OperandMode_NSA;
4702     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4703 
4704     // Eat the comma or space if there is one.
4705     if (getLexer().is(AsmToken::Comma))
4706       Parser.Lex();
4707 
4708     switch (Res) {
4709       case MatchOperand_Success: break;
4710       case MatchOperand_ParseFail:
4711         // FIXME: use real operand location rather than the current location.
4712         Error(getLexer().getLoc(), "failed parsing operand.");
4713         while (!getLexer().is(AsmToken::EndOfStatement)) {
4714           Parser.Lex();
4715         }
4716         return true;
4717       case MatchOperand_NoMatch:
4718         // FIXME: use real operand location rather than the current location.
4719         Error(getLexer().getLoc(), "not a valid operand.");
4720         while (!getLexer().is(AsmToken::EndOfStatement)) {
4721           Parser.Lex();
4722         }
4723         return true;
4724     }
4725   }
4726 
4727   return false;
4728 }
4729 
4730 //===----------------------------------------------------------------------===//
4731 // Utility functions
4732 //===----------------------------------------------------------------------===//
4733 
4734 OperandMatchResultTy
4735 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4736 
4737   if (!trySkipId(Prefix, AsmToken::Colon))
4738     return MatchOperand_NoMatch;
4739 
4740   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4741 }
4742 
4743 OperandMatchResultTy
4744 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4745                                     AMDGPUOperand::ImmTy ImmTy,
4746                                     bool (*ConvertResult)(int64_t&)) {
4747   SMLoc S = getLoc();
4748   int64_t Value = 0;
4749 
4750   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4751   if (Res != MatchOperand_Success)
4752     return Res;
4753 
4754   if (ConvertResult && !ConvertResult(Value)) {
4755     Error(S, "invalid " + StringRef(Prefix) + " value.");
4756   }
4757 
4758   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4759   return MatchOperand_Success;
4760 }
4761 
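// Parse an operand of the form Prefix:[b0,b1,...], where each element must be
// 0 or 1 and at most four elements are accepted. Element I becomes bit I of
// the resulting immediate. The concrete prefix (a boolean-array modifier) is
// supplied by the caller.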
4762 OperandMatchResultTy
4763 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4764                                              OperandVector &Operands,
4765                                              AMDGPUOperand::ImmTy ImmTy,
4766                                              bool (*ConvertResult)(int64_t&)) {
4767   SMLoc S = getLoc();
4768   if (!trySkipId(Prefix, AsmToken::Colon))
4769     return MatchOperand_NoMatch;
4770 
4771   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4772     return MatchOperand_ParseFail;
4773 
4774   unsigned Val = 0;
4775   const unsigned MaxSize = 4;
4776 
4777   // FIXME: How to verify the number of elements matches the number of src
4778   // operands?
4779   for (int I = 0; ; ++I) {
4780     int64_t Op;
4781     SMLoc Loc = getLoc();
4782     if (!parseExpr(Op))
4783       return MatchOperand_ParseFail;
4784 
4785     if (Op != 0 && Op != 1) {
4786       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4787       return MatchOperand_ParseFail;
4788     }
4789 
4790     Val |= (Op << I);
4791 
4792     if (trySkipToken(AsmToken::RBrac))
4793       break;
4794 
4795     if (I + 1 == MaxSize) {
4796       Error(getLoc(), "expected a closing square bracket");
4797       return MatchOperand_ParseFail;
4798     }
4799 
4800     if (!skipToken(AsmToken::Comma, "expected a comma"))
4801       return MatchOperand_ParseFail;
4802   }
4803 
4804   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4805   return MatchOperand_Success;
4806 }
4807 
4808 OperandMatchResultTy
4809 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4810                                AMDGPUOperand::ImmTy ImmTy) {
4811   int64_t Bit = 0;
4812   SMLoc S = Parser.getTok().getLoc();
4813 
4814   // If we are at the end of the statement, this is a default argument and
4815   // the default value is used.
4816   if (getLexer().isNot(AsmToken::EndOfStatement)) {
4817     switch(getLexer().getKind()) {
4818       case AsmToken::Identifier: {
4819         StringRef Tok = Parser.getTok().getString();
4820         if (Tok == Name) {
4821           if (Tok == "r128" && !hasMIMG_R128())
4822             Error(S, "r128 modifier is not supported on this GPU");
4823           if (Tok == "a16" && !isGFX9() && !hasGFX10A16())
4824             Error(S, "a16 modifier is not supported on this GPU");
4825           Bit = 1;
4826           Parser.Lex();
4827         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4828           Bit = 0;
4829           Parser.Lex();
4830         } else {
4831           return MatchOperand_NoMatch;
4832         }
4833         break;
4834       }
4835       default:
4836         return MatchOperand_NoMatch;
4837     }
4838   }
4839 
4840   if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4841     return MatchOperand_ParseFail;
4842 
4843   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
4844     ImmTy = AMDGPUOperand::ImmTyR128A16;
4845 
4846   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4847   return MatchOperand_Success;
4848 }
4849 
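// Append an optional immediate operand to Inst: if an operand of type ImmT
// was parsed, use the value recorded in OptionalIdx, otherwise use Default.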
4850 static void addOptionalImmOperand(
4851   MCInst& Inst, const OperandVector& Operands,
4852   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4853   AMDGPUOperand::ImmTy ImmT,
4854   int64_t Default = 0) {
4855   auto i = OptionalIdx.find(ImmT);
4856   if (i != OptionalIdx.end()) {
4857     unsigned Idx = i->second;
4858     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4859   } else {
4860     Inst.addOperand(MCOperand::createImm(Default));
4861   }
4862 }
4863 
4864 OperandMatchResultTy
4865 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4866   if (getLexer().isNot(AsmToken::Identifier)) {
4867     return MatchOperand_NoMatch;
4868   }
4869   StringRef Tok = Parser.getTok().getString();
4870   if (Tok != Prefix) {
4871     return MatchOperand_NoMatch;
4872   }
4873 
4874   Parser.Lex();
4875   if (getLexer().isNot(AsmToken::Colon)) {
4876     return MatchOperand_ParseFail;
4877   }
4878 
4879   Parser.Lex();
4880   if (getLexer().isNot(AsmToken::Identifier)) {
4881     return MatchOperand_ParseFail;
4882   }
4883 
4884   Value = Parser.getTok().getString();
4885   return MatchOperand_Success;
4886 }
4887 
4888 //===----------------------------------------------------------------------===//
4889 // MTBUF format
4890 //===----------------------------------------------------------------------===//
4891 
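// Try to parse "Pref:<value>". Returns false only on a hard error (a failed
// parse or an out-of-range value); if the prefix is absent, Fmt is left
// unchanged and true is returned so the caller may try other forms.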
4892 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
4893                                   int64_t MaxVal,
4894                                   int64_t &Fmt) {
4895   int64_t Val;
4896   SMLoc Loc = getLoc();
4897 
4898   auto Res = parseIntWithPrefix(Pref, Val);
4899   if (Res == MatchOperand_ParseFail)
4900     return false;
4901   if (Res == MatchOperand_NoMatch)
4902     return true;
4903 
4904   if (Val < 0 || Val > MaxVal) {
4905     Error(Loc, Twine("out of range ", StringRef(Pref)));
4906     return false;
4907   }
4908 
4909   Fmt = Val;
4910   return true;
4911 }
4912 
4913 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
4914 // values to live in a joint format operand in the MCInst encoding.
4915 OperandMatchResultTy
4916 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
4917   using namespace llvm::AMDGPU::MTBUFFormat;
4918 
4919   int64_t Dfmt = DFMT_UNDEF;
4920   int64_t Nfmt = NFMT_UNDEF;
4921 
4922   // dfmt and nfmt can appear in either order, and each is optional.
4923   for (int I = 0; I < 2; ++I) {
4924     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
4925       return MatchOperand_ParseFail;
4926 
4927     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
4928       return MatchOperand_ParseFail;
4929     }
4930     // Skip optional comma between dfmt/nfmt
4931     // but guard against 2 commas following each other.
4932     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
4933         !peekToken().is(AsmToken::Comma)) {
4934       trySkipToken(AsmToken::Comma);
4935     }
4936   }
4937 
4938   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
4939     return MatchOperand_NoMatch;
4940 
4941   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
4942   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
4943 
4944   Format = encodeDfmtNfmt(Dfmt, Nfmt);
4945   return MatchOperand_Success;
4946 }
4947 
4948 OperandMatchResultTy
4949 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
4950   using namespace llvm::AMDGPU::MTBUFFormat;
4951 
4952   int64_t Fmt = UFMT_UNDEF;
4953 
4954   if (!tryParseFmt("format", UFMT_MAX, Fmt))
4955     return MatchOperand_ParseFail;
4956 
4957   if (Fmt == UFMT_UNDEF)
4958     return MatchOperand_NoMatch;
4959 
4960   Format = Fmt;
4961   return MatchOperand_Success;
4962 }
4963 
4964 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
4965                                     int64_t &Nfmt,
4966                                     StringRef FormatStr,
4967                                     SMLoc Loc) {
4968   using namespace llvm::AMDGPU::MTBUFFormat;
4969   int64_t Format;
4970 
4971   Format = getDfmt(FormatStr);
4972   if (Format != DFMT_UNDEF) {
4973     Dfmt = Format;
4974     return true;
4975   }
4976 
4977   Format = getNfmt(FormatStr, getSTI());
4978   if (Format != NFMT_UNDEF) {
4979     Nfmt = Format;
4980     return true;
4981   }
4982 
4983   Error(Loc, "unsupported format");
4984   return false;
4985 }
4986 
4987 OperandMatchResultTy
4988 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
4989                                           SMLoc FormatLoc,
4990                                           int64_t &Format) {
4991   using namespace llvm::AMDGPU::MTBUFFormat;
4992 
4993   int64_t Dfmt = DFMT_UNDEF;
4994   int64_t Nfmt = NFMT_UNDEF;
4995   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
4996     return MatchOperand_ParseFail;
4997 
4998   if (trySkipToken(AsmToken::Comma)) {
4999     StringRef Str;
5000     SMLoc Loc = getLoc();
5001     if (!parseId(Str, "expected a format string") ||
5002         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5003       return MatchOperand_ParseFail;
5004     }
5005     if (Dfmt == DFMT_UNDEF) {
5006       Error(Loc, "duplicate numeric format");
5007     } else if (Nfmt == NFMT_UNDEF) {
5008       Error(Loc, "duplicate data format");
5009     }
5010   }
5011 
5012   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5013   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5014 
5015   if (isGFX10()) {
5016     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5017     if (Ufmt == UFMT_UNDEF)
5018       Error(FormatLoc, "unsupported format");
5019     Format = Ufmt;
5020   } else {
5021     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5022   }
5023 
5024   return MatchOperand_Success;
5025 }
5026 
5027 OperandMatchResultTy
5028 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5029                                             SMLoc Loc,
5030                                             int64_t &Format) {
5031   using namespace llvm::AMDGPU::MTBUFFormat;
5032 
5033   auto Id = getUnifiedFormat(FormatStr);
5034   if (Id == UFMT_UNDEF)
5035     return MatchOperand_NoMatch;
5036 
5037   if (!isGFX10()) {
5038     Error(Loc, "unified format is not supported on this GPU");
5039     return MatchOperand_ParseFail;
5040   }
5041 
5042   Format = Id;
5043   return MatchOperand_Success;
5044 }
5045 
5046 OperandMatchResultTy
5047 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5048   using namespace llvm::AMDGPU::MTBUFFormat;
5049   SMLoc Loc = getLoc();
5050 
5051   if (!parseExpr(Format))
5052     return MatchOperand_ParseFail;
5053   if (!isValidFormatEncoding(Format, getSTI())) {
5054     Error(Loc, "out of range format");
5055     return MatchOperand_ParseFail;
5056   }
5057 
5058   return MatchOperand_Success;
5059 }
5060 
5061 OperandMatchResultTy
5062 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5063   using namespace llvm::AMDGPU::MTBUFFormat;
5064 
5065   if (!trySkipId("format", AsmToken::Colon))
5066     return MatchOperand_NoMatch;
5067 
5068   if (trySkipToken(AsmToken::LBrac)) {
5069     StringRef FormatStr;
5070     SMLoc Loc = getLoc();
5071     if (!parseId(FormatStr, "expected a format string"))
5072       return MatchOperand_ParseFail;
5073 
5074     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5075     if (Res == MatchOperand_NoMatch)
5076       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5077     if (Res != MatchOperand_Success)
5078       return Res;
5079 
5080     skipToken(AsmToken::RBrac, "expected a closing square bracket");
5081     return MatchOperand_Success;
5082   }
5083 
5084   return parseNumericFormat(Format);
5085 }
5086 
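// Parse the MTBUF format operand together with the soffset operand that may
// follow it. A default-format immediate is always pushed first; if the format
// specifier turns out to be written after soffset, that immediate is patched
// with the parsed value. A second format specifier is reported as a duplicate.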
5087 OperandMatchResultTy
5088 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5089   using namespace llvm::AMDGPU::MTBUFFormat;
5090 
5091   int64_t Format = getDefaultFormatEncoding(getSTI());
5092   OperandMatchResultTy Res;
5093   SMLoc Loc = getLoc();
5094 
5095   // Parse legacy format syntax.
5096   Res = isGFX10() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5097   if (Res == MatchOperand_ParseFail)
5098     return Res;
5099 
5100   bool FormatFound = (Res == MatchOperand_Success);
5101 
5102   Operands.push_back(
5103     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5104 
5105   if (FormatFound)
5106     trySkipToken(AsmToken::Comma);
5107 
5108   if (isToken(AsmToken::EndOfStatement)) {
5109     // We are expecting an soffset operand,
5110     // but let the matcher handle the error.
5111     return MatchOperand_Success;
5112   }
5113 
5114   // Parse soffset.
5115   Res = parseRegOrImm(Operands);
5116   if (Res != MatchOperand_Success)
5117     return Res;
5118 
5119   trySkipToken(AsmToken::Comma);
5120 
5121   if (!FormatFound) {
5122     if (parseSymbolicOrNumericFormat(Format) == MatchOperand_Success) {
5123       auto Size = Operands.size();
5124       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5125       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5126       Op.setImm(Format);
5127     }
5128     return MatchOperand_Success;
5129   }
5130 
5131   if (isId("format") && peekToken().is(AsmToken::Colon)) {
5132     Error(getLoc(), "duplicate format");
5133     return MatchOperand_ParseFail;
5134   }
5135   return MatchOperand_Success;
5136 }
5137 
5138 //===----------------------------------------------------------------------===//
5139 // ds
5140 //===----------------------------------------------------------------------===//
5141 
5142 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5143                                     const OperandVector &Operands) {
5144   OptionalImmIndexMap OptionalIdx;
5145 
5146   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5147     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5148 
5149     // Add the register arguments
5150     if (Op.isReg()) {
5151       Op.addRegOperands(Inst, 1);
5152       continue;
5153     }
5154 
5155     // Handle optional arguments
5156     OptionalIdx[Op.getImmTy()] = i;
5157   }
5158 
5159   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5160   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5161   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5162 
5163   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5164 }
5165 
5166 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5167                                 bool IsGdsHardcoded) {
5168   OptionalImmIndexMap OptionalIdx;
5169 
5170   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5171     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5172 
5173     // Add the register arguments
5174     if (Op.isReg()) {
5175       Op.addRegOperands(Inst, 1);
5176       continue;
5177     }
5178 
5179     if (Op.isToken() && Op.getToken() == "gds") {
5180       IsGdsHardcoded = true;
5181       continue;
5182     }
5183 
5184     // Handle optional arguments
5185     OptionalIdx[Op.getImmTy()] = i;
5186   }
5187 
5188   AMDGPUOperand::ImmTy OffsetType =
5189     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5190      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5191      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5192                                                       AMDGPUOperand::ImmTyOffset;
5193 
5194   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5195 
5196   if (!IsGdsHardcoded) {
5197     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5198   }
5199   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5200 }
5201 
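// Convert parsed exp operands to MCInst operands. Exactly four sources are
// expected; "off" sources become NoRegister. With the compr modifier the
// third written source is moved into the second slot and the last two slots
// are cleared. The trailing enable mask gets one bit per non-off source, or
// two bits per source when compr is set.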
5202 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
5203   OptionalImmIndexMap OptionalIdx;
5204 
5205   unsigned OperandIdx[4];
5206   unsigned EnMask = 0;
5207   int SrcIdx = 0;
5208 
5209   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5210     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5211 
5212     // Add the register arguments
5213     if (Op.isReg()) {
5214       assert(SrcIdx < 4);
5215       OperandIdx[SrcIdx] = Inst.size();
5216       Op.addRegOperands(Inst, 1);
5217       ++SrcIdx;
5218       continue;
5219     }
5220 
5221     if (Op.isOff()) {
5222       assert(SrcIdx < 4);
5223       OperandIdx[SrcIdx] = Inst.size();
5224       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
5225       ++SrcIdx;
5226       continue;
5227     }
5228 
5229     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
5230       Op.addImmOperands(Inst, 1);
5231       continue;
5232     }
5233 
5234     if (Op.isToken() && Op.getToken() == "done")
5235       continue;
5236 
5237     // Handle optional arguments
5238     OptionalIdx[Op.getImmTy()] = i;
5239   }
5240 
5241   assert(SrcIdx == 4);
5242 
5243   bool Compr = false;
5244   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
5245     Compr = true;
5246     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
5247     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
5248     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
5249   }
5250 
5251   for (auto i = 0; i < SrcIdx; ++i) {
5252     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
5253       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
5254     }
5255   }
5256 
5257   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
5258   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
5259 
5260   Inst.addOperand(MCOperand::createImm(EnMask));
5261 }
5262 
5263 //===----------------------------------------------------------------------===//
5264 // s_waitcnt
5265 //===----------------------------------------------------------------------===//
5266 
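// Fold one counter value into the combined s_waitcnt mask. A value that does
// not survive an encode/decode round trip is out of range; with Saturate set
// the field is clamped to its maximum instead of being reported as a failure.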
5267 static bool
5268 encodeCnt(
5269   const AMDGPU::IsaVersion ISA,
5270   int64_t &IntVal,
5271   int64_t CntVal,
5272   bool Saturate,
5273   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
5274   unsigned (*decode)(const IsaVersion &Version, unsigned))
5275 {
5276   bool Failed = false;
5277 
5278   IntVal = encode(ISA, IntVal, CntVal);
5279   if (CntVal != decode(ISA, IntVal)) {
5280     if (Saturate) {
5281       IntVal = encode(ISA, IntVal, -1);
5282     } else {
5283       Failed = true;
5284     }
5285   }
5286   return Failed;
5287 }
5288 
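// Parse a single counter term of an s_waitcnt operand, e.g. vmcnt(0),
// expcnt(1) or lgkmcnt(2) (counter values are illustrative). A "_sat" suffix
// on the counter name clamps an out-of-range value instead of reporting an
// error. The result is folded into IntVal, the combined waitcnt mask.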
5289 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
5290 
5291   SMLoc CntLoc = getLoc();
5292   StringRef CntName = getTokenStr();
5293 
5294   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
5295       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
5296     return false;
5297 
5298   int64_t CntVal;
5299   SMLoc ValLoc = getLoc();
5300   if (!parseExpr(CntVal))
5301     return false;
5302 
5303   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5304 
5305   bool Failed = true;
5306   bool Sat = CntName.endswith("_sat");
5307 
5308   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
5309     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
5310   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
5311     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
5312   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
5313     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
5314   } else {
5315     Error(CntLoc, "invalid counter name " + CntName);
5316     return false;
5317   }
5318 
5319   if (Failed) {
5320     Error(ValLoc, "too large value for " + CntName);
5321     return false;
5322   }
5323 
5324   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
5325     return false;
5326 
5327   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
5328     if (isToken(AsmToken::EndOfStatement)) {
5329       Error(getLoc(), "expected a counter name");
5330       return false;
5331     }
5332   }
5333 
5334   return true;
5335 }
5336 
5337 OperandMatchResultTy
5338 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
5339   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5340   int64_t Waitcnt = getWaitcntBitMask(ISA);
5341   SMLoc S = getLoc();
5342 
5343   // If parse failed, do not return error code
5344   // to avoid excessive error messages.
5345   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
5346     while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
5347   } else {
5348     parseExpr(Waitcnt);
5349   }
5350 
5351   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
5352   return MatchOperand_Success;
5353 }
5354 
5355 bool
5356 AMDGPUOperand::isSWaitCnt() const {
5357   return isImm();
5358 }
5359 
5360 //===----------------------------------------------------------------------===//
5361 // hwreg
5362 //===----------------------------------------------------------------------===//
5363 
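// Parse the body of a hwreg(...) operand: a register id given either by a
// symbolic name or by a numeric code, optionally followed by a bit offset and
// a bitfield width, e.g. hwreg(<name>, 0, 32) (values are illustrative). The
// opening parenthesis has already been consumed; the closing one is consumed
// here.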
5364 bool
5365 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
5366                                 int64_t &Offset,
5367                                 int64_t &Width) {
5368   using namespace llvm::AMDGPU::Hwreg;
5369 
5370   // The register may be specified by name or using a numeric code
5371   if (isToken(AsmToken::Identifier) &&
5372       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
5373     HwReg.IsSymbolic = true;
5374     lex(); // skip register name
5375   } else if (!parseExpr(HwReg.Id)) {
5376     return false;
5377   }
5378 
5379   if (trySkipToken(AsmToken::RParen))
5380     return true;
5381 
5382   // parse optional params
5383   return
5384     skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
5385     parseExpr(Offset) &&
5386     skipToken(AsmToken::Comma, "expected a comma") &&
5387     parseExpr(Width) &&
5388     skipToken(AsmToken::RParen, "expected a closing parenthesis");
5389 }
5390 
5391 bool
5392 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
5393                                const int64_t Offset,
5394                                const int64_t Width,
5395                                const SMLoc Loc) {
5396 
5397   using namespace llvm::AMDGPU::Hwreg;
5398 
5399   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
5400     Error(Loc, "specified hardware register is not supported on this GPU");
5401     return false;
5402   } else if (!isValidHwreg(HwReg.Id)) {
5403     Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
5404     return false;
5405   } else if (!isValidHwregOffset(Offset)) {
5406     Error(Loc, "invalid bit offset: only 5-bit values are legal");
5407     return false;
5408   } else if (!isValidHwregWidth(Width)) {
5409     Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
5410     return false;
5411   }
5412   return true;
5413 }
5414 
5415 OperandMatchResultTy
5416 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
5417   using namespace llvm::AMDGPU::Hwreg;
5418 
5419   int64_t ImmVal = 0;
5420   SMLoc Loc = getLoc();
5421 
5422   // If parse failed, do not return error code
5423   // to avoid excessive error messages.
5424   if (trySkipId("hwreg", AsmToken::LParen)) {
5425     OperandInfoTy HwReg(ID_UNKNOWN_);
5426     int64_t Offset = OFFSET_DEFAULT_;
5427     int64_t Width = WIDTH_DEFAULT_;
5428     if (parseHwregBody(HwReg, Offset, Width) &&
5429         validateHwreg(HwReg, Offset, Width, Loc)) {
5430       ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
5431     }
5432   } else if (parseExpr(ImmVal)) {
5433     if (ImmVal < 0 || !isUInt<16>(ImmVal))
5434       Error(Loc, "invalid immediate: only 16-bit values are legal");
5435   }
5436 
5437   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
5438   return MatchOperand_Success;
5439 }
5440 
5441 bool AMDGPUOperand::isHwreg() const {
5442   return isImmTy(ImmTyHwreg);
5443 }
5444 
5445 //===----------------------------------------------------------------------===//
5446 // sendmsg
5447 //===----------------------------------------------------------------------===//
5448 
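// Parse the body of a sendmsg(...) operand: a message id (symbolic name or
// numeric code), an optional operation id and an optional stream id. The
// opening parenthesis has already been consumed; the closing one is consumed
// here.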
5449 bool
5450 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
5451                                   OperandInfoTy &Op,
5452                                   OperandInfoTy &Stream) {
5453   using namespace llvm::AMDGPU::SendMsg;
5454 
5455   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
5456     Msg.IsSymbolic = true;
5457     lex(); // skip message name
5458   } else if (!parseExpr(Msg.Id)) {
5459     return false;
5460   }
5461 
5462   if (trySkipToken(AsmToken::Comma)) {
5463     Op.IsDefined = true;
5464     if (isToken(AsmToken::Identifier) &&
5465         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
5466       lex(); // skip operation name
5467     } else if (!parseExpr(Op.Id)) {
5468       return false;
5469     }
5470 
5471     if (trySkipToken(AsmToken::Comma)) {
5472       Stream.IsDefined = true;
5473       if (!parseExpr(Stream.Id))
5474         return false;
5475     }
5476   }
5477 
5478   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
5479 }
5480 
5481 bool
5482 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
5483                                  const OperandInfoTy &Op,
5484                                  const OperandInfoTy &Stream,
5485                                  const SMLoc S) {
5486   using namespace llvm::AMDGPU::SendMsg;
5487 
5488   // Validation strictness depends on whether the message is specified
5489   // in a symbolic or in a numeric form. In the latter case
5490   // only the possibility of encoding is checked.
5491   bool Strict = Msg.IsSymbolic;
5492 
5493   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
5494     Error(S, "invalid message id");
5495     return false;
5496   } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
5497     Error(S, Op.IsDefined ?
5498              "message does not support operations" :
5499              "missing message operation");
5500     return false;
5501   } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
5502     Error(S, "invalid operation id");
5503     return false;
5504   } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
5505     Error(S, "message operation does not support streams");
5506     return false;
5507   } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
5508     Error(S, "invalid message stream id");
5509     return false;
5510   }
5511   return true;
5512 }
5513 
5514 OperandMatchResultTy
5515 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
5516   using namespace llvm::AMDGPU::SendMsg;
5517 
5518   int64_t ImmVal = 0;
5519   SMLoc Loc = getLoc();
5520 
5521   // If parse failed, do not return error code
5522   // to avoid excessive error messages.
5523   if (trySkipId("sendmsg", AsmToken::LParen)) {
5524     OperandInfoTy Msg(ID_UNKNOWN_);
5525     OperandInfoTy Op(OP_NONE_);
5526     OperandInfoTy Stream(STREAM_ID_NONE_);
5527     if (parseSendMsgBody(Msg, Op, Stream) &&
5528         validateSendMsg(Msg, Op, Stream, Loc)) {
5529       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5530     }
5531   } else if (parseExpr(ImmVal)) {
5532     if (ImmVal < 0 || !isUInt<16>(ImmVal))
5533       Error(Loc, "invalid immediate: only 16-bit values are legal");
5534   }
5535 
5536   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5537   return MatchOperand_Success;
5538 }
5539 
5540 bool AMDGPUOperand::isSendMsg() const {
5541   return isImmTy(ImmTySendMsg);
5542 }
5543 
5544 //===----------------------------------------------------------------------===//
5545 // v_interp
5546 //===----------------------------------------------------------------------===//
5547 
5548 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5549   if (getLexer().getKind() != AsmToken::Identifier)
5550     return MatchOperand_NoMatch;
5551 
5552   StringRef Str = Parser.getTok().getString();
5553   int Slot = StringSwitch<int>(Str)
5554     .Case("p10", 0)
5555     .Case("p20", 1)
5556     .Case("p0", 2)
5557     .Default(-1);
5558 
5559   SMLoc S = Parser.getTok().getLoc();
5560   if (Slot == -1)
5561     return MatchOperand_ParseFail;
5562 
5563   Parser.Lex();
5564   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5565                                               AMDGPUOperand::ImmTyInterpSlot));
5566   return MatchOperand_Success;
5567 }
5568 
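// Parse an interpolation attribute operand of the form attr<N>.<chan>, where
// <N> is an attribute index in [0, 63] and <chan> is one of .x, .y, .z, .w,
// e.g. attr0.x (values are illustrative). Two immediates are pushed: the
// attribute index and the channel.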
5569 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5570   if (getLexer().getKind() != AsmToken::Identifier)
5571     return MatchOperand_NoMatch;
5572 
5573   StringRef Str = Parser.getTok().getString();
5574   if (!Str.startswith("attr"))
5575     return MatchOperand_NoMatch;
5576 
5577   StringRef Chan = Str.take_back(2);
5578   int AttrChan = StringSwitch<int>(Chan)
5579     .Case(".x", 0)
5580     .Case(".y", 1)
5581     .Case(".z", 2)
5582     .Case(".w", 3)
5583     .Default(-1);
5584   if (AttrChan == -1)
5585     return MatchOperand_ParseFail;
5586 
5587   Str = Str.drop_back(2).drop_front(4);
5588 
5589   uint8_t Attr;
5590   if (Str.getAsInteger(10, Attr))
5591     return MatchOperand_ParseFail;
5592 
5593   SMLoc S = Parser.getTok().getLoc();
5594   Parser.Lex();
5595   if (Attr > 63) {
5596     Error(S, "out of bounds attr");
5597     return MatchOperand_Success;
5598   }
5599 
5600   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5601 
5602   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5603                                               AMDGPUOperand::ImmTyInterpAttr));
5604   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5605                                               AMDGPUOperand::ImmTyAttrChan));
5606   return MatchOperand_Success;
5607 }
5608 
5609 //===----------------------------------------------------------------------===//
5610 // exp
5611 //===----------------------------------------------------------------------===//
5612 
5613 void AMDGPUAsmParser::errorExpTgt() {
5614   Error(Parser.getTok().getLoc(), "invalid exp target");
5615 }
5616 
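// Map a symbolic export target to its encoding. As implemented below:
// mrt0-7 -> 0-7, mrtz -> 8, null -> 9, pos0-4 -> 12-16 (pos4 only on GFX10),
// prim -> 20 (GFX10 only), param0-31 -> 32-63.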
5617 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5618                                                       uint8_t &Val) {
5619   if (Str == "null") {
5620     Val = 9;
5621     return MatchOperand_Success;
5622   }
5623 
5624   if (Str.startswith("mrt")) {
5625     Str = Str.drop_front(3);
5626     if (Str == "z") { // == mrtz
5627       Val = 8;
5628       return MatchOperand_Success;
5629     }
5630 
5631     if (Str.getAsInteger(10, Val))
5632       return MatchOperand_ParseFail;
5633 
5634     if (Val > 7)
5635       errorExpTgt();
5636 
5637     return MatchOperand_Success;
5638   }
5639 
5640   if (Str.startswith("pos")) {
5641     Str = Str.drop_front(3);
5642     if (Str.getAsInteger(10, Val))
5643       return MatchOperand_ParseFail;
5644 
5645     if (Val > 4 || (Val == 4 && !isGFX10()))
5646       errorExpTgt();
5647 
5648     Val += 12;
5649     return MatchOperand_Success;
5650   }
5651 
5652   if (isGFX10() && Str == "prim") {
5653     Val = 20;
5654     return MatchOperand_Success;
5655   }
5656 
5657   if (Str.startswith("param")) {
5658     Str = Str.drop_front(5);
5659     if (Str.getAsInteger(10, Val))
5660       return MatchOperand_ParseFail;
5661 
5662     if (Val >= 32)
5663       errorExpTgt();
5664 
5665     Val += 32;
5666     return MatchOperand_Success;
5667   }
5668 
5669   if (Str.startswith("invalid_target_")) {
5670     Str = Str.drop_front(15);
5671     if (Str.getAsInteger(10, Val))
5672       return MatchOperand_ParseFail;
5673 
5674     errorExpTgt();
5675     return MatchOperand_Success;
5676   }
5677 
5678   return MatchOperand_NoMatch;
5679 }
5680 
5681 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5682   uint8_t Val;
5683   StringRef Str = Parser.getTok().getString();
5684 
5685   auto Res = parseExpTgtImpl(Str, Val);
5686   if (Res != MatchOperand_Success)
5687     return Res;
5688 
5689   SMLoc S = Parser.getTok().getLoc();
5690   Parser.Lex();
5691 
5692   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5693                                               AMDGPUOperand::ImmTyExpTgt));
5694   return MatchOperand_Success;
5695 }
5696 
5697 //===----------------------------------------------------------------------===//
5698 // parser helpers
5699 //===----------------------------------------------------------------------===//
5700 
5701 bool
5702 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5703   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5704 }
5705 
5706 bool
5707 AMDGPUAsmParser::isId(const StringRef Id) const {
5708   return isId(getToken(), Id);
5709 }
5710 
5711 bool
5712 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5713   return getTokenKind() == Kind;
5714 }
5715 
5716 bool
5717 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5718   if (isId(Id)) {
5719     lex();
5720     return true;
5721   }
5722   return false;
5723 }
5724 
5725 bool
5726 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5727   if (isId(Id) && peekToken().is(Kind)) {
5728     lex();
5729     lex();
5730     return true;
5731   }
5732   return false;
5733 }
5734 
5735 bool
5736 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5737   if (isToken(Kind)) {
5738     lex();
5739     return true;
5740   }
5741   return false;
5742 }
5743 
5744 bool
5745 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5746                            const StringRef ErrMsg) {
5747   if (!trySkipToken(Kind)) {
5748     Error(getLoc(), ErrMsg);
5749     return false;
5750   }
5751   return true;
5752 }
5753 
5754 bool
5755 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5756   return !getParser().parseAbsoluteExpression(Imm);
5757 }
5758 
5759 bool
5760 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
5761   SMLoc S = getLoc();
5762 
5763   const MCExpr *Expr;
5764   if (Parser.parseExpression(Expr))
5765     return false;
5766 
5767   int64_t IntVal;
5768   if (Expr->evaluateAsAbsolute(IntVal)) {
5769     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
5770   } else {
5771     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
5772   }
5773   return true;
5774 }
5775 
5776 bool
5777 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5778   if (isToken(AsmToken::String)) {
5779     Val = getToken().getStringContents();
5780     lex();
5781     return true;
5782   } else {
5783     Error(getLoc(), ErrMsg);
5784     return false;
5785   }
5786 }
5787 
5788 bool
5789 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
5790   if (isToken(AsmToken::Identifier)) {
5791     Val = getTokenStr();
5792     lex();
5793     return true;
5794   } else {
5795     Error(getLoc(), ErrMsg);
5796     return false;
5797   }
5798 }
5799 
5800 AsmToken
5801 AMDGPUAsmParser::getToken() const {
5802   return Parser.getTok();
5803 }
5804 
5805 AsmToken
5806 AMDGPUAsmParser::peekToken() {
5807   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
5808 }
5809 
5810 void
5811 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
5812   auto TokCount = getLexer().peekTokens(Tokens);
5813 
5814   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
5815     Tokens[Idx] = AsmToken(AsmToken::Error, "");
5816 }
5817 
5818 AsmToken::TokenKind
5819 AMDGPUAsmParser::getTokenKind() const {
5820   return getLexer().getKind();
5821 }
5822 
5823 SMLoc
5824 AMDGPUAsmParser::getLoc() const {
5825   return getToken().getLoc();
5826 }
5827 
5828 StringRef
5829 AMDGPUAsmParser::getTokenStr() const {
5830   return getToken().getString();
5831 }
5832 
5833 void
5834 AMDGPUAsmParser::lex() {
5835   Parser.Lex();
5836 }
5837 
5838 //===----------------------------------------------------------------------===//
5839 // swizzle
5840 //===----------------------------------------------------------------------===//
5841 
5842 LLVM_READNONE
5843 static unsigned
5844 encodeBitmaskPerm(const unsigned AndMask,
5845                   const unsigned OrMask,
5846                   const unsigned XorMask) {
5847   using namespace llvm::AMDGPU::Swizzle;
5848 
5849   return BITMASK_PERM_ENC |
5850          (AndMask << BITMASK_AND_SHIFT) |
5851          (OrMask  << BITMASK_OR_SHIFT)  |
5852          (XorMask << BITMASK_XOR_SHIFT);
5853 }
5854 
5855 bool
5856 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5857                                       const unsigned MinVal,
5858                                       const unsigned MaxVal,
5859                                       const StringRef ErrMsg) {
5860   for (unsigned i = 0; i < OpNum; ++i) {
5861     if (!skipToken(AsmToken::Comma, "expected a comma")) {
5862       return false;
5863     }
5864     SMLoc ExprLoc = Parser.getTok().getLoc();
5865     if (!parseExpr(Op[i])) {
5866       return false;
5867     }
5868     if (Op[i] < MinVal || Op[i] > MaxVal) {
5869       Error(ExprLoc, ErrMsg);
5870       return false;
5871     }
5872   }
5873 
5874   return true;
5875 }
5876 
5877 bool
5878 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5879   using namespace llvm::AMDGPU::Swizzle;
5880 
5881   int64_t Lane[LANE_NUM];
5882   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5883                            "expected a 2-bit lane id")) {
5884     Imm = QUAD_PERM_ENC;
5885     for (unsigned I = 0; I < LANE_NUM; ++I) {
5886       Imm |= Lane[I] << (LANE_SHIFT * I);
5887     }
5888     return true;
5889   }
5890   return false;
5891 }
5892 
5893 bool
5894 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5895   using namespace llvm::AMDGPU::Swizzle;
5896 
5897   SMLoc S = Parser.getTok().getLoc();
5898   int64_t GroupSize;
5899   int64_t LaneIdx;
5900 
5901   if (!parseSwizzleOperands(1, &GroupSize,
5902                             2, 32,
5903                             "group size must be in the interval [2,32]")) {
5904     return false;
5905   }
5906   if (!isPowerOf2_64(GroupSize)) {
5907     Error(S, "group size must be a power of two");
5908     return false;
5909   }
5910   if (parseSwizzleOperands(1, &LaneIdx,
5911                            0, GroupSize - 1,
5912                            "lane id must be in the interval [0,group size - 1]")) {
5913     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5914     return true;
5915   }
5916   return false;
5917 }
5918 
5919 bool
5920 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
5921   using namespace llvm::AMDGPU::Swizzle;
5922 
5923   SMLoc S = Parser.getTok().getLoc();
5924   int64_t GroupSize;
5925 
5926   if (!parseSwizzleOperands(1, &GroupSize,
5927       2, 32, "group size must be in the interval [2,32]")) {
5928     return false;
5929   }
5930   if (!isPowerOf2_64(GroupSize)) {
5931     Error(S, "group size must be a power of two");
5932     return false;
5933   }
5934 
5935   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
5936   return true;
5937 }
5938 
5939 bool
5940 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5941   using namespace llvm::AMDGPU::Swizzle;
5942 
5943   SMLoc S = Parser.getTok().getLoc();
5944   int64_t GroupSize;
5945 
5946   if (!parseSwizzleOperands(1, &GroupSize,
5947       1, 16, "group size must be in the interval [1,16]")) {
5948     return false;
5949   }
5950   if (!isPowerOf2_64(GroupSize)) {
5951     Error(S, "group size must be a power of two");
5952     return false;
5953   }
5954 
5955   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5956   return true;
5957 }
5958 
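// Parse the 5-character control string of the BITMASK_PERM swizzle mode. Each
// character controls one lane-id bit: '0' forces the bit to 0, '1' forces it
// to 1, 'p' preserves it and 'i' inverts it (this follows directly from the
// AndMask/OrMask/XorMask handling below).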
5959 bool
5960 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5961   using namespace llvm::AMDGPU::Swizzle;
5962 
5963   if (!skipToken(AsmToken::Comma, "expected a comma")) {
5964     return false;
5965   }
5966 
5967   StringRef Ctl;
5968   SMLoc StrLoc = Parser.getTok().getLoc();
5969   if (!parseString(Ctl)) {
5970     return false;
5971   }
5972   if (Ctl.size() != BITMASK_WIDTH) {
5973     Error(StrLoc, "expected a 5-character mask");
5974     return false;
5975   }
5976 
5977   unsigned AndMask = 0;
5978   unsigned OrMask = 0;
5979   unsigned XorMask = 0;
5980 
5981   for (size_t i = 0; i < Ctl.size(); ++i) {
5982     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5983     switch(Ctl[i]) {
5984     default:
5985       Error(StrLoc, "invalid mask");
5986       return false;
5987     case '0':
5988       break;
5989     case '1':
5990       OrMask |= Mask;
5991       break;
5992     case 'p':
5993       AndMask |= Mask;
5994       break;
5995     case 'i':
5996       AndMask |= Mask;
5997       XorMask |= Mask;
5998       break;
5999     }
6000   }
6001 
6002   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6003   return true;
6004 }
6005 
6006 bool
6007 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6008 
6009   SMLoc OffsetLoc = Parser.getTok().getLoc();
6010 
6011   if (!parseExpr(Imm)) {
6012     return false;
6013   }
6014   if (!isUInt<16>(Imm)) {
6015     Error(OffsetLoc, "expected a 16-bit offset");
6016     return false;
6017   }
6018   return true;
6019 }
6020 
6021 bool
6022 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6023   using namespace llvm::AMDGPU::Swizzle;
6024 
6025   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
6026 
6027     SMLoc ModeLoc = Parser.getTok().getLoc();
6028     bool Ok = false;
6029 
6030     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6031       Ok = parseSwizzleQuadPerm(Imm);
6032     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6033       Ok = parseSwizzleBitmaskPerm(Imm);
6034     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6035       Ok = parseSwizzleBroadcast(Imm);
6036     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6037       Ok = parseSwizzleSwap(Imm);
6038     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6039       Ok = parseSwizzleReverse(Imm);
6040     } else {
6041       Error(ModeLoc, "expected a swizzle mode");
6042     }
6043 
6044     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
6045   }
6046 
6047   return false;
6048 }
6049 
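// Parse a swizzle offset operand. Two forms are accepted:
//   offset:<16-bit pattern>    - a raw pattern value
//   offset:swizzle(<mode>,...) - a symbolic macro expanded by parseSwizzleMacro
// The symbolic mode names come from Swizzle::IdSymbolic (quad perm, bitmask
// perm, broadcast, swap and reverse modes, as dispatched above).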
6050 OperandMatchResultTy
6051 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6052   SMLoc S = Parser.getTok().getLoc();
6053   int64_t Imm = 0;
6054 
6055   if (trySkipId("offset")) {
6056 
6057     bool Ok = false;
6058     if (skipToken(AsmToken::Colon, "expected a colon")) {
6059       if (trySkipId("swizzle")) {
6060         Ok = parseSwizzleMacro(Imm);
6061       } else {
6062         Ok = parseSwizzleOffset(Imm);
6063       }
6064     }
6065 
6066     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6067 
6068     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
6069   } else {
6070     // Swizzle "offset" operand is optional.
6071     // If it is omitted, try parsing other optional operands.
6072     return parseOptionalOpr(Operands);
6073   }
6074 }
6075 
6076 bool
6077 AMDGPUOperand::isSwizzle() const {
6078   return isImmTy(ImmTySwizzle);
6079 }
6080 
6081 //===----------------------------------------------------------------------===//
6082 // VGPR Index Mode
6083 //===----------------------------------------------------------------------===//
6084 
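// Parse the body of a gpr_idx(...) operand: a comma-separated list of VGPR
// index modes taken from VGPRIndexMode::IdSymbolic, with duplicates rejected.
// The opening parenthesis has already been consumed by the caller; an empty
// list, "()", yields OFF.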
6085 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
6086 
6087   using namespace llvm::AMDGPU::VGPRIndexMode;
6088 
6089   if (trySkipToken(AsmToken::RParen)) {
6090     return OFF;
6091   }
6092 
6093   int64_t Imm = 0;
6094 
6095   while (true) {
6096     unsigned Mode = 0;
6097     SMLoc S = Parser.getTok().getLoc();
6098 
6099     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
6100       if (trySkipId(IdSymbolic[ModeId])) {
6101         Mode = 1 << ModeId;
6102         break;
6103       }
6104     }
6105 
6106     if (Mode == 0) {
6107       Error(S, (Imm == 0)?
6108                "expected a VGPR index mode or a closing parenthesis" :
6109                "expected a VGPR index mode");
6110       break;
6111     }
6112 
6113     if (Imm & Mode) {
6114       Error(S, "duplicate VGPR index mode");
6115       break;
6116     }
6117     Imm |= Mode;
6118 
6119     if (trySkipToken(AsmToken::RParen))
6120       break;
6121     if (!skipToken(AsmToken::Comma,
6122                    "expected a comma or a closing parenthesis"))
6123       break;
6124   }
6125 
6126   return Imm;
6127 }
6128 
6129 OperandMatchResultTy
6130 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
6131 
6132   int64_t Imm = 0;
6133   SMLoc S = Parser.getTok().getLoc();
6134 
6135   if (getLexer().getKind() == AsmToken::Identifier &&
6136       Parser.getTok().getString() == "gpr_idx" &&
6137       getLexer().peekTok().is(AsmToken::LParen)) {
6138 
6139     Parser.Lex();
6140     Parser.Lex();
6141 
6142     // If parse failed, trigger an error but do not return error code
6143     // to avoid excessive error messages.
6144     Imm = parseGPRIdxMacro();
6145 
6146   } else {
6147     if (getParser().parseAbsoluteExpression(Imm))
6148       return MatchOperand_NoMatch;
6149     if (Imm < 0 || !isUInt<4>(Imm)) {
6150       Error(S, "invalid immediate: only 4-bit values are legal");
6151     }
6152   }
6153 
6154   Operands.push_back(
6155       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
6156   return MatchOperand_Success;
6157 }
6158 
6159 bool AMDGPUOperand::isGPRIdxMode() const {
6160   return isImmTy(ImmTyGprIdxMode);
6161 }
6162 
6163 //===----------------------------------------------------------------------===//
6164 // sopp branch targets
6165 //===----------------------------------------------------------------------===//
6166 
6167 OperandMatchResultTy
6168 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
6169 
6170   // Make sure we are not parsing something
6171   // that looks like a label or an expression but is not.
6172   // This will improve error messages.
6173   if (isRegister() || isModifier())
6174     return MatchOperand_NoMatch;
6175 
6176   if (parseExpr(Operands)) {
6177 
6178     AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
6179     assert(Opr.isImm() || Opr.isExpr());
6180     SMLoc Loc = Opr.getStartLoc();
6181 
6182     // Currently we do not support arbitrary expressions as branch targets.
6183     // Only labels and absolute expressions are accepted.
6184     if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
6185       Error(Loc, "expected an absolute expression or a label");
6186     } else if (Opr.isImm() && !Opr.isS16Imm()) {
6187       Error(Loc, "expected a 16-bit signed jump offset");
6188     }
6189   }
6190 
6191   return MatchOperand_Success; // avoid excessive error messages
6192 }
6193 
6194 //===----------------------------------------------------------------------===//
6195 // Boolean holding registers
6196 //===----------------------------------------------------------------------===//
6197 
6198 OperandMatchResultTy
6199 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
6200   return parseReg(Operands);
6201 }
6202 
6203 //===----------------------------------------------------------------------===//
6204 // mubuf
6205 //===----------------------------------------------------------------------===//
6206 
6207 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
6208   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
6209 }
6210 
6211 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
6212   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
6213 }
6214 
6215 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
6216   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
6217 }
6218 
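// Convert parsed MUBUF operands to MCInst operands. Registers and an
// immediate soffset are emitted in source order; for atomics with a return
// value a tied copy of the dst register is inserted. Optional modifiers are
// then appended in a fixed order (offset, glc for non-atomics, slc, tfe for
// non-lds opcodes and, on GFX10, dlc) regardless of how they were written.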
6219 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
6220                                const OperandVector &Operands,
6221                                bool IsAtomic,
6222                                bool IsAtomicReturn,
6223                                bool IsLds) {
6224   bool IsLdsOpcode = IsLds;
6225   bool HasLdsModifier = false;
6226   OptionalImmIndexMap OptionalIdx;
6227   assert(IsAtomicReturn ? IsAtomic : true);
6228   unsigned FirstOperandIdx = 1;
6229 
6230   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
6231     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6232 
6233     // Add the register arguments
6234     if (Op.isReg()) {
6235       Op.addRegOperands(Inst, 1);
6236       // Insert a tied src for atomic return dst.
6237       // This cannot be postponed as subsequent calls to
6238       // addImmOperands rely on correct number of MC operands.
6239       if (IsAtomicReturn && i == FirstOperandIdx)
6240         Op.addRegOperands(Inst, 1);
6241       continue;
6242     }
6243 
6244     // Handle the case where soffset is an immediate
6245     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6246       Op.addImmOperands(Inst, 1);
6247       continue;
6248     }
6249 
6250     HasLdsModifier |= Op.isLDS();
6251 
6252     // Handle tokens like 'offen' which are sometimes hard-coded into the
6253     // asm string.  There are no MCInst operands for these.
6254     if (Op.isToken()) {
6255       continue;
6256     }
6257     assert(Op.isImm());
6258 
6259     // Handle optional arguments
6260     OptionalIdx[Op.getImmTy()] = i;
6261   }
6262 
6263   // This is a workaround for an llvm quirk which may result in an
6264   // incorrect instruction selection. Lds and non-lds versions of
6265   // MUBUF instructions are identical except that lds versions
6266   // have a mandatory 'lds' modifier. However, this modifier follows
6267   // the optional modifiers, so the llvm asm matcher regards the 'lds'
6268   // modifier as optional too. As a result, an lds version of an
6269   // opcode may be selected even if the instruction has no 'lds' modifier.
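  // For example, a plain 'buffer_load_dword' written without 'lds' could
  // otherwise be matched to the lds variant; getMUBUFNoLdsInst maps it back
  // to the non-lds opcode.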
6270   if (IsLdsOpcode && !HasLdsModifier) {
6271     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
6272     if (NoLdsOpcode != -1) { // Got lds version - correct it.
6273       Inst.setOpcode(NoLdsOpcode);
6274       IsLdsOpcode = false;
6275     }
6276   }
6277 
6278   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
6279   if (!IsAtomic) { // glc is hard-coded.
6280     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6281   }
6282   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6283 
6284   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
6285     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6286   }
6287 
6288   if (isGFX10())
6289     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6290 }
6291 
6292 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
6293   OptionalImmIndexMap OptionalIdx;
6294 
6295   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6296     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6297 
6298     // Add the register arguments
6299     if (Op.isReg()) {
6300       Op.addRegOperands(Inst, 1);
6301       continue;
6302     }
6303 
6304     // Handle the case where soffset is an immediate
6305     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6306       Op.addImmOperands(Inst, 1);
6307       continue;
6308     }
6309 
6310     // Handle tokens like 'offen' which are sometimes hard-coded into the
6311     // asm string.  There are no MCInst operands for these.
6312     if (Op.isToken()) {
6313       continue;
6314     }
6315     assert(Op.isImm());
6316 
6317     // Handle optional arguments
6318     OptionalIdx[Op.getImmTy()] = i;
6319   }
6320 
6321   addOptionalImmOperand(Inst, Operands, OptionalIdx,
6322                         AMDGPUOperand::ImmTyOffset);
6323   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
6324   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6325   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6326   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6327 
6328   if (isGFX10())
6329     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6330 }
6331 
6332 //===----------------------------------------------------------------------===//
6333 // mimg
6334 //===----------------------------------------------------------------------===//
6335 
6336 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
6337                               bool IsAtomic) {
6338   unsigned I = 1;
6339   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6340   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6341     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6342   }
6343 
6344   if (IsAtomic) {
6345     // Add src, same as dst
6346     assert(Desc.getNumDefs() == 1);
6347     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
6348   }
6349 
6350   OptionalImmIndexMap OptionalIdx;
6351 
6352   for (unsigned E = Operands.size(); I != E; ++I) {
6353     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6354 
6355     // Add the register arguments
6356     if (Op.isReg()) {
6357       Op.addRegOperands(Inst, 1);
6358     } else if (Op.isImmModifier()) {
6359       OptionalIdx[Op.getImmTy()] = I;
6360     } else if (!Op.isToken()) {
6361       llvm_unreachable("unexpected operand type");
6362     }
6363   }
6364 
6365   bool IsGFX10 = isGFX10();
6366 
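  // Add the optional operands in the order the MIMG MCInst expects; dim, dlc
  // and a16 are GFX10-only, while da is only present on earlier targets.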
6367   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
6368   if (IsGFX10)
6369     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
6370   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
6371   if (IsGFX10)
6372     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6373   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6374   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6375   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
6376   if (IsGFX10)
6377     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
6378   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6379   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
6380   if (!IsGFX10)
6381     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
6382   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
6383 }
6384 
6385 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
6386   cvtMIMG(Inst, Operands, true);
6387 }
6388 
6389 //===----------------------------------------------------------------------===//
6390 // smrd
6391 //===----------------------------------------------------------------------===//
6392 
6393 bool AMDGPUOperand::isSMRDOffset8() const {
6394   return isImm() && isUInt<8>(getImm());
6395 }
6396 
6397 bool AMDGPUOperand::isSMEMOffset() const {
6398   return isImm(); // Offset range is checked later by validator.
6399 }
6400 
6401 bool AMDGPUOperand::isSMRDLiteralOffset() const {
6402   // 32-bit literals are only supported on CI, and we only want to use them
6403   // when the offset does not fit in 8 bits.
6404   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
6405 }
6406 
6407 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
6408   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6409 }
6410 
6411 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
6412   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6413 }
6414 
6415 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
6416   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6417 }
6418 
6419 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
6420   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6421 }
6422 
6423 //===----------------------------------------------------------------------===//
6424 // vop3
6425 //===----------------------------------------------------------------------===//
6426 
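// Map an 'omod' multiplier value to its encoded form: mul:1 -> 0 (identity),
// mul:2 -> 1, mul:4 -> 2.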
6427 static bool ConvertOmodMul(int64_t &Mul) {
6428   if (Mul != 1 && Mul != 2 && Mul != 4)
6429     return false;
6430 
6431   Mul >>= 1;
6432   return true;
6433 }
6434 
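// Map an 'omod' divisor value to its encoded form: div:1 -> 0 (identity),
// div:2 -> 3.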
6435 static bool ConvertOmodDiv(int64_t &Div) {
6436   if (Div == 1) {
6437     Div = 0;
6438     return true;
6439   }
6440 
6441   if (Div == 2) {
6442     Div = 3;
6443     return true;
6444   }
6445 
6446   return false;
6447 }
6448 
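// Translate the 'bound_ctrl' values accepted in assembly to the encoded DPP
// field: 'bound_ctrl:0' is encoded as 1 and 'bound_ctrl:-1' as 0.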
6449 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
6450   if (BoundCtrl == 0) {
6451     BoundCtrl = 1;
6452     return true;
6453   }
6454 
6455   if (BoundCtrl == -1) {
6456     BoundCtrl = 0;
6457     return true;
6458   }
6459 
6460   return false;
6461 }
6462 
6463 // Note: the order in this table matches the order of operands in AsmString.
6464 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
6465   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
6466   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
6467   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
6468   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
6469   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
6470   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
6471   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
6472   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
6473   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
6474   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
6475   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
6476   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
6477   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
6478   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
6479   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
6480   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
6481   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
6482   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
6483   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
6484   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
6485   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
6486   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
6487   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
6488   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
6489   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
6490   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
6491   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
6492   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
6493   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
6494   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
6495   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
6496   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
6497   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
6498   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
6499   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
6500   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
6501   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
6502   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
6503   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
6504   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
6505   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
6506   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
6507   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
6508 };
6509 
6510 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
6511 
6512   OperandMatchResultTy res = parseOptionalOpr(Operands);
6513 
6514   // This is a hack to enable hardcoded mandatory operands which follow
6515   // optional operands.
6516   //
6517   // The current design assumes that all operands after the first optional
6518   // operand are also optional. However, the implementation of some instructions
6519   // violates this rule (e.g. flat/global atomics, which have a hardcoded 'glc' operand).
6520   //
6521   // To alleviate this problem, we have to (implicitly) parse extra operands
6522   // to make sure the autogenerated parser of custom operands never hits a
6523   // hardcoded mandatory operand.
6524 
6525   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
6526     if (res != MatchOperand_Success ||
6527         isToken(AsmToken::EndOfStatement))
6528       break;
6529 
6530     trySkipToken(AsmToken::Comma);
6531     res = parseOptionalOpr(Operands);
6532   }
6533 
6534   return res;
6535 }
6536 
6537 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6538   OperandMatchResultTy res;
6539   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6540     // try to parse any optional operand here
6541     if (Op.IsBit) {
6542       res = parseNamedBit(Op.Name, Operands, Op.Type);
6543     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6544       res = parseOModOperand(Operands);
6545     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6546                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6547                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6548       res = parseSDWASel(Operands, Op.Name, Op.Type);
6549     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6550       res = parseSDWADstUnused(Operands);
6551     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6552                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6553                Op.Type == AMDGPUOperand::ImmTyNegLo ||
6554                Op.Type == AMDGPUOperand::ImmTyNegHi) {
6555       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6556                                         Op.ConvertResult);
6557     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6558       res = parseDim(Operands);
6559     } else {
6560       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6561     }
6562     if (res != MatchOperand_NoMatch) {
6563       return res;
6564     }
6565   }
6566   return MatchOperand_NoMatch;
6567 }
6568 
6569 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6570   StringRef Name = Parser.getTok().getString();
6571   if (Name == "mul") {
6572     return parseIntWithPrefix("mul", Operands,
6573                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6574   }
6575 
6576   if (Name == "div") {
6577     return parseIntWithPrefix("div", Operands,
6578                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6579   }
6580 
6581   return MatchOperand_NoMatch;
6582 }
6583 
6584 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6585   cvtVOP3P(Inst, Operands);
6586 
6587   int Opc = Inst.getOpcode();
6588 
6589   int SrcNum;
6590   const int Ops[] = { AMDGPU::OpName::src0,
6591                       AMDGPU::OpName::src1,
6592                       AMDGPU::OpName::src2 };
6593   for (SrcNum = 0;
6594        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6595        ++SrcNum);
6596   assert(SrcNum > 0);
6597 
6598   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6599   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6600 
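  // The op_sel bit immediately following the source bits selects the
  // destination half; it is carried in src0_modifiers as DST_OP_SEL.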
6601   if ((OpSel & (1 << SrcNum)) != 0) {
6602     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6603     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6604     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6605   }
6606 }
6607 
6608 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
6609       // 1. This operand is input modifiers
6610   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
6611       // 2. This is not last operand
6612       && Desc.NumOperands > (OpNum + 1)
6613       // 3. Next operand is register class
6614       && Desc.OpInfo[OpNum + 1].RegClass != -1
6615       // 4. Next register is not tied to any other operand
6616       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6617 }
6618 
6619 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6620 {
6621   OptionalImmIndexMap OptionalIdx;
6622   unsigned Opc = Inst.getOpcode();
6623 
6624   unsigned I = 1;
6625   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6626   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6627     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6628   }
6629 
6630   for (unsigned E = Operands.size(); I != E; ++I) {
6631     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6632     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6633       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6634     } else if (Op.isInterpSlot() ||
6635                Op.isInterpAttr() ||
6636                Op.isAttrChan()) {
6637       Inst.addOperand(MCOperand::createImm(Op.getImm()));
6638     } else if (Op.isImmModifier()) {
6639       OptionalIdx[Op.getImmTy()] = I;
6640     } else {
6641       llvm_unreachable("unhandled operand type");
6642     }
6643   }
6644 
6645   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6646     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6647   }
6648 
6649   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6650     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6651   }
6652 
6653   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6654     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6655   }
6656 }
6657 
6658 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
6659                               OptionalImmIndexMap &OptionalIdx) {
6660   unsigned Opc = Inst.getOpcode();
6661 
6662   unsigned I = 1;
6663   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6664   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6665     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6666   }
6667 
6668   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
6669     // This instruction has src modifiers
6670     for (unsigned E = Operands.size(); I != E; ++I) {
6671       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6672       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6673         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6674       } else if (Op.isImmModifier()) {
6675         OptionalIdx[Op.getImmTy()] = I;
6676       } else if (Op.isRegOrImm()) {
6677         Op.addRegOrImmOperands(Inst, 1);
6678       } else {
6679         llvm_unreachable("unhandled operand type");
6680       }
6681     }
6682   } else {
6683     // No src modifiers
6684     for (unsigned E = Operands.size(); I != E; ++I) {
6685       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6686       if (Op.isMod()) {
6687         OptionalIdx[Op.getImmTy()] = I;
6688       } else {
6689         Op.addRegOrImmOperands(Inst, 1);
6690       }
6691     }
6692   }
6693 
6694   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6695     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6696   }
6697 
6698   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6699     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6700   }
6701 
6702   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
6703   // they have a src2 register operand that is tied to the dst operand.
6704   // We don't allow modifiers for this operand in the assembler, so
6705   // src2_modifiers should be 0.
6706   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
6707       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
6708       Opc == AMDGPU::V_MAC_F32_e64_vi ||
6709       Opc == AMDGPU::V_MAC_F16_e64_vi ||
6710       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
6711       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
6712       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
6713     auto it = Inst.begin();
6714     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
6715     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
6716     ++it;
6717     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6718   }
6719 }
6720 
6721 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
6722   OptionalImmIndexMap OptionalIdx;
6723   cvtVOP3(Inst, Operands, OptionalIdx);
6724 }
6725 
6726 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
6727                                const OperandVector &Operands) {
6728   OptionalImmIndexMap OptIdx;
6729   const int Opc = Inst.getOpcode();
6730   const MCInstrDesc &Desc = MII.get(Opc);
6731 
6732   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
6733 
6734   cvtVOP3(Inst, Operands, OptIdx);
6735 
6736   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
6737     assert(!IsPacked);
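    // vdst_in is tied to vdst; append a copy of the destination operand.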
6738     Inst.addOperand(Inst.getOperand(0));
6739   }
6740 
6741   // FIXME: This is messy. Parse the modifiers as if this were a normal VOP3
6742   // instruction, and then figure out where to actually put the modifiers.
6743 
6744   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
6745 
6746   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
6747   if (OpSelHiIdx != -1) {
6748     int DefaultVal = IsPacked ? -1 : 0;
6749     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
6750                           DefaultVal);
6751   }
6752 
6753   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
6754   if (NegLoIdx != -1) {
6755     assert(IsPacked);
6756     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
6757     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
6758   }
6759 
6760   const int Ops[] = { AMDGPU::OpName::src0,
6761                       AMDGPU::OpName::src1,
6762                       AMDGPU::OpName::src2 };
6763   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
6764                          AMDGPU::OpName::src1_modifiers,
6765                          AMDGPU::OpName::src2_modifiers };
6766 
6767   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6768 
6769   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6770   unsigned OpSelHi = 0;
6771   unsigned NegLo = 0;
6772   unsigned NegHi = 0;
6773 
6774   if (OpSelHiIdx != -1) {
6775     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
6776   }
6777 
6778   if (NegLoIdx != -1) {
6779     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
6780     NegLo = Inst.getOperand(NegLoIdx).getImm();
6781     NegHi = Inst.getOperand(NegHiIdx).getImm();
6782   }
6783 
6784   for (int J = 0; J < 3; ++J) {
6785     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
6786     if (OpIdx == -1)
6787       break;
6788 
6789     uint32_t ModVal = 0;
6790 
6791     if ((OpSel & (1 << J)) != 0)
6792       ModVal |= SISrcMods::OP_SEL_0;
6793 
6794     if ((OpSelHi & (1 << J)) != 0)
6795       ModVal |= SISrcMods::OP_SEL_1;
6796 
6797     if ((NegLo & (1 << J)) != 0)
6798       ModVal |= SISrcMods::NEG;
6799 
6800     if ((NegHi & (1 << J)) != 0)
6801       ModVal |= SISrcMods::NEG_HI;
6802 
6803     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
6804 
6805     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
6806   }
6807 }
6808 
6809 //===----------------------------------------------------------------------===//
6810 // dpp
6811 //===----------------------------------------------------------------------===//
6812 
6813 bool AMDGPUOperand::isDPP8() const {
6814   return isImmTy(ImmTyDPP8);
6815 }
6816 
6817 bool AMDGPUOperand::isDPPCtrl() const {
6818   using namespace AMDGPU::DPP;
6819 
6820   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
6821   if (result) {
6822     int64_t Imm = getImm();
6823     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
6824            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
6825            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
6826            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
6827            (Imm == DppCtrl::WAVE_SHL1) ||
6828            (Imm == DppCtrl::WAVE_ROL1) ||
6829            (Imm == DppCtrl::WAVE_SHR1) ||
6830            (Imm == DppCtrl::WAVE_ROR1) ||
6831            (Imm == DppCtrl::ROW_MIRROR) ||
6832            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
6833            (Imm == DppCtrl::BCAST15) ||
6834            (Imm == DppCtrl::BCAST31) ||
6835            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
6836            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
6837   }
6838   return false;
6839 }
6840 
6841 //===----------------------------------------------------------------------===//
6842 // mAI
6843 //===----------------------------------------------------------------------===//
6844 
6845 bool AMDGPUOperand::isBLGP() const {
6846   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
6847 }
6848 
6849 bool AMDGPUOperand::isCBSZ() const {
6850   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
6851 }
6852 
6853 bool AMDGPUOperand::isABID() const {
6854   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
6855 }
6856 
6857 bool AMDGPUOperand::isS16Imm() const {
6858   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
6859 }
6860 
6861 bool AMDGPUOperand::isU16Imm() const {
6862   return isImm() && isUInt<16>(getImm());
6863 }
6864 
6865 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
6866   if (!isGFX10())
6867     return MatchOperand_NoMatch;
6868 
6869   SMLoc S = Parser.getTok().getLoc();
6870 
6871   if (getLexer().isNot(AsmToken::Identifier))
6872     return MatchOperand_NoMatch;
6873   if (getLexer().getTok().getString() != "dim")
6874     return MatchOperand_NoMatch;
6875 
6876   Parser.Lex();
6877   if (getLexer().isNot(AsmToken::Colon))
6878     return MatchOperand_ParseFail;
6879 
6880   Parser.Lex();
6881 
6882   // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
6883   // integer.
6884   std::string Token;
6885   if (getLexer().is(AsmToken::Integer)) {
6886     SMLoc Loc = getLexer().getTok().getEndLoc();
6887     Token = std::string(getLexer().getTok().getString());
6888     Parser.Lex();
6889     if (getLexer().getTok().getLoc() != Loc)
6890       return MatchOperand_ParseFail;
6891   }
6892   if (getLexer().isNot(AsmToken::Identifier))
6893     return MatchOperand_ParseFail;
6894   Token += getLexer().getTok().getString();
6895 
6896   StringRef DimId = Token;
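  // Accept both the short suffix form (e.g. "2D", "CUBE") and the full
  // "SQ_RSRC_IMG_*" spelling of the dim value.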
6897   if (DimId.startswith("SQ_RSRC_IMG_"))
6898     DimId = DimId.substr(12);
6899 
6900   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
6901   if (!DimInfo)
6902     return MatchOperand_ParseFail;
6903 
6904   Parser.Lex();
6905 
6906   Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
6907                                               AMDGPUOperand::ImmTyDim));
6908   return MatchOperand_Success;
6909 }
6910 
6911 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
6912   SMLoc S = Parser.getTok().getLoc();
6913   StringRef Prefix;
6914 
6915   if (getLexer().getKind() == AsmToken::Identifier) {
6916     Prefix = Parser.getTok().getString();
6917   } else {
6918     return MatchOperand_NoMatch;
6919   }
6920 
6921   if (Prefix != "dpp8")
6922     return parseDPPCtrl(Operands);
6923   if (!isGFX10())
6924     return MatchOperand_NoMatch;
6925 
6926   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
6927 
6928   int64_t Sels[8];
6929 
6930   Parser.Lex();
6931   if (getLexer().isNot(AsmToken::Colon))
6932     return MatchOperand_ParseFail;
6933 
6934   Parser.Lex();
6935   if (getLexer().isNot(AsmToken::LBrac))
6936     return MatchOperand_ParseFail;
6937 
6938   Parser.Lex();
6939   if (getParser().parseAbsoluteExpression(Sels[0]))
6940     return MatchOperand_ParseFail;
6941   if (0 > Sels[0] || 7 < Sels[0])
6942     return MatchOperand_ParseFail;
6943 
6944   for (size_t i = 1; i < 8; ++i) {
6945     if (getLexer().isNot(AsmToken::Comma))
6946       return MatchOperand_ParseFail;
6947 
6948     Parser.Lex();
6949     if (getParser().parseAbsoluteExpression(Sels[i]))
6950       return MatchOperand_ParseFail;
6951     if (0 > Sels[i] || 7 < Sels[i])
6952       return MatchOperand_ParseFail;
6953   }
6954 
6955   if (getLexer().isNot(AsmToken::RBrac))
6956     return MatchOperand_ParseFail;
6957   Parser.Lex();
6958 
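  // Pack the eight 3-bit lane selectors into a single immediate.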
6959   unsigned DPP8 = 0;
6960   for (size_t i = 0; i < 8; ++i)
6961     DPP8 |= (Sels[i] << (i * 3));
6962 
6963   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
6964   return MatchOperand_Success;
6965 }
6966 
6967 OperandMatchResultTy
6968 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
6969   using namespace AMDGPU::DPP;
6970 
6971   SMLoc S = Parser.getTok().getLoc();
6972   StringRef Prefix;
6973   int64_t Int;
6974 
6975   if (getLexer().getKind() == AsmToken::Identifier) {
6976     Prefix = Parser.getTok().getString();
6977   } else {
6978     return MatchOperand_NoMatch;
6979   }
6980 
6981   if (Prefix == "row_mirror") {
6982     Int = DppCtrl::ROW_MIRROR;
6983     Parser.Lex();
6984   } else if (Prefix == "row_half_mirror") {
6985     Int = DppCtrl::ROW_HALF_MIRROR;
6986     Parser.Lex();
6987   } else {
6988     // Check the prefix to prevent parseDPPCtrl from eating invalid tokens
6989     if (Prefix != "quad_perm"
6990         && Prefix != "row_shl"
6991         && Prefix != "row_shr"
6992         && Prefix != "row_ror"
6993         && Prefix != "wave_shl"
6994         && Prefix != "wave_rol"
6995         && Prefix != "wave_shr"
6996         && Prefix != "wave_ror"
6997         && Prefix != "row_bcast"
6998         && Prefix != "row_share"
6999         && Prefix != "row_xmask") {
7000       return MatchOperand_NoMatch;
7001     }
7002 
7003     if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
7004       return MatchOperand_NoMatch;
7005 
7006     if (!isVI() && !isGFX9() &&
7007         (Prefix == "wave_shl" || Prefix == "wave_shr" ||
7008          Prefix == "wave_rol" || Prefix == "wave_ror" ||
7009          Prefix == "row_bcast"))
7010       return MatchOperand_NoMatch;
7011 
7012     Parser.Lex();
7013     if (getLexer().isNot(AsmToken::Colon))
7014       return MatchOperand_ParseFail;
7015 
7016     if (Prefix == "quad_perm") {
7017       // quad_perm:[%d,%d,%d,%d]
7018       Parser.Lex();
7019       if (getLexer().isNot(AsmToken::LBrac))
7020         return MatchOperand_ParseFail;
7021       Parser.Lex();
7022 
7023       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <= 3))
7024         return MatchOperand_ParseFail;
7025 
7026       for (int i = 0; i < 3; ++i) {
7027         if (getLexer().isNot(AsmToken::Comma))
7028           return MatchOperand_ParseFail;
7029         Parser.Lex();
7030 
7031         int64_t Temp;
7032         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <= 3))
7033           return MatchOperand_ParseFail;
7034         const int shift = i * 2 + 2;
7035         Int += (Temp << shift);
7036       }
7037 
7038       if (getLexer().isNot(AsmToken::RBrac))
7039         return MatchOperand_ParseFail;
7040       Parser.Lex();
7041     } else {
7042       // sel:%d
7043       Parser.Lex();
7044       if (getParser().parseAbsoluteExpression(Int))
7045         return MatchOperand_ParseFail;
7046 
7047       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
7048         Int |= DppCtrl::ROW_SHL0;
7049       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
7050         Int |= DppCtrl::ROW_SHR0;
7051       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
7052         Int |= DppCtrl::ROW_ROR0;
7053       } else if (Prefix == "wave_shl" && 1 == Int) {
7054         Int = DppCtrl::WAVE_SHL1;
7055       } else if (Prefix == "wave_rol" && 1 == Int) {
7056         Int = DppCtrl::WAVE_ROL1;
7057       } else if (Prefix == "wave_shr" && 1 == Int) {
7058         Int = DppCtrl::WAVE_SHR1;
7059       } else if (Prefix == "wave_ror" && 1 == Int) {
7060         Int = DppCtrl::WAVE_ROR1;
7061       } else if (Prefix == "row_bcast") {
7062         if (Int == 15) {
7063           Int = DppCtrl::BCAST15;
7064         } else if (Int == 31) {
7065           Int = DppCtrl::BCAST31;
7066         } else {
7067           return MatchOperand_ParseFail;
7068         }
7069       } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
7070         Int |= DppCtrl::ROW_SHARE_FIRST;
7071       } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
7072         Int |= DppCtrl::ROW_XMASK_FIRST;
7073       } else {
7074         return MatchOperand_ParseFail;
7075       }
7076     }
7077   }
7078 
7079   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
7080   return MatchOperand_Success;
7081 }
7082 
7083 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
7084   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
7085 }
7086 
7087 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
7088   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
7089 }
7090 
7091 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
7092   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
7093 }
7094 
7095 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
7096   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
7097 }
7098 
7099 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
7100   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
7101 }
7102 
7103 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
7104   OptionalImmIndexMap OptionalIdx;
7105 
7106   unsigned I = 1;
7107   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7108   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7109     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7110   }
7111 
7112   int Fi = 0;
7113   for (unsigned E = Operands.size(); I != E; ++I) {
7114     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
7115                                             MCOI::TIED_TO);
7116     if (TiedTo != -1) {
7117       assert((unsigned)TiedTo < Inst.getNumOperands());
7118       // handle tied old or src2 for MAC instructions
7119       Inst.addOperand(Inst.getOperand(TiedTo));
7120     }
7121     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7122     // Add the register arguments
7123     if (Op.isReg() && validateVccOperand(Op.getReg())) {
7124       // VOP2b (v_add_u32, v_sub_u32 ...) dpp uses the "vcc" token.
7125       // Skip it.
7126       continue;
7127     }
7128 
7129     if (IsDPP8) {
7130       if (Op.isDPP8()) {
7131         Op.addImmOperands(Inst, 1);
7132       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7133         Op.addRegWithFPInputModsOperands(Inst, 2);
7134       } else if (Op.isFI()) {
7135         Fi = Op.getImm();
7136       } else if (Op.isReg()) {
7137         Op.addRegOperands(Inst, 1);
7138       } else {
7139         llvm_unreachable("Invalid operand type");
7140       }
7141     } else {
7142       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7143         Op.addRegWithFPInputModsOperands(Inst, 2);
7144       } else if (Op.isDPPCtrl()) {
7145         Op.addImmOperands(Inst, 1);
7146       } else if (Op.isImm()) {
7147         // Handle optional arguments
7148         OptionalIdx[Op.getImmTy()] = I;
7149       } else {
7150         llvm_unreachable("Invalid operand type");
7151       }
7152     }
7153   }
7154 
7155   if (IsDPP8) {
7156     using namespace llvm::AMDGPU::DPP;
7157     Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
7158   } else {
7159     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
7160     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
7161     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
7162     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
7163       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
7164     }
7165   }
7166 }
7167 
7168 //===----------------------------------------------------------------------===//
7169 // sdwa
7170 //===----------------------------------------------------------------------===//
7171 
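// Parse an SDWA selector operand such as 'dst_sel:WORD_1' or 'src0_sel:BYTE_0'.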
7172 OperandMatchResultTy
7173 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
7174                               AMDGPUOperand::ImmTy Type) {
7175   using namespace llvm::AMDGPU::SDWA;
7176 
7177   SMLoc S = Parser.getTok().getLoc();
7178   StringRef Value;
7179   OperandMatchResultTy res;
7180 
7181   res = parseStringWithPrefix(Prefix, Value);
7182   if (res != MatchOperand_Success) {
7183     return res;
7184   }
7185 
7186   int64_t Int;
7187   Int = StringSwitch<int64_t>(Value)
7188         .Case("BYTE_0", SdwaSel::BYTE_0)
7189         .Case("BYTE_1", SdwaSel::BYTE_1)
7190         .Case("BYTE_2", SdwaSel::BYTE_2)
7191         .Case("BYTE_3", SdwaSel::BYTE_3)
7192         .Case("WORD_0", SdwaSel::WORD_0)
7193         .Case("WORD_1", SdwaSel::WORD_1)
7194         .Case("DWORD", SdwaSel::DWORD)
7195         .Default(0xffffffff);
7196   Parser.Lex(); // eat last token
7197 
7198   if (Int == 0xffffffff) {
7199     return MatchOperand_ParseFail;
7200   }
7201 
7202   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
7203   return MatchOperand_Success;
7204 }
7205 
7206 OperandMatchResultTy
7207 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
7208   using namespace llvm::AMDGPU::SDWA;
7209 
7210   SMLoc S = Parser.getTok().getLoc();
7211   StringRef Value;
7212   OperandMatchResultTy res;
7213 
7214   res = parseStringWithPrefix("dst_unused", Value);
7215   if (res != MatchOperand_Success) {
7216     return res;
7217   }
7218 
7219   int64_t Int;
7220   Int = StringSwitch<int64_t>(Value)
7221         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
7222         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
7223         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
7224         .Default(0xffffffff);
7225   Parser.Lex(); // eat last token
7226 
7227   if (Int == 0xffffffff) {
7228     return MatchOperand_ParseFail;
7229   }
7230 
7231   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
7232   return MatchOperand_Success;
7233 }
7234 
7235 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
7236   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
7237 }
7238 
7239 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
7240   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
7241 }
7242 
7243 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
7244   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
7245 }
7246 
7247 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
7248   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
7249 }
7250 
7251 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
7252   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
7253 }
7254 
7255 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
7256                               uint64_t BasicInstType,
7257                               bool SkipDstVcc,
7258                               bool SkipSrcVcc) {
7259   using namespace llvm::AMDGPU::SDWA;
7260 
7261   OptionalImmIndexMap OptionalIdx;
7262   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
7263   bool SkippedVcc = false;
7264 
7265   unsigned I = 1;
7266   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7267   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7268     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7269   }
7270 
7271   for (unsigned E = Operands.size(); I != E; ++I) {
7272     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7273     if (SkipVcc && !SkippedVcc && Op.isReg() &&
7274         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
7275       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa uses the "vcc" token as dst.
7276       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
7277       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
7278       // Skip VCC only if we didn't skip it on previous iteration.
7279       // Note that src0 and src1 occupy 2 slots each because of modifiers.
7280       if (BasicInstType == SIInstrFlags::VOP2 &&
7281           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
7282            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
7283         SkippedVcc = true;
7284         continue;
7285       } else if (BasicInstType == SIInstrFlags::VOPC &&
7286                  Inst.getNumOperands() == 0) {
7287         SkippedVcc = true;
7288         continue;
7289       }
7290     }
7291     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7292       Op.addRegOrImmWithInputModsOperands(Inst, 2);
7293     } else if (Op.isImm()) {
7294       // Handle optional arguments
7295       OptionalIdx[Op.getImmTy()] = I;
7296     } else {
7297       llvm_unreachable("Invalid operand type");
7298     }
7299     SkippedVcc = false;
7300   }
7301 
7302   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
7303       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
7304       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
7305     // V_NOP_sdwa_{vi,gfx9,gfx10} has no optional sdwa arguments
7306     switch (BasicInstType) {
7307     case SIInstrFlags::VOP1:
7308       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7309       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7310         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7311       }
7312       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7313       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7314       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7315       break;
7316 
7317     case SIInstrFlags::VOP2:
7318       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7319       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7320         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7321       }
7322       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7323       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7324       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7325       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7326       break;
7327 
7328     case SIInstrFlags::VOPC:
7329       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
7330         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7331       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7332       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7333       break;
7334 
7335     default:
7336       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
7337     }
7338   }
7339 
7340   // Special case v_mac_{f16, f32}:
7341   // they have a src2 register operand that is tied to the dst operand.
7342   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
7343       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
7344     auto it = Inst.begin();
7345     std::advance(
7346       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
7347     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
7348   }
7349 }
7350 
7351 //===----------------------------------------------------------------------===//
7352 // mAI
7353 //===----------------------------------------------------------------------===//
7354 
7355 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
7356   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
7357 }
7358 
7359 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
7360   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
7361 }
7362 
7363 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
7364   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
7365 }
7366 
7367 /// Force static initialization.
7368 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
7369   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
7370   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
7371 }
7372 
7373 #define GET_REGISTER_MATCHER
7374 #define GET_MATCHER_IMPLEMENTATION
7375 #define GET_MNEMONIC_SPELL_CHECKER
7376 #include "AMDGPUGenAsmMatcher.inc"
7377 
7378 // This function should be defined after the auto-generated include so that
7379 // the MatchClassKind enum is defined.
7380 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
7381                                                      unsigned Kind) {
7382   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
7383   // But MatchInstructionImpl() expects to meet a token and fails to validate
7384   // the operand. This method checks if we were given an immediate operand but
7385   // expected the corresponding token.
7386   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
7387   switch (Kind) {
7388   case MCK_addr64:
7389     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
7390   case MCK_gds:
7391     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
7392   case MCK_lds:
7393     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
7394   case MCK_glc:
7395     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
7396   case MCK_idxen:
7397     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
7398   case MCK_offen:
7399     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
7400   case MCK_SSrcB32:
7401     // When operands have expression values, they will return true for isToken,
7402     // because it is not possible to distinguish between a token and an
7403     // expression at parse time. MatchInstructionImpl() will always try to
7404     // match an operand as a token when isToken returns true, and when the
7405     // name of the expression is not a valid token, the match will fail,
7406     // so we need to handle it here.
7407     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
7408   case MCK_SSrcF32:
7409     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
7410   case MCK_SoppBrTarget:
7411     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
7412   case MCK_VReg32OrOff:
7413     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
7414   case MCK_InterpSlot:
7415     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
7416   case MCK_Attr:
7417     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
7418   case MCK_AttrChan:
7419     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
7420   case MCK_ImmSMEMOffset:
7421     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
7422   case MCK_SReg_64:
7423   case MCK_SReg_64_XEXEC:
7424     // Null is defined as a 32-bit register but
7425     // it should also be enabled with 64-bit operands.
7426     // The following code enables it for SReg_64 operands
7427     // used as source and destination. Remaining source
7428     // operands are handled in isInlinableImm.
7429     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
7430   default:
7431     return Match_InvalidOperand;
7432   }
7433 }
7434 
7435 //===----------------------------------------------------------------------===//
7436 // endpgm
7437 //===----------------------------------------------------------------------===//
7438 
7439 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
7440   SMLoc S = Parser.getTok().getLoc();
7441   int64_t Imm = 0;
7442 
7443   if (!parseExpr(Imm)) {
7444     // The operand is optional; if not present, default to 0.
7445     Imm = 0;
7446   }
7447 
7448   if (!isUInt<16>(Imm)) {
7449     Error(S, "expected a 16-bit value");
7450     return MatchOperand_ParseFail;
7451   }
7452 
7453   Operands.push_back(
7454       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
7455   return MatchOperand_Success;
7456 }
7457 
7458 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
7459