1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/Error.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
63 
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
67 
68 namespace {
69 
70 class AMDGPUAsmParser;
71 
72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
73 
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
77 
78 class AMDGPUOperand : public MCParsedAsmOperand {
79   enum KindTy {
80     Token,
81     Immediate,
82     Register,
83     Expression
84   } Kind;
85 
86   SMLoc StartLoc, EndLoc;
87   const AMDGPUAsmParser *AsmParser;
88 
89 public:
90   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
92 
93   using Ptr = std::unique_ptr<AMDGPUOperand>;
94 
95   struct Modifiers {
96     bool Abs = false;
97     bool Neg = false;
98     bool Sext = false;
99 
100     bool hasFPModifiers() const { return Abs || Neg; }
101     bool hasIntModifiers() const { return Sext; }
102     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103 
104     int64_t getFPModifiersOperand() const {
105       int64_t Operand = 0;
106       Operand |= Abs ? SISrcMods::ABS : 0u;
107       Operand |= Neg ? SISrcMods::NEG : 0u;
108       return Operand;
109     }
110 
111     int64_t getIntModifiersOperand() const {
112       int64_t Operand = 0;
113       Operand |= Sext ? SISrcMods::SEXT : 0u;
114       return Operand;
115     }
116 
117     int64_t getModifiersOperand() const {
118       assert(!(hasFPModifiers() && hasIntModifiers())
119            && "fp and int modifiers should not be used simultaneously");
120       if (hasFPModifiers()) {
121         return getFPModifiersOperand();
122       } else if (hasIntModifiers()) {
123         return getIntModifiersOperand();
124       } else {
125         return 0;
126       }
127     }
128 
129     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
130   };
131 
132   enum ImmTy {
133     ImmTyNone,
134     ImmTyGDS,
135     ImmTyLDS,
136     ImmTyOffen,
137     ImmTyIdxen,
138     ImmTyAddr64,
139     ImmTyOffset,
140     ImmTyInstOffset,
141     ImmTyOffset0,
142     ImmTyOffset1,
143     ImmTyDLC,
144     ImmTyGLC,
145     ImmTySLC,
146     ImmTySWZ,
147     ImmTyTFE,
148     ImmTyD16,
149     ImmTyClampSI,
150     ImmTyOModSI,
151     ImmTyDPP8,
152     ImmTyDppCtrl,
153     ImmTyDppRowMask,
154     ImmTyDppBankMask,
155     ImmTyDppBoundCtrl,
156     ImmTyDppFi,
157     ImmTySdwaDstSel,
158     ImmTySdwaSrc0Sel,
159     ImmTySdwaSrc1Sel,
160     ImmTySdwaDstUnused,
161     ImmTyDMask,
162     ImmTyDim,
163     ImmTyUNorm,
164     ImmTyDA,
165     ImmTyR128A16,
166     ImmTyA16,
167     ImmTyLWE,
168     ImmTyExpTgt,
169     ImmTyExpCompr,
170     ImmTyExpVM,
171     ImmTyFORMAT,
172     ImmTyHwreg,
173     ImmTyOff,
174     ImmTySendMsg,
175     ImmTyInterpSlot,
176     ImmTyInterpAttr,
177     ImmTyAttrChan,
178     ImmTyOpSel,
179     ImmTyOpSelHi,
180     ImmTyNegLo,
181     ImmTyNegHi,
182     ImmTySwizzle,
183     ImmTyGprIdxMode,
184     ImmTyHigh,
185     ImmTyBLGP,
186     ImmTyCBSZ,
187     ImmTyABID,
188     ImmTyEndpgm,
189   };
190 
191 private:
192   struct TokOp {
193     const char *Data;
194     unsigned Length;
195   };
196 
197   struct ImmOp {
198     int64_t Val;
199     ImmTy Type;
200     bool IsFPImm;
201     Modifiers Mods;
202   };
203 
204   struct RegOp {
205     unsigned RegNo;
206     Modifiers Mods;
207   };
208 
209   union {
210     TokOp Tok;
211     ImmOp Imm;
212     RegOp Reg;
213     const MCExpr *Expr;
214   };
215 
216 public:
217   bool isToken() const override {
218     if (Kind == Token)
219       return true;
220 
221     // When parsing operands, we can't always tell if something was meant to be
222     // a token, like 'gds', or an expression that references a global variable.
223     // In this case, we assume the string is an expression, and if we need to
224     // interpret is a token, then we treat the symbol name as the token.
225     return isSymbolRefExpr();
226   }
227 
228   bool isSymbolRefExpr() const {
229     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
230   }
231 
232   bool isImm() const override {
233     return Kind == Immediate;
234   }
235 
236   bool isInlinableImm(MVT type) const;
237   bool isLiteralImm(MVT type) const;
238 
239   bool isRegKind() const {
240     return Kind == Register;
241   }
242 
243   bool isReg() const override {
244     return isRegKind() && !hasModifiers();
245   }
246 
247   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
248     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
249   }
250 
251   bool isRegOrImmWithInt16InputMods() const {
252     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
253   }
254 
255   bool isRegOrImmWithInt32InputMods() const {
256     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
257   }
258 
259   bool isRegOrImmWithInt64InputMods() const {
260     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
261   }
262 
263   bool isRegOrImmWithFP16InputMods() const {
264     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
265   }
266 
267   bool isRegOrImmWithFP32InputMods() const {
268     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
269   }
270 
271   bool isRegOrImmWithFP64InputMods() const {
272     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
273   }
274 
275   bool isVReg() const {
276     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
277            isRegClass(AMDGPU::VReg_64RegClassID) ||
278            isRegClass(AMDGPU::VReg_96RegClassID) ||
279            isRegClass(AMDGPU::VReg_128RegClassID) ||
280            isRegClass(AMDGPU::VReg_160RegClassID) ||
281            isRegClass(AMDGPU::VReg_192RegClassID) ||
282            isRegClass(AMDGPU::VReg_256RegClassID) ||
283            isRegClass(AMDGPU::VReg_512RegClassID) ||
284            isRegClass(AMDGPU::VReg_1024RegClassID);
285   }
286 
287   bool isVReg32() const {
288     return isRegClass(AMDGPU::VGPR_32RegClassID);
289   }
290 
291   bool isVReg32OrOff() const {
292     return isOff() || isVReg32();
293   }
294 
295   bool isNull() const {
296     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
297   }
298 
299   bool isSDWAOperand(MVT type) const;
300   bool isSDWAFP16Operand() const;
301   bool isSDWAFP32Operand() const;
302   bool isSDWAInt16Operand() const;
303   bool isSDWAInt32Operand() const;
304 
305   bool isImmTy(ImmTy ImmT) const {
306     return isImm() && Imm.Type == ImmT;
307   }
308 
309   bool isImmModifier() const {
310     return isImm() && Imm.Type != ImmTyNone;
311   }
312 
313   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
314   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
315   bool isDMask() const { return isImmTy(ImmTyDMask); }
316   bool isDim() const { return isImmTy(ImmTyDim); }
317   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
318   bool isDA() const { return isImmTy(ImmTyDA); }
319   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
320   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
321   bool isLWE() const { return isImmTy(ImmTyLWE); }
322   bool isOff() const { return isImmTy(ImmTyOff); }
323   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
324   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
325   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
326   bool isOffen() const { return isImmTy(ImmTyOffen); }
327   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
328   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
329   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
330   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
331   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
332 
333   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
334   bool isGDS() const { return isImmTy(ImmTyGDS); }
335   bool isLDS() const { return isImmTy(ImmTyLDS); }
336   bool isDLC() const { return isImmTy(ImmTyDLC); }
337   bool isGLC() const { return isImmTy(ImmTyGLC); }
338   bool isSLC() const { return isImmTy(ImmTySLC); }
339   bool isSWZ() const { return isImmTy(ImmTySWZ); }
340   bool isTFE() const { return isImmTy(ImmTyTFE); }
341   bool isD16() const { return isImmTy(ImmTyD16); }
342   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
343   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
344   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
345   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
346   bool isFI() const { return isImmTy(ImmTyDppFi); }
347   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
348   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
349   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
350   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
351   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
352   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
353   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
354   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
355   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
356   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
357   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
358   bool isHigh() const { return isImmTy(ImmTyHigh); }
359 
360   bool isMod() const {
361     return isClampSI() || isOModSI();
362   }
363 
364   bool isRegOrImm() const {
365     return isReg() || isImm();
366   }
367 
368   bool isRegClass(unsigned RCID) const;
369 
370   bool isInlineValue() const;
371 
372   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
373     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
374   }
375 
376   bool isSCSrcB16() const {
377     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
378   }
379 
380   bool isSCSrcV2B16() const {
381     return isSCSrcB16();
382   }
383 
384   bool isSCSrcB32() const {
385     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
386   }
387 
388   bool isSCSrcB64() const {
389     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
390   }
391 
392   bool isBoolReg() const;
393 
394   bool isSCSrcF16() const {
395     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
396   }
397 
398   bool isSCSrcV2F16() const {
399     return isSCSrcF16();
400   }
401 
402   bool isSCSrcF32() const {
403     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
404   }
405 
406   bool isSCSrcF64() const {
407     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
408   }
409 
410   bool isSSrcB32() const {
411     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
412   }
413 
414   bool isSSrcB16() const {
415     return isSCSrcB16() || isLiteralImm(MVT::i16);
416   }
417 
418   bool isSSrcV2B16() const {
419     llvm_unreachable("cannot happen");
420     return isSSrcB16();
421   }
422 
423   bool isSSrcB64() const {
424     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
425     // See isVSrc64().
426     return isSCSrcB64() || isLiteralImm(MVT::i64);
427   }
428 
429   bool isSSrcF32() const {
430     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
431   }
432 
433   bool isSSrcF64() const {
434     return isSCSrcB64() || isLiteralImm(MVT::f64);
435   }
436 
437   bool isSSrcF16() const {
438     return isSCSrcB16() || isLiteralImm(MVT::f16);
439   }
440 
441   bool isSSrcV2F16() const {
442     llvm_unreachable("cannot happen");
443     return isSSrcF16();
444   }
445 
446   bool isSSrcOrLdsB32() const {
447     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
448            isLiteralImm(MVT::i32) || isExpr();
449   }
450 
451   bool isVCSrcB32() const {
452     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
453   }
454 
455   bool isVCSrcB64() const {
456     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
457   }
458 
459   bool isVCSrcB16() const {
460     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
461   }
462 
463   bool isVCSrcV2B16() const {
464     return isVCSrcB16();
465   }
466 
467   bool isVCSrcF32() const {
468     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
469   }
470 
471   bool isVCSrcF64() const {
472     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
473   }
474 
475   bool isVCSrcF16() const {
476     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
477   }
478 
479   bool isVCSrcV2F16() const {
480     return isVCSrcF16();
481   }
482 
483   bool isVSrcB32() const {
484     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
485   }
486 
487   bool isVSrcB64() const {
488     return isVCSrcF64() || isLiteralImm(MVT::i64);
489   }
490 
491   bool isVSrcB16() const {
492     return isVCSrcB16() || isLiteralImm(MVT::i16);
493   }
494 
495   bool isVSrcV2B16() const {
496     return isVSrcB16() || isLiteralImm(MVT::v2i16);
497   }
498 
499   bool isVSrcF32() const {
500     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
501   }
502 
503   bool isVSrcF64() const {
504     return isVCSrcF64() || isLiteralImm(MVT::f64);
505   }
506 
507   bool isVSrcF16() const {
508     return isVCSrcF16() || isLiteralImm(MVT::f16);
509   }
510 
511   bool isVSrcV2F16() const {
512     return isVSrcF16() || isLiteralImm(MVT::v2f16);
513   }
514 
515   bool isVISrcB32() const {
516     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
517   }
518 
519   bool isVISrcB16() const {
520     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
521   }
522 
523   bool isVISrcV2B16() const {
524     return isVISrcB16();
525   }
526 
527   bool isVISrcF32() const {
528     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
529   }
530 
531   bool isVISrcF16() const {
532     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
533   }
534 
535   bool isVISrcV2F16() const {
536     return isVISrcF16() || isVISrcB32();
537   }
538 
539   bool isAISrcB32() const {
540     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
541   }
542 
543   bool isAISrcB16() const {
544     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
545   }
546 
547   bool isAISrcV2B16() const {
548     return isAISrcB16();
549   }
550 
551   bool isAISrcF32() const {
552     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
553   }
554 
555   bool isAISrcF16() const {
556     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
557   }
558 
559   bool isAISrcV2F16() const {
560     return isAISrcF16() || isAISrcB32();
561   }
562 
563   bool isAISrc_128B32() const {
564     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
565   }
566 
567   bool isAISrc_128B16() const {
568     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
569   }
570 
571   bool isAISrc_128V2B16() const {
572     return isAISrc_128B16();
573   }
574 
575   bool isAISrc_128F32() const {
576     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
577   }
578 
579   bool isAISrc_128F16() const {
580     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
581   }
582 
583   bool isAISrc_128V2F16() const {
584     return isAISrc_128F16() || isAISrc_128B32();
585   }
586 
587   bool isAISrc_512B32() const {
588     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
589   }
590 
591   bool isAISrc_512B16() const {
592     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
593   }
594 
595   bool isAISrc_512V2B16() const {
596     return isAISrc_512B16();
597   }
598 
599   bool isAISrc_512F32() const {
600     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
601   }
602 
603   bool isAISrc_512F16() const {
604     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
605   }
606 
607   bool isAISrc_512V2F16() const {
608     return isAISrc_512F16() || isAISrc_512B32();
609   }
610 
611   bool isAISrc_1024B32() const {
612     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
613   }
614 
615   bool isAISrc_1024B16() const {
616     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
617   }
618 
619   bool isAISrc_1024V2B16() const {
620     return isAISrc_1024B16();
621   }
622 
623   bool isAISrc_1024F32() const {
624     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
625   }
626 
627   bool isAISrc_1024F16() const {
628     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
629   }
630 
631   bool isAISrc_1024V2F16() const {
632     return isAISrc_1024F16() || isAISrc_1024B32();
633   }
634 
635   bool isKImmFP32() const {
636     return isLiteralImm(MVT::f32);
637   }
638 
639   bool isKImmFP16() const {
640     return isLiteralImm(MVT::f16);
641   }
642 
643   bool isMem() const override {
644     return false;
645   }
646 
647   bool isExpr() const {
648     return Kind == Expression;
649   }
650 
651   bool isSoppBrTarget() const {
652     return isExpr() || isImm();
653   }
654 
655   bool isSWaitCnt() const;
656   bool isHwreg() const;
657   bool isSendMsg() const;
658   bool isSwizzle() const;
659   bool isSMRDOffset8() const;
660   bool isSMEMOffset() const;
661   bool isSMRDLiteralOffset() const;
662   bool isDPP8() const;
663   bool isDPPCtrl() const;
664   bool isBLGP() const;
665   bool isCBSZ() const;
666   bool isABID() const;
667   bool isGPRIdxMode() const;
668   bool isS16Imm() const;
669   bool isU16Imm() const;
670   bool isEndpgm() const;
671 
672   StringRef getExpressionAsToken() const {
673     assert(isExpr());
674     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
675     return S->getSymbol().getName();
676   }
677 
678   StringRef getToken() const {
679     assert(isToken());
680 
681     if (Kind == Expression)
682       return getExpressionAsToken();
683 
684     return StringRef(Tok.Data, Tok.Length);
685   }
686 
687   int64_t getImm() const {
688     assert(isImm());
689     return Imm.Val;
690   }
691 
692   void setImm(int64_t Val) {
693     assert(isImm());
694     Imm.Val = Val;
695   }
696 
697   ImmTy getImmTy() const {
698     assert(isImm());
699     return Imm.Type;
700   }
701 
702   unsigned getReg() const override {
703     assert(isRegKind());
704     return Reg.RegNo;
705   }
706 
707   SMLoc getStartLoc() const override {
708     return StartLoc;
709   }
710 
711   SMLoc getEndLoc() const override {
712     return EndLoc;
713   }
714 
715   SMRange getLocRange() const {
716     return SMRange(StartLoc, EndLoc);
717   }
718 
719   Modifiers getModifiers() const {
720     assert(isRegKind() || isImmTy(ImmTyNone));
721     return isRegKind() ? Reg.Mods : Imm.Mods;
722   }
723 
724   void setModifiers(Modifiers Mods) {
725     assert(isRegKind() || isImmTy(ImmTyNone));
726     if (isRegKind())
727       Reg.Mods = Mods;
728     else
729       Imm.Mods = Mods;
730   }
731 
732   bool hasModifiers() const {
733     return getModifiers().hasModifiers();
734   }
735 
736   bool hasFPModifiers() const {
737     return getModifiers().hasFPModifiers();
738   }
739 
740   bool hasIntModifiers() const {
741     return getModifiers().hasIntModifiers();
742   }
743 
744   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
745 
746   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
747 
748   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
749 
750   template <unsigned Bitwidth>
751   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
752 
753   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
754     addKImmFPOperands<16>(Inst, N);
755   }
756 
757   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
758     addKImmFPOperands<32>(Inst, N);
759   }
760 
761   void addRegOperands(MCInst &Inst, unsigned N) const;
762 
763   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
764     addRegOperands(Inst, N);
765   }
766 
767   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
768     if (isRegKind())
769       addRegOperands(Inst, N);
770     else if (isExpr())
771       Inst.addOperand(MCOperand::createExpr(Expr));
772     else
773       addImmOperands(Inst, N);
774   }
775 
776   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
777     Modifiers Mods = getModifiers();
778     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
779     if (isRegKind()) {
780       addRegOperands(Inst, N);
781     } else {
782       addImmOperands(Inst, N, false);
783     }
784   }
785 
786   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
787     assert(!hasIntModifiers());
788     addRegOrImmWithInputModsOperands(Inst, N);
789   }
790 
791   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
792     assert(!hasFPModifiers());
793     addRegOrImmWithInputModsOperands(Inst, N);
794   }
795 
796   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
797     Modifiers Mods = getModifiers();
798     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
799     assert(isRegKind());
800     addRegOperands(Inst, N);
801   }
802 
803   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
804     assert(!hasIntModifiers());
805     addRegWithInputModsOperands(Inst, N);
806   }
807 
808   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
809     assert(!hasFPModifiers());
810     addRegWithInputModsOperands(Inst, N);
811   }
812 
813   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
814     if (isImm())
815       addImmOperands(Inst, N);
816     else {
817       assert(isExpr());
818       Inst.addOperand(MCOperand::createExpr(Expr));
819     }
820   }
821 
822   static void printImmTy(raw_ostream& OS, ImmTy Type) {
823     switch (Type) {
824     case ImmTyNone: OS << "None"; break;
825     case ImmTyGDS: OS << "GDS"; break;
826     case ImmTyLDS: OS << "LDS"; break;
827     case ImmTyOffen: OS << "Offen"; break;
828     case ImmTyIdxen: OS << "Idxen"; break;
829     case ImmTyAddr64: OS << "Addr64"; break;
830     case ImmTyOffset: OS << "Offset"; break;
831     case ImmTyInstOffset: OS << "InstOffset"; break;
832     case ImmTyOffset0: OS << "Offset0"; break;
833     case ImmTyOffset1: OS << "Offset1"; break;
834     case ImmTyDLC: OS << "DLC"; break;
835     case ImmTyGLC: OS << "GLC"; break;
836     case ImmTySLC: OS << "SLC"; break;
837     case ImmTySWZ: OS << "SWZ"; break;
838     case ImmTyTFE: OS << "TFE"; break;
839     case ImmTyD16: OS << "D16"; break;
840     case ImmTyFORMAT: OS << "FORMAT"; break;
841     case ImmTyClampSI: OS << "ClampSI"; break;
842     case ImmTyOModSI: OS << "OModSI"; break;
843     case ImmTyDPP8: OS << "DPP8"; break;
844     case ImmTyDppCtrl: OS << "DppCtrl"; break;
845     case ImmTyDppRowMask: OS << "DppRowMask"; break;
846     case ImmTyDppBankMask: OS << "DppBankMask"; break;
847     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
848     case ImmTyDppFi: OS << "FI"; break;
849     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
850     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
851     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
852     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
853     case ImmTyDMask: OS << "DMask"; break;
854     case ImmTyDim: OS << "Dim"; break;
855     case ImmTyUNorm: OS << "UNorm"; break;
856     case ImmTyDA: OS << "DA"; break;
857     case ImmTyR128A16: OS << "R128A16"; break;
858     case ImmTyA16: OS << "A16"; break;
859     case ImmTyLWE: OS << "LWE"; break;
860     case ImmTyOff: OS << "Off"; break;
861     case ImmTyExpTgt: OS << "ExpTgt"; break;
862     case ImmTyExpCompr: OS << "ExpCompr"; break;
863     case ImmTyExpVM: OS << "ExpVM"; break;
864     case ImmTyHwreg: OS << "Hwreg"; break;
865     case ImmTySendMsg: OS << "SendMsg"; break;
866     case ImmTyInterpSlot: OS << "InterpSlot"; break;
867     case ImmTyInterpAttr: OS << "InterpAttr"; break;
868     case ImmTyAttrChan: OS << "AttrChan"; break;
869     case ImmTyOpSel: OS << "OpSel"; break;
870     case ImmTyOpSelHi: OS << "OpSelHi"; break;
871     case ImmTyNegLo: OS << "NegLo"; break;
872     case ImmTyNegHi: OS << "NegHi"; break;
873     case ImmTySwizzle: OS << "Swizzle"; break;
874     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
875     case ImmTyHigh: OS << "High"; break;
876     case ImmTyBLGP: OS << "BLGP"; break;
877     case ImmTyCBSZ: OS << "CBSZ"; break;
878     case ImmTyABID: OS << "ABID"; break;
879     case ImmTyEndpgm: OS << "Endpgm"; break;
880     }
881   }
882 
883   void print(raw_ostream &OS) const override {
884     switch (Kind) {
885     case Register:
886       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
887       break;
888     case Immediate:
889       OS << '<' << getImm();
890       if (getImmTy() != ImmTyNone) {
891         OS << " type: "; printImmTy(OS, getImmTy());
892       }
893       OS << " mods: " << Imm.Mods << '>';
894       break;
895     case Token:
896       OS << '\'' << getToken() << '\'';
897       break;
898     case Expression:
899       OS << "<expr " << *Expr << '>';
900       break;
901     }
902   }
903 
904   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
905                                       int64_t Val, SMLoc Loc,
906                                       ImmTy Type = ImmTyNone,
907                                       bool IsFPImm = false) {
908     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
909     Op->Imm.Val = Val;
910     Op->Imm.IsFPImm = IsFPImm;
911     Op->Imm.Type = Type;
912     Op->Imm.Mods = Modifiers();
913     Op->StartLoc = Loc;
914     Op->EndLoc = Loc;
915     return Op;
916   }
917 
918   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
919                                         StringRef Str, SMLoc Loc,
920                                         bool HasExplicitEncodingSize = true) {
921     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
922     Res->Tok.Data = Str.data();
923     Res->Tok.Length = Str.size();
924     Res->StartLoc = Loc;
925     Res->EndLoc = Loc;
926     return Res;
927   }
928 
929   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
930                                       unsigned RegNo, SMLoc S,
931                                       SMLoc E) {
932     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
933     Op->Reg.RegNo = RegNo;
934     Op->Reg.Mods = Modifiers();
935     Op->StartLoc = S;
936     Op->EndLoc = E;
937     return Op;
938   }
939 
940   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
941                                        const class MCExpr *Expr, SMLoc S) {
942     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
943     Op->Expr = Expr;
944     Op->StartLoc = S;
945     Op->EndLoc = S;
946     return Op;
947   }
948 };
949 
950 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
951   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
952   return OS;
953 }
954 
955 //===----------------------------------------------------------------------===//
956 // AsmParser
957 //===----------------------------------------------------------------------===//
958 
959 // Holds info related to the current kernel, e.g. count of SGPRs used.
960 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
961 // .amdgpu_hsa_kernel or at EOF.
962 class KernelScopeInfo {
963   int SgprIndexUnusedMin = -1;
964   int VgprIndexUnusedMin = -1;
965   MCContext *Ctx = nullptr;
966 
967   void usesSgprAt(int i) {
968     if (i >= SgprIndexUnusedMin) {
969       SgprIndexUnusedMin = ++i;
970       if (Ctx) {
971         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
972         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
973       }
974     }
975   }
976 
977   void usesVgprAt(int i) {
978     if (i >= VgprIndexUnusedMin) {
979       VgprIndexUnusedMin = ++i;
980       if (Ctx) {
981         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
982         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
983       }
984     }
985   }
986 
987 public:
988   KernelScopeInfo() = default;
989 
990   void initialize(MCContext &Context) {
991     Ctx = &Context;
992     usesSgprAt(SgprIndexUnusedMin = -1);
993     usesVgprAt(VgprIndexUnusedMin = -1);
994   }
995 
996   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
997     switch (RegKind) {
998       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
999       case IS_AGPR: // fall through
1000       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1001       default: break;
1002     }
1003   }
1004 };
1005 
1006 class AMDGPUAsmParser : public MCTargetAsmParser {
1007   MCAsmParser &Parser;
1008 
1009   // Number of extra operands parsed after the first optional operand.
1010   // This may be necessary to skip hardcoded mandatory operands.
1011   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1012 
1013   unsigned ForcedEncodingSize = 0;
1014   bool ForcedDPP = false;
1015   bool ForcedSDWA = false;
1016   KernelScopeInfo KernelScope;
1017 
1018   /// @name Auto-generated Match Functions
1019   /// {
1020 
1021 #define GET_ASSEMBLER_HEADER
1022 #include "AMDGPUGenAsmMatcher.inc"
1023 
1024   /// }
1025 
1026 private:
1027   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1028   bool OutOfRangeError(SMRange Range);
1029   /// Calculate VGPR/SGPR blocks required for given target, reserved
1030   /// registers, and user-specified NextFreeXGPR values.
1031   ///
1032   /// \param Features [in] Target features, used for bug corrections.
1033   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1034   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1035   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1036   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1037   /// descriptor field, if valid.
1038   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1039   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1040   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1041   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1042   /// \param VGPRBlocks [out] Result VGPR block count.
1043   /// \param SGPRBlocks [out] Result SGPR block count.
1044   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1045                           bool FlatScrUsed, bool XNACKUsed,
1046                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1047                           SMRange VGPRRange, unsigned NextFreeSGPR,
1048                           SMRange SGPRRange, unsigned &VGPRBlocks,
1049                           unsigned &SGPRBlocks);
1050   bool ParseDirectiveAMDGCNTarget();
1051   bool ParseDirectiveAMDHSAKernel();
1052   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1053   bool ParseDirectiveHSACodeObjectVersion();
1054   bool ParseDirectiveHSACodeObjectISA();
1055   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1056   bool ParseDirectiveAMDKernelCodeT();
1057   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1058   bool ParseDirectiveAMDGPUHsaKernel();
1059 
1060   bool ParseDirectiveISAVersion();
1061   bool ParseDirectiveHSAMetadata();
1062   bool ParseDirectivePALMetadataBegin();
1063   bool ParseDirectivePALMetadata();
1064   bool ParseDirectiveAMDGPULDS();
1065 
1066   /// Common code to parse out a block of text (typically YAML) between start and
1067   /// end directives.
1068   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1069                            const char *AssemblerDirectiveEnd,
1070                            std::string &CollectString);
1071 
1072   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1073                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1074   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1075                            unsigned &RegNum, unsigned &RegWidth,
1076                            bool RestoreOnFailure = false);
1077   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1078                            unsigned &RegNum, unsigned &RegWidth,
1079                            SmallVectorImpl<AsmToken> &Tokens);
1080   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1081                            unsigned &RegWidth,
1082                            SmallVectorImpl<AsmToken> &Tokens);
1083   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1084                            unsigned &RegWidth,
1085                            SmallVectorImpl<AsmToken> &Tokens);
1086   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1087                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1088   bool ParseRegRange(unsigned& Num, unsigned& Width);
1089   unsigned getRegularReg(RegisterKind RegKind,
1090                          unsigned RegNum,
1091                          unsigned RegWidth,
1092                          SMLoc Loc);
1093 
1094   bool isRegister();
1095   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1096   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1097   void initializeGprCountSymbol(RegisterKind RegKind);
1098   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1099                              unsigned RegWidth);
1100   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1101                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1102   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1103                  bool IsGdsHardcoded);
1104 
1105 public:
1106   enum AMDGPUMatchResultTy {
1107     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1108   };
1109   enum OperandMode {
1110     OperandMode_Default,
1111     OperandMode_NSA,
1112   };
1113 
1114   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1115 
1116   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1117                const MCInstrInfo &MII,
1118                const MCTargetOptions &Options)
1119       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1120     MCAsmParserExtension::Initialize(Parser);
1121 
1122     if (getFeatureBits().none()) {
1123       // Set default features.
1124       copySTI().ToggleFeature("southern-islands");
1125     }
1126 
1127     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1128 
1129     {
1130       // TODO: make those pre-defined variables read-only.
1131       // Currently there is none suitable machinery in the core llvm-mc for this.
1132       // MCSymbol::isRedefinable is intended for another purpose, and
1133       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
1134       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1135       MCContext &Ctx = getContext();
1136       if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
1137         MCSymbol *Sym =
1138             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1139         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1140         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1141         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1142         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1143         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1144       } else {
1145         MCSymbol *Sym =
1146             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1147         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1148         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1149         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1150         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1151         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1152       }
1153       if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
1154         initializeGprCountSymbol(IS_VGPR);
1155         initializeGprCountSymbol(IS_SGPR);
1156       } else
1157         KernelScope.initialize(getContext());
1158     }
1159   }
1160 
1161   bool hasXNACK() const {
1162     return AMDGPU::hasXNACK(getSTI());
1163   }
1164 
1165   bool hasMIMG_R128() const {
1166     return AMDGPU::hasMIMG_R128(getSTI());
1167   }
1168 
1169   bool hasPackedD16() const {
1170     return AMDGPU::hasPackedD16(getSTI());
1171   }
1172 
1173   bool hasGFX10A16() const {
1174     return AMDGPU::hasGFX10A16(getSTI());
1175   }
1176 
1177   bool isSI() const {
1178     return AMDGPU::isSI(getSTI());
1179   }
1180 
1181   bool isCI() const {
1182     return AMDGPU::isCI(getSTI());
1183   }
1184 
1185   bool isVI() const {
1186     return AMDGPU::isVI(getSTI());
1187   }
1188 
1189   bool isGFX9() const {
1190     return AMDGPU::isGFX9(getSTI());
1191   }
1192 
1193   bool isGFX9Plus() const {
1194     return AMDGPU::isGFX9Plus(getSTI());
1195   }
1196 
1197   bool isGFX10() const {
1198     return AMDGPU::isGFX10(getSTI());
1199   }
1200 
1201   bool isGFX10_BEncoding() const {
1202     return AMDGPU::isGFX10_BEncoding(getSTI());
1203   }
1204 
1205   bool hasInv2PiInlineImm() const {
1206     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1207   }
1208 
1209   bool hasFlatOffsets() const {
1210     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1211   }
1212 
1213   bool hasSGPR102_SGPR103() const {
1214     return !isVI() && !isGFX9();
1215   }
1216 
1217   bool hasSGPR104_SGPR105() const {
1218     return isGFX10();
1219   }
1220 
1221   bool hasIntClamp() const {
1222     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1223   }
1224 
1225   AMDGPUTargetStreamer &getTargetStreamer() {
1226     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1227     return static_cast<AMDGPUTargetStreamer &>(TS);
1228   }
1229 
1230   const MCRegisterInfo *getMRI() const {
1231     // We need this const_cast because for some reason getContext() is not const
1232     // in MCAsmParser.
1233     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1234   }
1235 
1236   const MCInstrInfo *getMII() const {
1237     return &MII;
1238   }
1239 
1240   const FeatureBitset &getFeatureBits() const {
1241     return getSTI().getFeatureBits();
1242   }
1243 
1244   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1245   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1246   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1247 
1248   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1249   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1250   bool isForcedDPP() const { return ForcedDPP; }
1251   bool isForcedSDWA() const { return ForcedSDWA; }
1252   ArrayRef<unsigned> getMatchedVariants() const;
1253   StringRef getMatchedVariantName() const;
1254 
1255   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1256   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1257                      bool RestoreOnFailure);
1258   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1259   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1260                                         SMLoc &EndLoc) override;
1261   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1262   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1263                                       unsigned Kind) override;
1264   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1265                                OperandVector &Operands, MCStreamer &Out,
1266                                uint64_t &ErrorInfo,
1267                                bool MatchingInlineAsm) override;
1268   bool ParseDirective(AsmToken DirectiveID) override;
1269   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1270                                     OperandMode Mode = OperandMode_Default);
1271   StringRef parseMnemonicSuffix(StringRef Name);
1272   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1273                         SMLoc NameLoc, OperandVector &Operands) override;
1274   //bool ProcessInstruction(MCInst &Inst);
1275 
1276   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1277 
1278   OperandMatchResultTy
1279   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1280                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1281                      bool (*ConvertResult)(int64_t &) = nullptr);
1282 
1283   OperandMatchResultTy
1284   parseOperandArrayWithPrefix(const char *Prefix,
1285                               OperandVector &Operands,
1286                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1287                               bool (*ConvertResult)(int64_t&) = nullptr);
1288 
1289   OperandMatchResultTy
1290   parseNamedBit(const char *Name, OperandVector &Operands,
1291                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1292   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1293                                              StringRef &Value);
1294 
1295   bool isModifier();
1296   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1297   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1298   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1299   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1300   bool parseSP3NegModifier();
1301   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1302   OperandMatchResultTy parseReg(OperandVector &Operands);
1303   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1304   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1305   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1306   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1307   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1308   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1309   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1310   OperandMatchResultTy parseUfmt(int64_t &Format);
1311   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1312   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1313   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1314   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1315   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1316   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1317   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1318 
1319   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1320   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1321   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1322   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1323 
1324   bool parseCnt(int64_t &IntVal);
1325   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1326   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1327 
1328 private:
1329   struct OperandInfoTy {
1330     int64_t Id;
1331     bool IsSymbolic = false;
1332     bool IsDefined = false;
1333 
1334     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1335   };
1336 
1337   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1338   bool validateSendMsg(const OperandInfoTy &Msg,
1339                        const OperandInfoTy &Op,
1340                        const OperandInfoTy &Stream,
1341                        const SMLoc Loc);
1342 
1343   bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1344   bool validateHwreg(const OperandInfoTy &HwReg,
1345                      const int64_t Offset,
1346                      const int64_t Width,
1347                      const SMLoc Loc);
1348 
1349   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1350   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1351   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1352 
1353   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1354   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1355   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1356   bool validateSOPLiteral(const MCInst &Inst) const;
1357   bool validateConstantBusLimitations(const MCInst &Inst);
1358   bool validateEarlyClobberLimitations(const MCInst &Inst);
1359   bool validateIntClampSupported(const MCInst &Inst);
1360   bool validateMIMGAtomicDMask(const MCInst &Inst);
1361   bool validateMIMGGatherDMask(const MCInst &Inst);
1362   bool validateMovrels(const MCInst &Inst);
1363   bool validateMIMGDataSize(const MCInst &Inst);
1364   bool validateMIMGAddrSize(const MCInst &Inst);
1365   bool validateMIMGD16(const MCInst &Inst);
1366   bool validateMIMGDim(const MCInst &Inst);
1367   bool validateLdsDirect(const MCInst &Inst);
1368   bool validateOpSel(const MCInst &Inst);
1369   bool validateVccOperand(unsigned Reg) const;
1370   bool validateVOP3Literal(const MCInst &Inst) const;
1371   bool validateMAIAccWrite(const MCInst &Inst);
1372   unsigned getConstantBusLimit(unsigned Opcode) const;
1373   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1374   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1375   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1376 
1377   bool isSupportedMnemo(StringRef Mnemo,
1378                         const FeatureBitset &FBS);
1379   bool isSupportedMnemo(StringRef Mnemo,
1380                         const FeatureBitset &FBS,
1381                         ArrayRef<unsigned> Variants);
1382   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1383 
1384   bool isId(const StringRef Id) const;
1385   bool isId(const AsmToken &Token, const StringRef Id) const;
1386   bool isToken(const AsmToken::TokenKind Kind) const;
1387   bool trySkipId(const StringRef Id);
1388   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1389   bool trySkipToken(const AsmToken::TokenKind Kind);
1390   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1391   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1392   bool parseId(StringRef &Val, const StringRef ErrMsg);
1393 
1394   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1395   AsmToken::TokenKind getTokenKind() const;
1396   bool parseExpr(int64_t &Imm);
1397   bool parseExpr(OperandVector &Operands);
1398   StringRef getTokenStr() const;
1399   AsmToken peekToken();
1400   AsmToken getToken() const;
1401   SMLoc getLoc() const;
1402   void lex();
1403 
1404 public:
1405   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1406   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1407 
1408   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1409   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1410   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1411   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1412   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1413   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1414 
1415   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1416                             const unsigned MinVal,
1417                             const unsigned MaxVal,
1418                             const StringRef ErrMsg);
1419   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1420   bool parseSwizzleOffset(int64_t &Imm);
1421   bool parseSwizzleMacro(int64_t &Imm);
1422   bool parseSwizzleQuadPerm(int64_t &Imm);
1423   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1424   bool parseSwizzleBroadcast(int64_t &Imm);
1425   bool parseSwizzleSwap(int64_t &Imm);
1426   bool parseSwizzleReverse(int64_t &Imm);
1427 
1428   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1429   int64_t parseGPRIdxMacro();
1430 
1431   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1432   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1433   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1434   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1435   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1436 
1437   AMDGPUOperand::Ptr defaultDLC() const;
1438   AMDGPUOperand::Ptr defaultGLC() const;
1439   AMDGPUOperand::Ptr defaultSLC() const;
1440 
1441   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1442   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1443   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1444   AMDGPUOperand::Ptr defaultFlatOffset() const;
1445 
1446   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1447 
1448   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1449                OptionalImmIndexMap &OptionalIdx);
1450   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1451   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1452   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1453 
1454   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1455 
1456   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1457                bool IsAtomic = false);
1458   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1459   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1460 
1461   OperandMatchResultTy parseDim(OperandVector &Operands);
1462   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1463   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1464   AMDGPUOperand::Ptr defaultRowMask() const;
1465   AMDGPUOperand::Ptr defaultBankMask() const;
1466   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1467   AMDGPUOperand::Ptr defaultFI() const;
1468   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1469   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1470 
1471   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1472                                     AMDGPUOperand::ImmTy Type);
1473   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1474   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1475   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1476   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1477   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1478   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1479   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1480                uint64_t BasicInstType,
1481                bool SkipDstVcc = false,
1482                bool SkipSrcVcc = false);
1483 
1484   AMDGPUOperand::Ptr defaultBLGP() const;
1485   AMDGPUOperand::Ptr defaultCBSZ() const;
1486   AMDGPUOperand::Ptr defaultABID() const;
1487 
1488   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1489   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1490 };
1491 
1492 struct OptionalOperand {
1493   const char *Name;
1494   AMDGPUOperand::ImmTy Type;
1495   bool IsBit;
1496   bool (*ConvertResult)(int64_t&);
1497 };
1498 
1499 } // end anonymous namespace
1500 
1501 // May be called with integer type with equivalent bitwidth.
1502 static const fltSemantics *getFltSemantics(unsigned Size) {
1503   switch (Size) {
1504   case 4:
1505     return &APFloat::IEEEsingle();
1506   case 8:
1507     return &APFloat::IEEEdouble();
1508   case 2:
1509     return &APFloat::IEEEhalf();
1510   default:
1511     llvm_unreachable("unsupported fp type");
1512   }
1513 }
1514 
1515 static const fltSemantics *getFltSemantics(MVT VT) {
1516   return getFltSemantics(VT.getSizeInBits() / 8);
1517 }
1518 
1519 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1520   switch (OperandType) {
1521   case AMDGPU::OPERAND_REG_IMM_INT32:
1522   case AMDGPU::OPERAND_REG_IMM_FP32:
1523   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1524   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1525   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1526   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1527     return &APFloat::IEEEsingle();
1528   case AMDGPU::OPERAND_REG_IMM_INT64:
1529   case AMDGPU::OPERAND_REG_IMM_FP64:
1530   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1531   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1532     return &APFloat::IEEEdouble();
1533   case AMDGPU::OPERAND_REG_IMM_INT16:
1534   case AMDGPU::OPERAND_REG_IMM_FP16:
1535   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1536   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1537   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1538   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1539   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1540   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1541   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1542   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1543   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1544   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1545     return &APFloat::IEEEhalf();
1546   default:
1547     llvm_unreachable("unsupported fp type");
1548   }
1549 }
1550 
1551 //===----------------------------------------------------------------------===//
1552 // Operand
1553 //===----------------------------------------------------------------------===//
1554 
1555 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1556   bool Lost;
1557 
1558   // Convert literal to single precision
1559   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1560                                                APFloat::rmNearestTiesToEven,
1561                                                &Lost);
1562   // We allow precision lost but not overflow or underflow
1563   if (Status != APFloat::opOK &&
1564       Lost &&
1565       ((Status & APFloat::opOverflow)  != 0 ||
1566        (Status & APFloat::opUnderflow) != 0)) {
1567     return false;
1568   }
1569 
1570   return true;
1571 }
1572 
1573 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1574   return isUIntN(Size, Val) || isIntN(Size, Val);
1575 }
1576 
1577 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1578   if (VT.getScalarType() == MVT::i16) {
1579     // FP immediate values are broken.
1580     return isInlinableIntLiteral(Val);
1581   }
1582 
1583   // f16/v2f16 operands work correctly for all values.
1584   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1585 }
1586 
1587 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1588 
1589   // This is a hack to enable named inline values like
1590   // shared_base with both 32-bit and 64-bit operands.
1591   // Note that these values are defined as
1592   // 32-bit operands only.
1593   if (isInlineValue()) {
1594     return true;
1595   }
1596 
1597   if (!isImmTy(ImmTyNone)) {
1598     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1599     return false;
1600   }
1601   // TODO: We should avoid using host float here. It would be better to
1602   // check the float bit values which is what a few other places do.
1603   // We've had bot failures before due to weird NaN support on mips hosts.
1604 
1605   APInt Literal(64, Imm.Val);
1606 
1607   if (Imm.IsFPImm) { // We got fp literal token
1608     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1609       return AMDGPU::isInlinableLiteral64(Imm.Val,
1610                                           AsmParser->hasInv2PiInlineImm());
1611     }
1612 
1613     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1614     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1615       return false;
1616 
1617     if (type.getScalarSizeInBits() == 16) {
1618       return isInlineableLiteralOp16(
1619         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1620         type, AsmParser->hasInv2PiInlineImm());
1621     }
1622 
1623     // Check if single precision literal is inlinable
1624     return AMDGPU::isInlinableLiteral32(
1625       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1626       AsmParser->hasInv2PiInlineImm());
1627   }
1628 
1629   // We got int literal token.
1630   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1631     return AMDGPU::isInlinableLiteral64(Imm.Val,
1632                                         AsmParser->hasInv2PiInlineImm());
1633   }
1634 
1635   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1636     return false;
1637   }
1638 
1639   if (type.getScalarSizeInBits() == 16) {
1640     return isInlineableLiteralOp16(
1641       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1642       type, AsmParser->hasInv2PiInlineImm());
1643   }
1644 
1645   return AMDGPU::isInlinableLiteral32(
1646     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1647     AsmParser->hasInv2PiInlineImm());
1648 }
1649 
1650 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1651   // Check that this immediate can be added as literal
1652   if (!isImmTy(ImmTyNone)) {
1653     return false;
1654   }
1655 
1656   if (!Imm.IsFPImm) {
1657     // We got int literal token.
1658 
1659     if (type == MVT::f64 && hasFPModifiers()) {
1660       // Cannot apply fp modifiers to int literals preserving the same semantics
1661       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1662       // disable these cases.
1663       return false;
1664     }
1665 
1666     unsigned Size = type.getSizeInBits();
1667     if (Size == 64)
1668       Size = 32;
1669 
1670     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1671     // types.
1672     return isSafeTruncation(Imm.Val, Size);
1673   }
1674 
1675   // We got fp literal token
1676   if (type == MVT::f64) { // Expected 64-bit fp operand
1677     // We would set low 64-bits of literal to zeroes but we accept this literals
1678     return true;
1679   }
1680 
1681   if (type == MVT::i64) { // Expected 64-bit int operand
1682     // We don't allow fp literals in 64-bit integer instructions. It is
1683     // unclear how we should encode them.
1684     return false;
1685   }
1686 
1687   // We allow fp literals with f16x2 operands assuming that the specified
1688   // literal goes into the lower half and the upper half is zero. We also
1689   // require that the literal may be losslesly converted to f16.
1690   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1691                      (type == MVT::v2i16)? MVT::i16 : type;
1692 
1693   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1694   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1695 }
1696 
1697 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1698   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1699 }
1700 
1701 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1702   if (AsmParser->isVI())
1703     return isVReg32();
1704   else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1705     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1706   else
1707     return false;
1708 }
1709 
1710 bool AMDGPUOperand::isSDWAFP16Operand() const {
1711   return isSDWAOperand(MVT::f16);
1712 }
1713 
1714 bool AMDGPUOperand::isSDWAFP32Operand() const {
1715   return isSDWAOperand(MVT::f32);
1716 }
1717 
1718 bool AMDGPUOperand::isSDWAInt16Operand() const {
1719   return isSDWAOperand(MVT::i16);
1720 }
1721 
1722 bool AMDGPUOperand::isSDWAInt32Operand() const {
1723   return isSDWAOperand(MVT::i32);
1724 }
1725 
1726 bool AMDGPUOperand::isBoolReg() const {
1727   return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1728          (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1729 }
1730 
1731 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1732 {
1733   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1734   assert(Size == 2 || Size == 4 || Size == 8);
1735 
1736   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1737 
1738   if (Imm.Mods.Abs) {
1739     Val &= ~FpSignMask;
1740   }
1741   if (Imm.Mods.Neg) {
1742     Val ^= FpSignMask;
1743   }
1744 
1745   return Val;
1746 }
1747 
1748 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1749   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1750                              Inst.getNumOperands())) {
1751     addLiteralImmOperand(Inst, Imm.Val,
1752                          ApplyModifiers &
1753                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1754   } else {
1755     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1756     Inst.addOperand(MCOperand::createImm(Imm.Val));
1757   }
1758 }
1759 
1760 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1761   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1762   auto OpNum = Inst.getNumOperands();
1763   // Check that this operand accepts literals
1764   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1765 
1766   if (ApplyModifiers) {
1767     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1768     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1769     Val = applyInputFPModifiers(Val, Size);
1770   }
1771 
1772   APInt Literal(64, Val);
1773   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1774 
1775   if (Imm.IsFPImm) { // We got fp literal token
1776     switch (OpTy) {
1777     case AMDGPU::OPERAND_REG_IMM_INT64:
1778     case AMDGPU::OPERAND_REG_IMM_FP64:
1779     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1780     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1781       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1782                                        AsmParser->hasInv2PiInlineImm())) {
1783         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1784         return;
1785       }
1786 
1787       // Non-inlineable
1788       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1789         // For fp operands we check if low 32 bits are zeros
1790         if (Literal.getLoBits(32) != 0) {
1791           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1792           "Can't encode literal as exact 64-bit floating-point operand. "
1793           "Low 32-bits will be set to zero");
1794         }
1795 
1796         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1797         return;
1798       }
1799 
1800       // We don't allow fp literals in 64-bit integer instructions. It is
1801       // unclear how we should encode them. This case should be checked earlier
1802       // in predicate methods (isLiteralImm())
1803       llvm_unreachable("fp literal in 64-bit integer instruction.");
1804 
1805     case AMDGPU::OPERAND_REG_IMM_INT32:
1806     case AMDGPU::OPERAND_REG_IMM_FP32:
1807     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1808     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1809     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1810     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1811     case AMDGPU::OPERAND_REG_IMM_INT16:
1812     case AMDGPU::OPERAND_REG_IMM_FP16:
1813     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1814     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1815     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1816     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1817     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1818     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1819     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1820     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1821     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1822     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1823       bool lost;
1824       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1825       // Convert literal to single precision
1826       FPLiteral.convert(*getOpFltSemantics(OpTy),
1827                         APFloat::rmNearestTiesToEven, &lost);
1828       // We allow precision lost but not overflow or underflow. This should be
1829       // checked earlier in isLiteralImm()
1830 
1831       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1832       Inst.addOperand(MCOperand::createImm(ImmVal));
1833       return;
1834     }
1835     default:
1836       llvm_unreachable("invalid operand size");
1837     }
1838 
1839     return;
1840   }
1841 
1842   // We got int literal token.
1843   // Only sign extend inline immediates.
1844   switch (OpTy) {
1845   case AMDGPU::OPERAND_REG_IMM_INT32:
1846   case AMDGPU::OPERAND_REG_IMM_FP32:
1847   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1848   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1849   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1850   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1851   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1852   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1853     if (isSafeTruncation(Val, 32) &&
1854         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1855                                      AsmParser->hasInv2PiInlineImm())) {
1856       Inst.addOperand(MCOperand::createImm(Val));
1857       return;
1858     }
1859 
1860     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1861     return;
1862 
1863   case AMDGPU::OPERAND_REG_IMM_INT64:
1864   case AMDGPU::OPERAND_REG_IMM_FP64:
1865   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1866   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1867     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1868       Inst.addOperand(MCOperand::createImm(Val));
1869       return;
1870     }
1871 
1872     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1873     return;
1874 
1875   case AMDGPU::OPERAND_REG_IMM_INT16:
1876   case AMDGPU::OPERAND_REG_IMM_FP16:
1877   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1878   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1879   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1880   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1881     if (isSafeTruncation(Val, 16) &&
1882         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1883                                      AsmParser->hasInv2PiInlineImm())) {
1884       Inst.addOperand(MCOperand::createImm(Val));
1885       return;
1886     }
1887 
1888     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1889     return;
1890 
1891   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1892   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1893   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1894   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
1895     assert(isSafeTruncation(Val, 16));
1896     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1897                                         AsmParser->hasInv2PiInlineImm()));
1898 
1899     Inst.addOperand(MCOperand::createImm(Val));
1900     return;
1901   }
1902   default:
1903     llvm_unreachable("invalid operand size");
1904   }
1905 }
1906 
1907 template <unsigned Bitwidth>
1908 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1909   APInt Literal(64, Imm.Val);
1910 
1911   if (!Imm.IsFPImm) {
1912     // We got int literal token.
1913     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1914     return;
1915   }
1916 
1917   bool Lost;
1918   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1919   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1920                     APFloat::rmNearestTiesToEven, &Lost);
1921   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1922 }
1923 
1924 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1925   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1926 }
1927 
1928 static bool isInlineValue(unsigned Reg) {
1929   switch (Reg) {
1930   case AMDGPU::SRC_SHARED_BASE:
1931   case AMDGPU::SRC_SHARED_LIMIT:
1932   case AMDGPU::SRC_PRIVATE_BASE:
1933   case AMDGPU::SRC_PRIVATE_LIMIT:
1934   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1935     return true;
1936   case AMDGPU::SRC_VCCZ:
1937   case AMDGPU::SRC_EXECZ:
1938   case AMDGPU::SRC_SCC:
1939     return true;
1940   case AMDGPU::SGPR_NULL:
1941     return true;
1942   default:
1943     return false;
1944   }
1945 }
1946 
1947 bool AMDGPUOperand::isInlineValue() const {
1948   return isRegKind() && ::isInlineValue(getReg());
1949 }
1950 
1951 //===----------------------------------------------------------------------===//
1952 // AsmParser
1953 //===----------------------------------------------------------------------===//
1954 
1955 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1956   if (Is == IS_VGPR) {
1957     switch (RegWidth) {
1958       default: return -1;
1959       case 1: return AMDGPU::VGPR_32RegClassID;
1960       case 2: return AMDGPU::VReg_64RegClassID;
1961       case 3: return AMDGPU::VReg_96RegClassID;
1962       case 4: return AMDGPU::VReg_128RegClassID;
1963       case 5: return AMDGPU::VReg_160RegClassID;
1964       case 6: return AMDGPU::VReg_192RegClassID;
1965       case 8: return AMDGPU::VReg_256RegClassID;
1966       case 16: return AMDGPU::VReg_512RegClassID;
1967       case 32: return AMDGPU::VReg_1024RegClassID;
1968     }
1969   } else if (Is == IS_TTMP) {
1970     switch (RegWidth) {
1971       default: return -1;
1972       case 1: return AMDGPU::TTMP_32RegClassID;
1973       case 2: return AMDGPU::TTMP_64RegClassID;
1974       case 4: return AMDGPU::TTMP_128RegClassID;
1975       case 8: return AMDGPU::TTMP_256RegClassID;
1976       case 16: return AMDGPU::TTMP_512RegClassID;
1977     }
1978   } else if (Is == IS_SGPR) {
1979     switch (RegWidth) {
1980       default: return -1;
1981       case 1: return AMDGPU::SGPR_32RegClassID;
1982       case 2: return AMDGPU::SGPR_64RegClassID;
1983       case 3: return AMDGPU::SGPR_96RegClassID;
1984       case 4: return AMDGPU::SGPR_128RegClassID;
1985       case 5: return AMDGPU::SGPR_160RegClassID;
1986       case 6: return AMDGPU::SGPR_192RegClassID;
1987       case 8: return AMDGPU::SGPR_256RegClassID;
1988       case 16: return AMDGPU::SGPR_512RegClassID;
1989     }
1990   } else if (Is == IS_AGPR) {
1991     switch (RegWidth) {
1992       default: return -1;
1993       case 1: return AMDGPU::AGPR_32RegClassID;
1994       case 2: return AMDGPU::AReg_64RegClassID;
1995       case 3: return AMDGPU::AReg_96RegClassID;
1996       case 4: return AMDGPU::AReg_128RegClassID;
1997       case 5: return AMDGPU::AReg_160RegClassID;
1998       case 6: return AMDGPU::AReg_192RegClassID;
1999       case 8: return AMDGPU::AReg_256RegClassID;
2000       case 16: return AMDGPU::AReg_512RegClassID;
2001       case 32: return AMDGPU::AReg_1024RegClassID;
2002     }
2003   }
2004   return -1;
2005 }
2006 
2007 static unsigned getSpecialRegForName(StringRef RegName) {
2008   return StringSwitch<unsigned>(RegName)
2009     .Case("exec", AMDGPU::EXEC)
2010     .Case("vcc", AMDGPU::VCC)
2011     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2012     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2013     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2014     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2015     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2016     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2017     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2018     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2019     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2020     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2021     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2022     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2023     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2024     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2025     .Case("m0", AMDGPU::M0)
2026     .Case("vccz", AMDGPU::SRC_VCCZ)
2027     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2028     .Case("execz", AMDGPU::SRC_EXECZ)
2029     .Case("src_execz", AMDGPU::SRC_EXECZ)
2030     .Case("scc", AMDGPU::SRC_SCC)
2031     .Case("src_scc", AMDGPU::SRC_SCC)
2032     .Case("tba", AMDGPU::TBA)
2033     .Case("tma", AMDGPU::TMA)
2034     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2035     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2036     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2037     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2038     .Case("vcc_lo", AMDGPU::VCC_LO)
2039     .Case("vcc_hi", AMDGPU::VCC_HI)
2040     .Case("exec_lo", AMDGPU::EXEC_LO)
2041     .Case("exec_hi", AMDGPU::EXEC_HI)
2042     .Case("tma_lo", AMDGPU::TMA_LO)
2043     .Case("tma_hi", AMDGPU::TMA_HI)
2044     .Case("tba_lo", AMDGPU::TBA_LO)
2045     .Case("tba_hi", AMDGPU::TBA_HI)
2046     .Case("pc", AMDGPU::PC_REG)
2047     .Case("null", AMDGPU::SGPR_NULL)
2048     .Default(AMDGPU::NoRegister);
2049 }
2050 
2051 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2052                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2053   auto R = parseRegister();
2054   if (!R) return true;
2055   assert(R->isReg());
2056   RegNo = R->getReg();
2057   StartLoc = R->getStartLoc();
2058   EndLoc = R->getEndLoc();
2059   return false;
2060 }
2061 
2062 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2063                                     SMLoc &EndLoc) {
2064   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2065 }
2066 
2067 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2068                                                        SMLoc &StartLoc,
2069                                                        SMLoc &EndLoc) {
2070   bool Result =
2071       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2072   bool PendingErrors = getParser().hasPendingError();
2073   getParser().clearPendingErrors();
2074   if (PendingErrors)
2075     return MatchOperand_ParseFail;
2076   if (Result)
2077     return MatchOperand_NoMatch;
2078   return MatchOperand_Success;
2079 }
2080 
2081 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2082                                             RegisterKind RegKind, unsigned Reg1,
2083                                             SMLoc Loc) {
2084   switch (RegKind) {
2085   case IS_SPECIAL:
2086     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2087       Reg = AMDGPU::EXEC;
2088       RegWidth = 2;
2089       return true;
2090     }
2091     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2092       Reg = AMDGPU::FLAT_SCR;
2093       RegWidth = 2;
2094       return true;
2095     }
2096     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2097       Reg = AMDGPU::XNACK_MASK;
2098       RegWidth = 2;
2099       return true;
2100     }
2101     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2102       Reg = AMDGPU::VCC;
2103       RegWidth = 2;
2104       return true;
2105     }
2106     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2107       Reg = AMDGPU::TBA;
2108       RegWidth = 2;
2109       return true;
2110     }
2111     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2112       Reg = AMDGPU::TMA;
2113       RegWidth = 2;
2114       return true;
2115     }
2116     Error(Loc, "register does not fit in the list");
2117     return false;
2118   case IS_VGPR:
2119   case IS_SGPR:
2120   case IS_AGPR:
2121   case IS_TTMP:
2122     if (Reg1 != Reg + RegWidth) {
2123       Error(Loc, "registers in a list must have consecutive indices");
2124       return false;
2125     }
2126     RegWidth++;
2127     return true;
2128   default:
2129     llvm_unreachable("unexpected register kind");
2130   }
2131 }
2132 
2133 struct RegInfo {
2134   StringLiteral Name;
2135   RegisterKind Kind;
2136 };
2137 
2138 static constexpr RegInfo RegularRegisters[] = {
2139   {{"v"},    IS_VGPR},
2140   {{"s"},    IS_SGPR},
2141   {{"ttmp"}, IS_TTMP},
2142   {{"acc"},  IS_AGPR},
2143   {{"a"},    IS_AGPR},
2144 };
2145 
2146 static bool isRegularReg(RegisterKind Kind) {
2147   return Kind == IS_VGPR ||
2148          Kind == IS_SGPR ||
2149          Kind == IS_TTMP ||
2150          Kind == IS_AGPR;
2151 }
2152 
2153 static const RegInfo* getRegularRegInfo(StringRef Str) {
2154   for (const RegInfo &Reg : RegularRegisters)
2155     if (Str.startswith(Reg.Name))
2156       return &Reg;
2157   return nullptr;
2158 }
2159 
2160 static bool getRegNum(StringRef Str, unsigned& Num) {
2161   return !Str.getAsInteger(10, Num);
2162 }
2163 
2164 bool
2165 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2166                             const AsmToken &NextToken) const {
2167 
2168   // A list of consecutive registers: [s0,s1,s2,s3]
2169   if (Token.is(AsmToken::LBrac))
2170     return true;
2171 
2172   if (!Token.is(AsmToken::Identifier))
2173     return false;
2174 
2175   // A single register like s0 or a range of registers like s[0:1]
2176 
2177   StringRef Str = Token.getString();
2178   const RegInfo *Reg = getRegularRegInfo(Str);
2179   if (Reg) {
2180     StringRef RegName = Reg->Name;
2181     StringRef RegSuffix = Str.substr(RegName.size());
2182     if (!RegSuffix.empty()) {
2183       unsigned Num;
2184       // A single register with an index: rXX
2185       if (getRegNum(RegSuffix, Num))
2186         return true;
2187     } else {
2188       // A range of registers: r[XX:YY].
2189       if (NextToken.is(AsmToken::LBrac))
2190         return true;
2191     }
2192   }
2193 
2194   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2195 }
2196 
2197 bool
2198 AMDGPUAsmParser::isRegister()
2199 {
2200   return isRegister(getToken(), peekToken());
2201 }
2202 
2203 unsigned
2204 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2205                                unsigned RegNum,
2206                                unsigned RegWidth,
2207                                SMLoc Loc) {
2208 
2209   assert(isRegularReg(RegKind));
2210 
2211   unsigned AlignSize = 1;
2212   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2213     // SGPR and TTMP registers must be aligned.
2214     // Max required alignment is 4 dwords.
2215     AlignSize = std::min(RegWidth, 4u);
2216   }
2217 
2218   if (RegNum % AlignSize != 0) {
2219     Error(Loc, "invalid register alignment");
2220     return AMDGPU::NoRegister;
2221   }
2222 
2223   unsigned RegIdx = RegNum / AlignSize;
2224   int RCID = getRegClass(RegKind, RegWidth);
2225   if (RCID == -1) {
2226     Error(Loc, "invalid or unsupported register size");
2227     return AMDGPU::NoRegister;
2228   }
2229 
2230   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2231   const MCRegisterClass RC = TRI->getRegClass(RCID);
2232   if (RegIdx >= RC.getNumRegs()) {
2233     Error(Loc, "register index is out of range");
2234     return AMDGPU::NoRegister;
2235   }
2236 
2237   return RC.getRegister(RegIdx);
2238 }
2239 
2240 bool
2241 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2242   int64_t RegLo, RegHi;
2243   if (!skipToken(AsmToken::LBrac, "missing register index"))
2244     return false;
2245 
2246   SMLoc FirstIdxLoc = getLoc();
2247   SMLoc SecondIdxLoc;
2248 
2249   if (!parseExpr(RegLo))
2250     return false;
2251 
2252   if (trySkipToken(AsmToken::Colon)) {
2253     SecondIdxLoc = getLoc();
2254     if (!parseExpr(RegHi))
2255       return false;
2256   } else {
2257     RegHi = RegLo;
2258   }
2259 
2260   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2261     return false;
2262 
2263   if (!isUInt<32>(RegLo)) {
2264     Error(FirstIdxLoc, "invalid register index");
2265     return false;
2266   }
2267 
2268   if (!isUInt<32>(RegHi)) {
2269     Error(SecondIdxLoc, "invalid register index");
2270     return false;
2271   }
2272 
2273   if (RegLo > RegHi) {
2274     Error(FirstIdxLoc, "first register index should not exceed second index");
2275     return false;
2276   }
2277 
2278   Num = static_cast<unsigned>(RegLo);
2279   Width = (RegHi - RegLo) + 1;
2280   return true;
2281 }
2282 
2283 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2284                                           unsigned &RegNum, unsigned &RegWidth,
2285                                           SmallVectorImpl<AsmToken> &Tokens) {
2286   assert(isToken(AsmToken::Identifier));
2287   unsigned Reg = getSpecialRegForName(getTokenStr());
2288   if (Reg) {
2289     RegNum = 0;
2290     RegWidth = 1;
2291     RegKind = IS_SPECIAL;
2292     Tokens.push_back(getToken());
2293     lex(); // skip register name
2294   }
2295   return Reg;
2296 }
2297 
2298 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2299                                           unsigned &RegNum, unsigned &RegWidth,
2300                                           SmallVectorImpl<AsmToken> &Tokens) {
2301   assert(isToken(AsmToken::Identifier));
2302   StringRef RegName = getTokenStr();
2303   auto Loc = getLoc();
2304 
2305   const RegInfo *RI = getRegularRegInfo(RegName);
2306   if (!RI) {
2307     Error(Loc, "invalid register name");
2308     return AMDGPU::NoRegister;
2309   }
2310 
2311   Tokens.push_back(getToken());
2312   lex(); // skip register name
2313 
2314   RegKind = RI->Kind;
2315   StringRef RegSuffix = RegName.substr(RI->Name.size());
2316   if (!RegSuffix.empty()) {
2317     // Single 32-bit register: vXX.
2318     if (!getRegNum(RegSuffix, RegNum)) {
2319       Error(Loc, "invalid register index");
2320       return AMDGPU::NoRegister;
2321     }
2322     RegWidth = 1;
2323   } else {
2324     // Range of registers: v[XX:YY]. ":YY" is optional.
2325     if (!ParseRegRange(RegNum, RegWidth))
2326       return AMDGPU::NoRegister;
2327   }
2328 
2329   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2330 }
2331 
2332 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2333                                        unsigned &RegWidth,
2334                                        SmallVectorImpl<AsmToken> &Tokens) {
2335   unsigned Reg = AMDGPU::NoRegister;
2336   auto ListLoc = getLoc();
2337 
2338   if (!skipToken(AsmToken::LBrac,
2339                  "expected a register or a list of registers")) {
2340     return AMDGPU::NoRegister;
2341   }
2342 
2343   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2344 
2345   auto Loc = getLoc();
2346   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2347     return AMDGPU::NoRegister;
2348   if (RegWidth != 1) {
2349     Error(Loc, "expected a single 32-bit register");
2350     return AMDGPU::NoRegister;
2351   }
2352 
2353   for (; trySkipToken(AsmToken::Comma); ) {
2354     RegisterKind NextRegKind;
2355     unsigned NextReg, NextRegNum, NextRegWidth;
2356     Loc = getLoc();
2357 
2358     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2359                              NextRegNum, NextRegWidth,
2360                              Tokens)) {
2361       return AMDGPU::NoRegister;
2362     }
2363     if (NextRegWidth != 1) {
2364       Error(Loc, "expected a single 32-bit register");
2365       return AMDGPU::NoRegister;
2366     }
2367     if (NextRegKind != RegKind) {
2368       Error(Loc, "registers in a list must be of the same kind");
2369       return AMDGPU::NoRegister;
2370     }
2371     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2372       return AMDGPU::NoRegister;
2373   }
2374 
2375   if (!skipToken(AsmToken::RBrac,
2376                  "expected a comma or a closing square bracket")) {
2377     return AMDGPU::NoRegister;
2378   }
2379 
2380   if (isRegularReg(RegKind))
2381     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2382 
2383   return Reg;
2384 }
2385 
2386 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2387                                           unsigned &RegNum, unsigned &RegWidth,
2388                                           SmallVectorImpl<AsmToken> &Tokens) {
2389   auto Loc = getLoc();
2390   Reg = AMDGPU::NoRegister;
2391 
2392   if (isToken(AsmToken::Identifier)) {
2393     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2394     if (Reg == AMDGPU::NoRegister)
2395       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2396   } else {
2397     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2398   }
2399 
2400   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2401   if (Reg == AMDGPU::NoRegister) {
2402     assert(Parser.hasPendingError());
2403     return false;
2404   }
2405 
2406   if (!subtargetHasRegister(*TRI, Reg)) {
2407     if (Reg == AMDGPU::SGPR_NULL) {
2408       Error(Loc, "'null' operand is not supported on this GPU");
2409     } else {
2410       Error(Loc, "register not available on this GPU");
2411     }
2412     return false;
2413   }
2414 
2415   return true;
2416 }
2417 
2418 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2419                                           unsigned &RegNum, unsigned &RegWidth,
2420                                           bool RestoreOnFailure /*=false*/) {
2421   Reg = AMDGPU::NoRegister;
2422 
2423   SmallVector<AsmToken, 1> Tokens;
2424   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2425     if (RestoreOnFailure) {
2426       while (!Tokens.empty()) {
2427         getLexer().UnLex(Tokens.pop_back_val());
2428       }
2429     }
2430     return true;
2431   }
2432   return false;
2433 }
2434 
2435 Optional<StringRef>
2436 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2437   switch (RegKind) {
2438   case IS_VGPR:
2439     return StringRef(".amdgcn.next_free_vgpr");
2440   case IS_SGPR:
2441     return StringRef(".amdgcn.next_free_sgpr");
2442   default:
2443     return None;
2444   }
2445 }
2446 
2447 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2448   auto SymbolName = getGprCountSymbolName(RegKind);
2449   assert(SymbolName && "initializing invalid register kind");
2450   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2451   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2452 }
2453 
2454 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2455                                             unsigned DwordRegIndex,
2456                                             unsigned RegWidth) {
2457   // Symbols are only defined for GCN targets
2458   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2459     return true;
2460 
2461   auto SymbolName = getGprCountSymbolName(RegKind);
2462   if (!SymbolName)
2463     return true;
2464   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2465 
2466   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2467   int64_t OldCount;
2468 
2469   if (!Sym->isVariable())
2470     return !Error(getParser().getTok().getLoc(),
2471                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2472   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2473     return !Error(
2474         getParser().getTok().getLoc(),
2475         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2476 
2477   if (OldCount <= NewMax)
2478     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2479 
2480   return true;
2481 }
2482 
2483 std::unique_ptr<AMDGPUOperand>
2484 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2485   const auto &Tok = Parser.getTok();
2486   SMLoc StartLoc = Tok.getLoc();
2487   SMLoc EndLoc = Tok.getEndLoc();
2488   RegisterKind RegKind;
2489   unsigned Reg, RegNum, RegWidth;
2490 
2491   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2492     return nullptr;
2493   }
2494   if (isHsaAbiVersion3(&getSTI())) {
2495     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2496       return nullptr;
2497   } else
2498     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2499   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2500 }
2501 
2502 OperandMatchResultTy
2503 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2504   // TODO: add syntactic sugar for 1/(2*PI)
2505 
2506   assert(!isRegister());
2507   assert(!isModifier());
2508 
2509   const auto& Tok = getToken();
2510   const auto& NextTok = peekToken();
2511   bool IsReal = Tok.is(AsmToken::Real);
2512   SMLoc S = getLoc();
2513   bool Negate = false;
2514 
2515   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2516     lex();
2517     IsReal = true;
2518     Negate = true;
2519   }
2520 
2521   if (IsReal) {
2522     // Floating-point expressions are not supported.
2523     // Can only allow floating-point literals with an
2524     // optional sign.
2525 
2526     StringRef Num = getTokenStr();
2527     lex();
2528 
2529     APFloat RealVal(APFloat::IEEEdouble());
2530     auto roundMode = APFloat::rmNearestTiesToEven;
2531     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2532       return MatchOperand_ParseFail;
2533     }
2534     if (Negate)
2535       RealVal.changeSign();
2536 
2537     Operands.push_back(
2538       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2539                                AMDGPUOperand::ImmTyNone, true));
2540 
2541     return MatchOperand_Success;
2542 
2543   } else {
2544     int64_t IntVal;
2545     const MCExpr *Expr;
2546     SMLoc S = getLoc();
2547 
2548     if (HasSP3AbsModifier) {
2549       // This is a workaround for handling expressions
2550       // as arguments of SP3 'abs' modifier, for example:
2551       //     |1.0|
2552       //     |-1|
2553       //     |1+x|
2554       // This syntax is not compatible with syntax of standard
2555       // MC expressions (due to the trailing '|').
2556       SMLoc EndLoc;
2557       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2558         return MatchOperand_ParseFail;
2559     } else {
2560       if (Parser.parseExpression(Expr))
2561         return MatchOperand_ParseFail;
2562     }
2563 
2564     if (Expr->evaluateAsAbsolute(IntVal)) {
2565       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2566     } else {
2567       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2568     }
2569 
2570     return MatchOperand_Success;
2571   }
2572 
2573   return MatchOperand_NoMatch;
2574 }
2575 
2576 OperandMatchResultTy
2577 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2578   if (!isRegister())
2579     return MatchOperand_NoMatch;
2580 
2581   if (auto R = parseRegister()) {
2582     assert(R->isReg());
2583     Operands.push_back(std::move(R));
2584     return MatchOperand_Success;
2585   }
2586   return MatchOperand_ParseFail;
2587 }
2588 
2589 OperandMatchResultTy
2590 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2591   auto res = parseReg(Operands);
2592   if (res != MatchOperand_NoMatch) {
2593     return res;
2594   } else if (isModifier()) {
2595     return MatchOperand_NoMatch;
2596   } else {
2597     return parseImm(Operands, HasSP3AbsMod);
2598   }
2599 }
2600 
2601 bool
2602 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2603   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2604     const auto &str = Token.getString();
2605     return str == "abs" || str == "neg" || str == "sext";
2606   }
2607   return false;
2608 }
2609 
2610 bool
2611 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2612   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2613 }
2614 
2615 bool
2616 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2617   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2618 }
2619 
2620 bool
2621 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2622   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2623 }
2624 
2625 // Check if this is an operand modifier or an opcode modifier
2626 // which may look like an expression but it is not. We should
2627 // avoid parsing these modifiers as expressions. Currently
2628 // recognized sequences are:
2629 //   |...|
2630 //   abs(...)
2631 //   neg(...)
2632 //   sext(...)
2633 //   -reg
2634 //   -|...|
2635 //   -abs(...)
2636 //   name:...
2637 // Note that simple opcode modifiers like 'gds' may be parsed as
2638 // expressions; this is a special case. See getExpressionAsToken.
2639 //
2640 bool
2641 AMDGPUAsmParser::isModifier() {
2642 
2643   AsmToken Tok = getToken();
2644   AsmToken NextToken[2];
2645   peekTokens(NextToken);
2646 
2647   return isOperandModifier(Tok, NextToken[0]) ||
2648          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2649          isOpcodeModifierWithVal(Tok, NextToken[0]);
2650 }
2651 
2652 // Check if the current token is an SP3 'neg' modifier.
2653 // Currently this modifier is allowed in the following context:
2654 //
2655 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2656 // 2. Before an 'abs' modifier: -abs(...)
2657 // 3. Before an SP3 'abs' modifier: -|...|
2658 //
2659 // In all other cases "-" is handled as a part
2660 // of an expression that follows the sign.
2661 //
2662 // Note: When "-" is followed by an integer literal,
2663 // this is interpreted as integer negation rather
2664 // than a floating-point NEG modifier applied to N.
2665 // Beside being contr-intuitive, such use of floating-point
2666 // NEG modifier would have resulted in different meaning
2667 // of integer literals used with VOP1/2/C and VOP3,
2668 // for example:
2669 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2670 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2671 // Negative fp literals with preceding "-" are
2672 // handled likewise for unifomtity
2673 //
2674 bool
2675 AMDGPUAsmParser::parseSP3NegModifier() {
2676 
2677   AsmToken NextToken[2];
2678   peekTokens(NextToken);
2679 
2680   if (isToken(AsmToken::Minus) &&
2681       (isRegister(NextToken[0], NextToken[1]) ||
2682        NextToken[0].is(AsmToken::Pipe) ||
2683        isId(NextToken[0], "abs"))) {
2684     lex();
2685     return true;
2686   }
2687 
2688   return false;
2689 }
2690 
2691 OperandMatchResultTy
2692 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2693                                               bool AllowImm) {
2694   bool Neg, SP3Neg;
2695   bool Abs, SP3Abs;
2696   SMLoc Loc;
2697 
2698   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2699   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2700     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2701     return MatchOperand_ParseFail;
2702   }
2703 
2704   SP3Neg = parseSP3NegModifier();
2705 
2706   Loc = getLoc();
2707   Neg = trySkipId("neg");
2708   if (Neg && SP3Neg) {
2709     Error(Loc, "expected register or immediate");
2710     return MatchOperand_ParseFail;
2711   }
2712   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2713     return MatchOperand_ParseFail;
2714 
2715   Abs = trySkipId("abs");
2716   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2717     return MatchOperand_ParseFail;
2718 
2719   Loc = getLoc();
2720   SP3Abs = trySkipToken(AsmToken::Pipe);
2721   if (Abs && SP3Abs) {
2722     Error(Loc, "expected register or immediate");
2723     return MatchOperand_ParseFail;
2724   }
2725 
2726   OperandMatchResultTy Res;
2727   if (AllowImm) {
2728     Res = parseRegOrImm(Operands, SP3Abs);
2729   } else {
2730     Res = parseReg(Operands);
2731   }
2732   if (Res != MatchOperand_Success) {
2733     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2734   }
2735 
2736   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2737     return MatchOperand_ParseFail;
2738   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2739     return MatchOperand_ParseFail;
2740   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2741     return MatchOperand_ParseFail;
2742 
2743   AMDGPUOperand::Modifiers Mods;
2744   Mods.Abs = Abs || SP3Abs;
2745   Mods.Neg = Neg || SP3Neg;
2746 
2747   if (Mods.hasFPModifiers()) {
2748     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2749     if (Op.isExpr()) {
2750       Error(Op.getStartLoc(), "expected an absolute expression");
2751       return MatchOperand_ParseFail;
2752     }
2753     Op.setModifiers(Mods);
2754   }
2755   return MatchOperand_Success;
2756 }
2757 
2758 OperandMatchResultTy
2759 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2760                                                bool AllowImm) {
2761   bool Sext = trySkipId("sext");
2762   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2763     return MatchOperand_ParseFail;
2764 
2765   OperandMatchResultTy Res;
2766   if (AllowImm) {
2767     Res = parseRegOrImm(Operands);
2768   } else {
2769     Res = parseReg(Operands);
2770   }
2771   if (Res != MatchOperand_Success) {
2772     return Sext? MatchOperand_ParseFail : Res;
2773   }
2774 
2775   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2776     return MatchOperand_ParseFail;
2777 
2778   AMDGPUOperand::Modifiers Mods;
2779   Mods.Sext = Sext;
2780 
2781   if (Mods.hasIntModifiers()) {
2782     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2783     if (Op.isExpr()) {
2784       Error(Op.getStartLoc(), "expected an absolute expression");
2785       return MatchOperand_ParseFail;
2786     }
2787     Op.setModifiers(Mods);
2788   }
2789 
2790   return MatchOperand_Success;
2791 }
2792 
2793 OperandMatchResultTy
2794 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2795   return parseRegOrImmWithFPInputMods(Operands, false);
2796 }
2797 
2798 OperandMatchResultTy
2799 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2800   return parseRegOrImmWithIntInputMods(Operands, false);
2801 }
2802 
2803 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2804   auto Loc = getLoc();
2805   if (trySkipId("off")) {
2806     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2807                                                 AMDGPUOperand::ImmTyOff, false));
2808     return MatchOperand_Success;
2809   }
2810 
2811   if (!isRegister())
2812     return MatchOperand_NoMatch;
2813 
2814   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2815   if (Reg) {
2816     Operands.push_back(std::move(Reg));
2817     return MatchOperand_Success;
2818   }
2819 
2820   return MatchOperand_ParseFail;
2821 
2822 }
2823 
2824 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2825   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2826 
2827   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2828       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2829       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2830       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2831     return Match_InvalidOperand;
2832 
2833   if ((TSFlags & SIInstrFlags::VOP3) &&
2834       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2835       getForcedEncodingSize() != 64)
2836     return Match_PreferE32;
2837 
2838   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2839       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2840     // v_mac_f32/16 allow only dst_sel == DWORD;
2841     auto OpNum =
2842         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2843     const auto &Op = Inst.getOperand(OpNum);
2844     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2845       return Match_InvalidOperand;
2846     }
2847   }
2848 
2849   return Match_Success;
2850 }
2851 
2852 static ArrayRef<unsigned> getAllVariants() {
2853   static const unsigned Variants[] = {
2854     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2855     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2856   };
2857 
2858   return makeArrayRef(Variants);
2859 }
2860 
2861 // What asm variants we should check
2862 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2863   if (getForcedEncodingSize() == 32) {
2864     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2865     return makeArrayRef(Variants);
2866   }
2867 
2868   if (isForcedVOP3()) {
2869     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2870     return makeArrayRef(Variants);
2871   }
2872 
2873   if (isForcedSDWA()) {
2874     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2875                                         AMDGPUAsmVariants::SDWA9};
2876     return makeArrayRef(Variants);
2877   }
2878 
2879   if (isForcedDPP()) {
2880     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2881     return makeArrayRef(Variants);
2882   }
2883 
2884   return getAllVariants();
2885 }
2886 
2887 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
2888   if (getForcedEncodingSize() == 32)
2889     return "e32";
2890 
2891   if (isForcedVOP3())
2892     return "e64";
2893 
2894   if (isForcedSDWA())
2895     return "sdwa";
2896 
2897   if (isForcedDPP())
2898     return "dpp";
2899 
2900   return "";
2901 }
2902 
2903 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2904   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2905   const unsigned Num = Desc.getNumImplicitUses();
2906   for (unsigned i = 0; i < Num; ++i) {
2907     unsigned Reg = Desc.ImplicitUses[i];
2908     switch (Reg) {
2909     case AMDGPU::FLAT_SCR:
2910     case AMDGPU::VCC:
2911     case AMDGPU::VCC_LO:
2912     case AMDGPU::VCC_HI:
2913     case AMDGPU::M0:
2914       return Reg;
2915     default:
2916       break;
2917     }
2918   }
2919   return AMDGPU::NoRegister;
2920 }
2921 
2922 // NB: This code is correct only when used to check constant
2923 // bus limitations because GFX7 support no f16 inline constants.
2924 // Note that there are no cases when a GFX7 opcode violates
2925 // constant bus limitations due to the use of an f16 constant.
2926 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2927                                        unsigned OpIdx) const {
2928   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2929 
2930   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2931     return false;
2932   }
2933 
2934   const MCOperand &MO = Inst.getOperand(OpIdx);
2935 
2936   int64_t Val = MO.getImm();
2937   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2938 
2939   switch (OpSize) { // expected operand size
2940   case 8:
2941     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2942   case 4:
2943     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2944   case 2: {
2945     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2946     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
2947         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
2948         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
2949       return AMDGPU::isInlinableIntLiteral(Val);
2950 
2951     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2952         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2953         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
2954       return AMDGPU::isInlinableIntLiteralV216(Val);
2955 
2956     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2957         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2958         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
2959       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2960 
2961     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2962   }
2963   default:
2964     llvm_unreachable("invalid operand size");
2965   }
2966 }
2967 
2968 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
2969   if (!isGFX10())
2970     return 1;
2971 
2972   switch (Opcode) {
2973   // 64-bit shift instructions can use only one scalar value input
2974   case AMDGPU::V_LSHLREV_B64:
2975   case AMDGPU::V_LSHLREV_B64_gfx10:
2976   case AMDGPU::V_LSHL_B64:
2977   case AMDGPU::V_LSHRREV_B64:
2978   case AMDGPU::V_LSHRREV_B64_gfx10:
2979   case AMDGPU::V_LSHR_B64:
2980   case AMDGPU::V_ASHRREV_I64:
2981   case AMDGPU::V_ASHRREV_I64_gfx10:
2982   case AMDGPU::V_ASHR_I64:
2983     return 1;
2984   default:
2985     return 2;
2986   }
2987 }
2988 
2989 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2990   const MCOperand &MO = Inst.getOperand(OpIdx);
2991   if (MO.isImm()) {
2992     return !isInlineConstant(Inst, OpIdx);
2993   } else if (MO.isReg()) {
2994     auto Reg = MO.getReg();
2995     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2996     return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL;
2997   } else {
2998     return true;
2999   }
3000 }
3001 
3002 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
3003   const unsigned Opcode = Inst.getOpcode();
3004   const MCInstrDesc &Desc = MII.get(Opcode);
3005   unsigned ConstantBusUseCount = 0;
3006   unsigned NumLiterals = 0;
3007   unsigned LiteralSize;
3008 
3009   if (Desc.TSFlags &
3010       (SIInstrFlags::VOPC |
3011        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3012        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3013        SIInstrFlags::SDWA)) {
3014     // Check special imm operands (used by madmk, etc)
3015     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3016       ++ConstantBusUseCount;
3017     }
3018 
3019     SmallDenseSet<unsigned> SGPRsUsed;
3020     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3021     if (SGPRUsed != AMDGPU::NoRegister) {
3022       SGPRsUsed.insert(SGPRUsed);
3023       ++ConstantBusUseCount;
3024     }
3025 
3026     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3027     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3028     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3029 
3030     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3031 
3032     for (int OpIdx : OpIndices) {
3033       if (OpIdx == -1) break;
3034 
3035       const MCOperand &MO = Inst.getOperand(OpIdx);
3036       if (usesConstantBus(Inst, OpIdx)) {
3037         if (MO.isReg()) {
3038           const unsigned Reg = mc2PseudoReg(MO.getReg());
3039           // Pairs of registers with a partial intersections like these
3040           //   s0, s[0:1]
3041           //   flat_scratch_lo, flat_scratch
3042           //   flat_scratch_lo, flat_scratch_hi
3043           // are theoretically valid but they are disabled anyway.
3044           // Note that this code mimics SIInstrInfo::verifyInstruction
3045           if (!SGPRsUsed.count(Reg)) {
3046             SGPRsUsed.insert(Reg);
3047             ++ConstantBusUseCount;
3048           }
3049         } else { // Expression or a literal
3050 
3051           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3052             continue; // special operand like VINTERP attr_chan
3053 
3054           // An instruction may use only one literal.
3055           // This has been validated on the previous step.
3056           // See validateVOP3Literal.
3057           // This literal may be used as more than one operand.
3058           // If all these operands are of the same size,
3059           // this literal counts as one scalar value.
3060           // Otherwise it counts as 2 scalar values.
3061           // See "GFX10 Shader Programming", section 3.6.2.3.
3062 
3063           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3064           if (Size < 4) Size = 4;
3065 
3066           if (NumLiterals == 0) {
3067             NumLiterals = 1;
3068             LiteralSize = Size;
3069           } else if (LiteralSize != Size) {
3070             NumLiterals = 2;
3071           }
3072         }
3073       }
3074     }
3075   }
3076   ConstantBusUseCount += NumLiterals;
3077 
3078   return ConstantBusUseCount <= getConstantBusLimit(Opcode);
3079 }
3080 
3081 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
3082   const unsigned Opcode = Inst.getOpcode();
3083   const MCInstrDesc &Desc = MII.get(Opcode);
3084 
3085   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3086   if (DstIdx == -1 ||
3087       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3088     return true;
3089   }
3090 
3091   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3092 
3093   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3094   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3095   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3096 
3097   assert(DstIdx != -1);
3098   const MCOperand &Dst = Inst.getOperand(DstIdx);
3099   assert(Dst.isReg());
3100   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3101 
3102   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3103 
3104   for (int SrcIdx : SrcIndices) {
3105     if (SrcIdx == -1) break;
3106     const MCOperand &Src = Inst.getOperand(SrcIdx);
3107     if (Src.isReg()) {
3108       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3109       if (isRegIntersect(DstReg, SrcReg, TRI)) {
3110         return false;
3111       }
3112     }
3113   }
3114 
3115   return true;
3116 }
3117 
3118 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3119 
3120   const unsigned Opc = Inst.getOpcode();
3121   const MCInstrDesc &Desc = MII.get(Opc);
3122 
3123   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3124     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3125     assert(ClampIdx != -1);
3126     return Inst.getOperand(ClampIdx).getImm() == 0;
3127   }
3128 
3129   return true;
3130 }
3131 
3132 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3133 
3134   const unsigned Opc = Inst.getOpcode();
3135   const MCInstrDesc &Desc = MII.get(Opc);
3136 
3137   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3138     return true;
3139 
3140   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3141   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3142   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3143 
3144   assert(VDataIdx != -1);
3145 
3146   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3147     return true;
3148 
3149   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3150   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
3151   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3152   if (DMask == 0)
3153     DMask = 1;
3154 
3155   unsigned DataSize =
3156     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3157   if (hasPackedD16()) {
3158     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3159     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3160       DataSize = (DataSize + 1) / 2;
3161   }
3162 
3163   return (VDataSize / 4) == DataSize + TFESize;
3164 }
3165 
3166 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3167   const unsigned Opc = Inst.getOpcode();
3168   const MCInstrDesc &Desc = MII.get(Opc);
3169 
3170   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
3171     return true;
3172 
3173   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3174 
3175   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3176       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3177   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3178   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3179   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3180 
3181   assert(VAddr0Idx != -1);
3182   assert(SrsrcIdx != -1);
3183   assert(SrsrcIdx > VAddr0Idx);
3184 
3185   if (DimIdx == -1)
3186     return true; // intersect_ray
3187 
3188   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3189   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3190   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3191   unsigned VAddrSize =
3192       IsNSA ? SrsrcIdx - VAddr0Idx
3193             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3194 
3195   unsigned AddrSize = BaseOpcode->NumExtraArgs +
3196                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
3197                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
3198                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
3199   if (!IsNSA) {
3200     if (AddrSize > 8)
3201       AddrSize = 16;
3202     else if (AddrSize > 4)
3203       AddrSize = 8;
3204   }
3205 
3206   return VAddrSize == AddrSize;
3207 }
3208 
3209 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3210 
3211   const unsigned Opc = Inst.getOpcode();
3212   const MCInstrDesc &Desc = MII.get(Opc);
3213 
3214   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3215     return true;
3216   if (!Desc.mayLoad() || !Desc.mayStore())
3217     return true; // Not atomic
3218 
3219   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3220   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3221 
3222   // This is an incomplete check because image_atomic_cmpswap
3223   // may only use 0x3 and 0xf while other atomic operations
3224   // may use 0x1 and 0x3. However these limitations are
3225   // verified when we check that dmask matches dst size.
3226   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3227 }
3228 
3229 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3230 
3231   const unsigned Opc = Inst.getOpcode();
3232   const MCInstrDesc &Desc = MII.get(Opc);
3233 
3234   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3235     return true;
3236 
3237   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3238   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3239 
3240   // GATHER4 instructions use dmask in a different fashion compared to
3241   // other MIMG instructions. The only useful DMASK values are
3242   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3243   // (red,red,red,red) etc.) The ISA document doesn't mention
3244   // this.
3245   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3246 }
3247 
3248 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3249 {
3250   switch (Opcode) {
3251   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3252   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3253   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3254     return true;
3255   default:
3256     return false;
3257   }
3258 }
3259 
3260 // movrels* opcodes should only allow VGPRS as src0.
3261 // This is specified in .td description for vop1/vop3,
3262 // but sdwa is handled differently. See isSDWAOperand.
3263 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) {
3264 
3265   const unsigned Opc = Inst.getOpcode();
3266   const MCInstrDesc &Desc = MII.get(Opc);
3267 
3268   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3269     return true;
3270 
3271   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3272   assert(Src0Idx != -1);
3273 
3274   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3275   if (!Src0.isReg())
3276     return false;
3277 
3278   auto Reg = Src0.getReg();
3279   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3280   return !isSGPR(mc2PseudoReg(Reg), TRI);
3281 }
3282 
3283 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst) {
3284 
3285   const unsigned Opc = Inst.getOpcode();
3286 
3287   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3288     return true;
3289 
3290   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3291   assert(Src0Idx != -1);
3292 
3293   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3294   if (!Src0.isReg())
3295     return true;
3296 
3297   auto Reg = Src0.getReg();
3298   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3299   if (isSGPR(mc2PseudoReg(Reg), TRI)) {
3300     Error(getLoc(), "source operand must be either a VGPR or an inline constant");
3301     return false;
3302   }
3303 
3304   return true;
3305 }
3306 
3307 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3308 
3309   const unsigned Opc = Inst.getOpcode();
3310   const MCInstrDesc &Desc = MII.get(Opc);
3311 
3312   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3313     return true;
3314 
3315   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3316   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3317     if (isCI() || isSI())
3318       return false;
3319   }
3320 
3321   return true;
3322 }
3323 
3324 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3325   const unsigned Opc = Inst.getOpcode();
3326   const MCInstrDesc &Desc = MII.get(Opc);
3327 
3328   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3329     return true;
3330 
3331   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3332   if (DimIdx < 0)
3333     return true;
3334 
3335   long Imm = Inst.getOperand(DimIdx).getImm();
3336   if (Imm < 0 || Imm >= 8)
3337     return false;
3338 
3339   return true;
3340 }
3341 
3342 static bool IsRevOpcode(const unsigned Opcode)
3343 {
3344   switch (Opcode) {
3345   case AMDGPU::V_SUBREV_F32_e32:
3346   case AMDGPU::V_SUBREV_F32_e64:
3347   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3348   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3349   case AMDGPU::V_SUBREV_F32_e32_vi:
3350   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3351   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3352   case AMDGPU::V_SUBREV_F32_e64_vi:
3353 
3354   case AMDGPU::V_SUBREV_CO_U32_e32:
3355   case AMDGPU::V_SUBREV_CO_U32_e64:
3356   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3357   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3358 
3359   case AMDGPU::V_SUBBREV_U32_e32:
3360   case AMDGPU::V_SUBBREV_U32_e64:
3361   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3362   case AMDGPU::V_SUBBREV_U32_e32_vi:
3363   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3364   case AMDGPU::V_SUBBREV_U32_e64_vi:
3365 
3366   case AMDGPU::V_SUBREV_U32_e32:
3367   case AMDGPU::V_SUBREV_U32_e64:
3368   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3369   case AMDGPU::V_SUBREV_U32_e32_vi:
3370   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3371   case AMDGPU::V_SUBREV_U32_e64_vi:
3372 
3373   case AMDGPU::V_SUBREV_F16_e32:
3374   case AMDGPU::V_SUBREV_F16_e64:
3375   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3376   case AMDGPU::V_SUBREV_F16_e32_vi:
3377   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3378   case AMDGPU::V_SUBREV_F16_e64_vi:
3379 
3380   case AMDGPU::V_SUBREV_U16_e32:
3381   case AMDGPU::V_SUBREV_U16_e64:
3382   case AMDGPU::V_SUBREV_U16_e32_vi:
3383   case AMDGPU::V_SUBREV_U16_e64_vi:
3384 
3385   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3386   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3387   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3388 
3389   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3390   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3391 
3392   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3393   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3394 
3395   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3396   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3397 
3398   case AMDGPU::V_LSHRREV_B32_e32:
3399   case AMDGPU::V_LSHRREV_B32_e64:
3400   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3401   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3402   case AMDGPU::V_LSHRREV_B32_e32_vi:
3403   case AMDGPU::V_LSHRREV_B32_e64_vi:
3404   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3405   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3406 
3407   case AMDGPU::V_ASHRREV_I32_e32:
3408   case AMDGPU::V_ASHRREV_I32_e64:
3409   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3410   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3411   case AMDGPU::V_ASHRREV_I32_e32_vi:
3412   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3413   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3414   case AMDGPU::V_ASHRREV_I32_e64_vi:
3415 
3416   case AMDGPU::V_LSHLREV_B32_e32:
3417   case AMDGPU::V_LSHLREV_B32_e64:
3418   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3419   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3420   case AMDGPU::V_LSHLREV_B32_e32_vi:
3421   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3422   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3423   case AMDGPU::V_LSHLREV_B32_e64_vi:
3424 
3425   case AMDGPU::V_LSHLREV_B16_e32:
3426   case AMDGPU::V_LSHLREV_B16_e64:
3427   case AMDGPU::V_LSHLREV_B16_e32_vi:
3428   case AMDGPU::V_LSHLREV_B16_e64_vi:
3429   case AMDGPU::V_LSHLREV_B16_gfx10:
3430 
3431   case AMDGPU::V_LSHRREV_B16_e32:
3432   case AMDGPU::V_LSHRREV_B16_e64:
3433   case AMDGPU::V_LSHRREV_B16_e32_vi:
3434   case AMDGPU::V_LSHRREV_B16_e64_vi:
3435   case AMDGPU::V_LSHRREV_B16_gfx10:
3436 
3437   case AMDGPU::V_ASHRREV_I16_e32:
3438   case AMDGPU::V_ASHRREV_I16_e64:
3439   case AMDGPU::V_ASHRREV_I16_e32_vi:
3440   case AMDGPU::V_ASHRREV_I16_e64_vi:
3441   case AMDGPU::V_ASHRREV_I16_gfx10:
3442 
3443   case AMDGPU::V_LSHLREV_B64:
3444   case AMDGPU::V_LSHLREV_B64_gfx10:
3445   case AMDGPU::V_LSHLREV_B64_vi:
3446 
3447   case AMDGPU::V_LSHRREV_B64:
3448   case AMDGPU::V_LSHRREV_B64_gfx10:
3449   case AMDGPU::V_LSHRREV_B64_vi:
3450 
3451   case AMDGPU::V_ASHRREV_I64:
3452   case AMDGPU::V_ASHRREV_I64_gfx10:
3453   case AMDGPU::V_ASHRREV_I64_vi:
3454 
3455   case AMDGPU::V_PK_LSHLREV_B16:
3456   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3457   case AMDGPU::V_PK_LSHLREV_B16_vi:
3458 
3459   case AMDGPU::V_PK_LSHRREV_B16:
3460   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3461   case AMDGPU::V_PK_LSHRREV_B16_vi:
3462   case AMDGPU::V_PK_ASHRREV_I16:
3463   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3464   case AMDGPU::V_PK_ASHRREV_I16_vi:
3465     return true;
3466   default:
3467     return false;
3468   }
3469 }
3470 
3471 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3472 
3473   using namespace SIInstrFlags;
3474   const unsigned Opcode = Inst.getOpcode();
3475   const MCInstrDesc &Desc = MII.get(Opcode);
3476 
3477   // lds_direct register is defined so that it can be used
3478   // with 9-bit operands only. Ignore encodings which do not accept these.
3479   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3480     return true;
3481 
3482   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3483   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3484   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3485 
3486   const int SrcIndices[] = { Src1Idx, Src2Idx };
3487 
3488   // lds_direct cannot be specified as either src1 or src2.
3489   for (int SrcIdx : SrcIndices) {
3490     if (SrcIdx == -1) break;
3491     const MCOperand &Src = Inst.getOperand(SrcIdx);
3492     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3493       return false;
3494     }
3495   }
3496 
3497   if (Src0Idx == -1)
3498     return true;
3499 
3500   const MCOperand &Src = Inst.getOperand(Src0Idx);
3501   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3502     return true;
3503 
3504   // lds_direct is specified as src0. Check additional limitations.
3505   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3506 }
3507 
3508 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3509   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3510     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3511     if (Op.isFlatOffset())
3512       return Op.getStartLoc();
3513   }
3514   return getLoc();
3515 }
3516 
3517 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3518                                          const OperandVector &Operands) {
3519   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3520   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3521     return true;
3522 
3523   auto Opcode = Inst.getOpcode();
3524   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3525   assert(OpNum != -1);
3526 
3527   const auto &Op = Inst.getOperand(OpNum);
3528   if (!hasFlatOffsets() && Op.getImm() != 0) {
3529     Error(getFlatOffsetLoc(Operands),
3530           "flat offset modifier is not supported on this GPU");
3531     return false;
3532   }
3533 
3534   // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
3535   // For FLAT segment the offset must be positive;
3536   // MSB is ignored and forced to zero.
3537   unsigned OffsetSize = isGFX9() ? 13 : 12;
3538   if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
3539     if (!isIntN(OffsetSize, Op.getImm())) {
3540       Error(getFlatOffsetLoc(Operands),
3541             isGFX9() ? "expected a 13-bit signed offset" :
3542                        "expected a 12-bit signed offset");
3543       return false;
3544     }
3545   } else {
3546     if (!isUIntN(OffsetSize - 1, Op.getImm())) {
3547       Error(getFlatOffsetLoc(Operands),
3548             isGFX9() ? "expected a 12-bit unsigned offset" :
3549                        "expected an 11-bit unsigned offset");
3550       return false;
3551     }
3552   }
3553 
3554   return true;
3555 }
3556 
3557 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3558   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3559     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3560     if (Op.isSMEMOffset())
3561       return Op.getStartLoc();
3562   }
3563   return getLoc();
3564 }
3565 
3566 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3567                                          const OperandVector &Operands) {
3568   if (isCI() || isSI())
3569     return true;
3570 
3571   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3572   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3573     return true;
3574 
3575   auto Opcode = Inst.getOpcode();
3576   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3577   if (OpNum == -1)
3578     return true;
3579 
3580   const auto &Op = Inst.getOperand(OpNum);
3581   if (!Op.isImm())
3582     return true;
3583 
3584   uint64_t Offset = Op.getImm();
3585   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3586   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3587       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3588     return true;
3589 
3590   Error(getSMEMOffsetLoc(Operands),
3591         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3592                                "expected a 21-bit signed offset");
3593 
3594   return false;
3595 }
3596 
3597 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3598   unsigned Opcode = Inst.getOpcode();
3599   const MCInstrDesc &Desc = MII.get(Opcode);
3600   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3601     return true;
3602 
3603   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3604   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3605 
3606   const int OpIndices[] = { Src0Idx, Src1Idx };
3607 
3608   unsigned NumExprs = 0;
3609   unsigned NumLiterals = 0;
3610   uint32_t LiteralValue;
3611 
3612   for (int OpIdx : OpIndices) {
3613     if (OpIdx == -1) break;
3614 
3615     const MCOperand &MO = Inst.getOperand(OpIdx);
3616     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3617     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3618       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3619         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3620         if (NumLiterals == 0 || LiteralValue != Value) {
3621           LiteralValue = Value;
3622           ++NumLiterals;
3623         }
3624       } else if (MO.isExpr()) {
3625         ++NumExprs;
3626       }
3627     }
3628   }
3629 
3630   return NumLiterals + NumExprs <= 1;
3631 }
3632 
3633 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3634   const unsigned Opc = Inst.getOpcode();
3635   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3636       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3637     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3638     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3639 
3640     if (OpSel & ~3)
3641       return false;
3642   }
3643   return true;
3644 }
3645 
3646 // Check if VCC register matches wavefront size
3647 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3648   auto FB = getFeatureBits();
3649   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3650     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3651 }
3652 
3653 // VOP3 literal is only allowed in GFX10+ and only one can be used
3654 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3655   unsigned Opcode = Inst.getOpcode();
3656   const MCInstrDesc &Desc = MII.get(Opcode);
3657   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3658     return true;
3659 
3660   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3661   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3662   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3663 
3664   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3665 
3666   unsigned NumExprs = 0;
3667   unsigned NumLiterals = 0;
3668   uint32_t LiteralValue;
3669 
3670   for (int OpIdx : OpIndices) {
3671     if (OpIdx == -1) break;
3672 
3673     const MCOperand &MO = Inst.getOperand(OpIdx);
3674     if (!MO.isImm() && !MO.isExpr())
3675       continue;
3676     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3677       continue;
3678 
3679     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3680         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug])
3681       return false;
3682 
3683     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3684       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3685       if (NumLiterals == 0 || LiteralValue != Value) {
3686         LiteralValue = Value;
3687         ++NumLiterals;
3688       }
3689     } else if (MO.isExpr()) {
3690       ++NumExprs;
3691     }
3692   }
3693   NumLiterals += NumExprs;
3694 
3695   return !NumLiterals ||
3696          (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3697 }
3698 
3699 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3700                                           const SMLoc &IDLoc,
3701                                           const OperandVector &Operands) {
3702   if (!validateLdsDirect(Inst)) {
3703     Error(IDLoc,
3704       "invalid use of lds_direct");
3705     return false;
3706   }
3707   if (!validateSOPLiteral(Inst)) {
3708     Error(IDLoc,
3709       "only one literal operand is allowed");
3710     return false;
3711   }
3712   if (!validateVOP3Literal(Inst)) {
3713     Error(IDLoc,
3714       "invalid literal operand");
3715     return false;
3716   }
3717   if (!validateConstantBusLimitations(Inst)) {
3718     Error(IDLoc,
3719       "invalid operand (violates constant bus restrictions)");
3720     return false;
3721   }
3722   if (!validateEarlyClobberLimitations(Inst)) {
3723     Error(IDLoc,
3724       "destination must be different than all sources");
3725     return false;
3726   }
3727   if (!validateIntClampSupported(Inst)) {
3728     Error(IDLoc,
3729       "integer clamping is not supported on this GPU");
3730     return false;
3731   }
3732   if (!validateOpSel(Inst)) {
3733     Error(IDLoc,
3734       "invalid op_sel operand");
3735     return false;
3736   }
3737   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
3738   if (!validateMIMGD16(Inst)) {
3739     Error(IDLoc,
3740       "d16 modifier is not supported on this GPU");
3741     return false;
3742   }
3743   if (!validateMIMGDim(Inst)) {
3744     Error(IDLoc, "dim modifier is required on this GPU");
3745     return false;
3746   }
3747   if (!validateMIMGDataSize(Inst)) {
3748     Error(IDLoc,
3749       "image data size does not match dmask and tfe");
3750     return false;
3751   }
3752   if (!validateMIMGAddrSize(Inst)) {
3753     Error(IDLoc,
3754       "image address size does not match dim and a16");
3755     return false;
3756   }
3757   if (!validateMIMGAtomicDMask(Inst)) {
3758     Error(IDLoc,
3759       "invalid atomic image dmask");
3760     return false;
3761   }
3762   if (!validateMIMGGatherDMask(Inst)) {
3763     Error(IDLoc,
3764       "invalid image_gather dmask: only one bit must be set");
3765     return false;
3766   }
3767   if (!validateMovrels(Inst)) {
3768     Error(IDLoc, "source operand must be a VGPR");
3769     return false;
3770   }
3771   if (!validateFlatOffset(Inst, Operands)) {
3772     return false;
3773   }
3774   if (!validateSMEMOffset(Inst, Operands)) {
3775     return false;
3776   }
3777   if (!validateMAIAccWrite(Inst)) {
3778     return false;
3779   }
3780 
3781   return true;
3782 }
3783 
3784 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3785                                             const FeatureBitset &FBS,
3786                                             unsigned VariantID = 0);
3787 
3788 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
3789                                 const FeatureBitset &AvailableFeatures,
3790                                 unsigned VariantID);
3791 
3792 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
3793                                        const FeatureBitset &FBS) {
3794   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
3795 }
3796 
3797 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
3798                                        const FeatureBitset &FBS,
3799                                        ArrayRef<unsigned> Variants) {
3800   for (auto Variant : Variants) {
3801     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
3802       return true;
3803   }
3804 
3805   return false;
3806 }
3807 
3808 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
3809                                                   const SMLoc &IDLoc) {
3810   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3811 
3812   // Check if requested instruction variant is supported.
3813   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
3814     return false;
3815 
3816   // This instruction is not supported.
3817   // Clear any other pending errors because they are no longer relevant.
3818   getParser().clearPendingErrors();
3819 
3820   // Requested instruction variant is not supported.
3821   // Check if any other variants are supported.
3822   StringRef VariantName = getMatchedVariantName();
3823   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
3824     return Error(IDLoc,
3825                  Twine(VariantName,
3826                        " variant of this instruction is not supported"));
3827   }
3828 
3829   // Finally check if this instruction is supported on any other GPU.
3830   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
3831     return Error(IDLoc, "instruction not supported on this GPU");
3832   }
3833 
3834   // Instruction not supported on any GPU. Probably a typo.
3835   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
3836   return Error(IDLoc, "invalid instruction" + Suggestion);
3837 }
3838 
3839 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3840                                               OperandVector &Operands,
3841                                               MCStreamer &Out,
3842                                               uint64_t &ErrorInfo,
3843                                               bool MatchingInlineAsm) {
3844   MCInst Inst;
3845   unsigned Result = Match_Success;
3846   for (auto Variant : getMatchedVariants()) {
3847     uint64_t EI;
3848     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3849                                   Variant);
3850     // We order match statuses from least to most specific. We use most specific
3851     // status as resulting
3852     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3853     if ((R == Match_Success) ||
3854         (R == Match_PreferE32) ||
3855         (R == Match_MissingFeature && Result != Match_PreferE32) ||
3856         (R == Match_InvalidOperand && Result != Match_MissingFeature
3857                                    && Result != Match_PreferE32) ||
3858         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
3859                                    && Result != Match_MissingFeature
3860                                    && Result != Match_PreferE32)) {
3861       Result = R;
3862       ErrorInfo = EI;
3863     }
3864     if (R == Match_Success)
3865       break;
3866   }
3867 
3868   if (Result == Match_Success) {
3869     if (!validateInstruction(Inst, IDLoc, Operands)) {
3870       return true;
3871     }
3872     Inst.setLoc(IDLoc);
3873     Out.emitInstruction(Inst, getSTI());
3874     return false;
3875   }
3876 
3877   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
3878   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
3879     return true;
3880   }
3881 
3882   switch (Result) {
3883   default: break;
3884   case Match_MissingFeature:
3885     // It has been verified that the specified instruction
3886     // mnemonic is valid. A match was found but it requires
3887     // features which are not supported on this GPU.
3888     return Error(IDLoc, "operands are not valid for this GPU or mode");
3889 
3890   case Match_InvalidOperand: {
3891     SMLoc ErrorLoc = IDLoc;
3892     if (ErrorInfo != ~0ULL) {
3893       if (ErrorInfo >= Operands.size()) {
3894         return Error(IDLoc, "too few operands for instruction");
3895       }
3896       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3897       if (ErrorLoc == SMLoc())
3898         ErrorLoc = IDLoc;
3899     }
3900     return Error(ErrorLoc, "invalid operand for instruction");
3901   }
3902 
3903   case Match_PreferE32:
3904     return Error(IDLoc, "internal error: instruction without _e64 suffix "
3905                         "should be encoded as e32");
3906   case Match_MnemonicFail:
3907     llvm_unreachable("Invalid instructions should have been handled already");
3908   }
3909   llvm_unreachable("Implement any new match types added!");
3910 }
3911 
3912 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3913   int64_t Tmp = -1;
3914   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3915     return true;
3916   }
3917   if (getParser().parseAbsoluteExpression(Tmp)) {
3918     return true;
3919   }
3920   Ret = static_cast<uint32_t>(Tmp);
3921   return false;
3922 }
3923 
3924 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3925                                                uint32_t &Minor) {
3926   if (ParseAsAbsoluteExpression(Major))
3927     return TokError("invalid major version");
3928 
3929   if (getLexer().isNot(AsmToken::Comma))
3930     return TokError("minor version number required, comma expected");
3931   Lex();
3932 
3933   if (ParseAsAbsoluteExpression(Minor))
3934     return TokError("invalid minor version");
3935 
3936   return false;
3937 }
3938 
3939 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3940   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3941     return TokError("directive only supported for amdgcn architecture");
3942 
3943   std::string Target;
3944 
3945   SMLoc TargetStart = getTok().getLoc();
3946   if (getParser().parseEscapedString(Target))
3947     return true;
3948   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3949 
3950   std::string ExpectedTarget;
3951   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3952   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3953 
3954   if (Target != ExpectedTargetOS.str())
3955     return getParser().Error(TargetRange.Start, "target must match options",
3956                              TargetRange);
3957 
3958   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3959   return false;
3960 }
3961 
3962 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3963   return getParser().Error(Range.Start, "value out of range", Range);
3964 }
3965 
3966 bool AMDGPUAsmParser::calculateGPRBlocks(
3967     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3968     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
3969     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
3970     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
3971   // TODO(scott.linder): These calculations are duplicated from
3972   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3973   IsaVersion Version = getIsaVersion(getSTI().getCPU());
3974 
3975   unsigned NumVGPRs = NextFreeVGPR;
3976   unsigned NumSGPRs = NextFreeSGPR;
3977 
3978   if (Version.Major >= 10)
3979     NumSGPRs = 0;
3980   else {
3981     unsigned MaxAddressableNumSGPRs =
3982         IsaInfo::getAddressableNumSGPRs(&getSTI());
3983 
3984     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3985         NumSGPRs > MaxAddressableNumSGPRs)
3986       return OutOfRangeError(SGPRRange);
3987 
3988     NumSGPRs +=
3989         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3990 
3991     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3992         NumSGPRs > MaxAddressableNumSGPRs)
3993       return OutOfRangeError(SGPRRange);
3994 
3995     if (Features.test(FeatureSGPRInitBug))
3996       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3997   }
3998 
3999   VGPRBlocks =
4000       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4001   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4002 
4003   return false;
4004 }
4005 
4006 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4007   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4008     return TokError("directive only supported for amdgcn architecture");
4009 
4010   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4011     return TokError("directive only supported for amdhsa OS");
4012 
4013   StringRef KernelName;
4014   if (getParser().parseIdentifier(KernelName))
4015     return true;
4016 
4017   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4018 
4019   StringSet<> Seen;
4020 
4021   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4022 
4023   SMRange VGPRRange;
4024   uint64_t NextFreeVGPR = 0;
4025   SMRange SGPRRange;
4026   uint64_t NextFreeSGPR = 0;
4027   unsigned UserSGPRCount = 0;
4028   bool ReserveVCC = true;
4029   bool ReserveFlatScr = true;
4030   bool ReserveXNACK = hasXNACK();
4031   Optional<bool> EnableWavefrontSize32;
4032 
4033   while (true) {
4034     while (getLexer().is(AsmToken::EndOfStatement))
4035       Lex();
4036 
4037     if (getLexer().isNot(AsmToken::Identifier))
4038       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
4039 
4040     StringRef ID = getTok().getIdentifier();
4041     SMRange IDRange = getTok().getLocRange();
4042     Lex();
4043 
4044     if (ID == ".end_amdhsa_kernel")
4045       break;
4046 
4047     if (Seen.find(ID) != Seen.end())
4048       return TokError(".amdhsa_ directives cannot be repeated");
4049     Seen.insert(ID);
4050 
4051     SMLoc ValStart = getTok().getLoc();
4052     int64_t IVal;
4053     if (getParser().parseAbsoluteExpression(IVal))
4054       return true;
4055     SMLoc ValEnd = getTok().getLoc();
4056     SMRange ValRange = SMRange(ValStart, ValEnd);
4057 
4058     if (IVal < 0)
4059       return OutOfRangeError(ValRange);
4060 
4061     uint64_t Val = IVal;
4062 
4063 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4064   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4065     return OutOfRangeError(RANGE);                                             \
4066   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4067 
4068     if (ID == ".amdhsa_group_segment_fixed_size") {
4069       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4070         return OutOfRangeError(ValRange);
4071       KD.group_segment_fixed_size = Val;
4072     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4073       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4074         return OutOfRangeError(ValRange);
4075       KD.private_segment_fixed_size = Val;
4076     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4077       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4078                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4079                        Val, ValRange);
4080       if (Val)
4081         UserSGPRCount += 4;
4082     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4083       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4084                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4085                        ValRange);
4086       if (Val)
4087         UserSGPRCount += 2;
4088     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4089       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4090                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4091                        ValRange);
4092       if (Val)
4093         UserSGPRCount += 2;
4094     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4095       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4096                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4097                        Val, ValRange);
4098       if (Val)
4099         UserSGPRCount += 2;
4100     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4101       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4102                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4103                        ValRange);
4104       if (Val)
4105         UserSGPRCount += 2;
4106     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4107       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4108                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4109                        ValRange);
4110       if (Val)
4111         UserSGPRCount += 2;
4112     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4113       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4114                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4115                        Val, ValRange);
4116       if (Val)
4117         UserSGPRCount += 1;
4118     } else if (ID == ".amdhsa_wavefront_size32") {
4119       if (IVersion.Major < 10)
4120         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4121                                  IDRange);
4122       EnableWavefrontSize32 = Val;
4123       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4124                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4125                        Val, ValRange);
4126     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4127       PARSE_BITS_ENTRY(
4128           KD.compute_pgm_rsrc2,
4129           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
4130           ValRange);
4131     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4132       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4133                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4134                        ValRange);
4135     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4136       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4137                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4138                        ValRange);
4139     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4140       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4141                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4142                        ValRange);
4143     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4144       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4145                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4146                        ValRange);
4147     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4148       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4149                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4150                        ValRange);
4151     } else if (ID == ".amdhsa_next_free_vgpr") {
4152       VGPRRange = ValRange;
4153       NextFreeVGPR = Val;
4154     } else if (ID == ".amdhsa_next_free_sgpr") {
4155       SGPRRange = ValRange;
4156       NextFreeSGPR = Val;
4157     } else if (ID == ".amdhsa_reserve_vcc") {
4158       if (!isUInt<1>(Val))
4159         return OutOfRangeError(ValRange);
4160       ReserveVCC = Val;
4161     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4162       if (IVersion.Major < 7)
4163         return getParser().Error(IDRange.Start, "directive requires gfx7+",
4164                                  IDRange);
4165       if (!isUInt<1>(Val))
4166         return OutOfRangeError(ValRange);
4167       ReserveFlatScr = Val;
4168     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4169       if (IVersion.Major < 8)
4170         return getParser().Error(IDRange.Start, "directive requires gfx8+",
4171                                  IDRange);
4172       if (!isUInt<1>(Val))
4173         return OutOfRangeError(ValRange);
4174       ReserveXNACK = Val;
4175     } else if (ID == ".amdhsa_float_round_mode_32") {
4176       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4177                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4178     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4179       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4180                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4181     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4182       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4183                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4184     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4185       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4186                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4187                        ValRange);
4188     } else if (ID == ".amdhsa_dx10_clamp") {
4189       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4190                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4191     } else if (ID == ".amdhsa_ieee_mode") {
4192       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4193                        Val, ValRange);
4194     } else if (ID == ".amdhsa_fp16_overflow") {
4195       if (IVersion.Major < 9)
4196         return getParser().Error(IDRange.Start, "directive requires gfx9+",
4197                                  IDRange);
4198       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4199                        ValRange);
4200     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4201       if (IVersion.Major < 10)
4202         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4203                                  IDRange);
4204       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4205                        ValRange);
4206     } else if (ID == ".amdhsa_memory_ordered") {
4207       if (IVersion.Major < 10)
4208         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4209                                  IDRange);
4210       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4211                        ValRange);
4212     } else if (ID == ".amdhsa_forward_progress") {
4213       if (IVersion.Major < 10)
4214         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4215                                  IDRange);
4216       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4217                        ValRange);
4218     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4219       PARSE_BITS_ENTRY(
4220           KD.compute_pgm_rsrc2,
4221           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4222           ValRange);
4223     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4224       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4225                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4226                        Val, ValRange);
4227     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4228       PARSE_BITS_ENTRY(
4229           KD.compute_pgm_rsrc2,
4230           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4231           ValRange);
4232     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4233       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4234                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4235                        Val, ValRange);
4236     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4237       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4238                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4239                        Val, ValRange);
4240     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4241       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4242                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4243                        Val, ValRange);
4244     } else if (ID == ".amdhsa_exception_int_div_zero") {
4245       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4246                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4247                        Val, ValRange);
4248     } else {
4249       return getParser().Error(IDRange.Start,
4250                                "unknown .amdhsa_kernel directive", IDRange);
4251     }
4252 
4253 #undef PARSE_BITS_ENTRY
4254   }
4255 
4256   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4257     return TokError(".amdhsa_next_free_vgpr directive is required");
4258 
4259   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4260     return TokError(".amdhsa_next_free_sgpr directive is required");
4261 
4262   unsigned VGPRBlocks;
4263   unsigned SGPRBlocks;
4264   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4265                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
4266                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4267                          SGPRBlocks))
4268     return true;
4269 
4270   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4271           VGPRBlocks))
4272     return OutOfRangeError(VGPRRange);
4273   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4274                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4275 
4276   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4277           SGPRBlocks))
4278     return OutOfRangeError(SGPRRange);
4279   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4280                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4281                   SGPRBlocks);
4282 
4283   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4284     return TokError("too many user SGPRs enabled");
4285   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4286                   UserSGPRCount);
4287 
4288   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4289       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4290       ReserveFlatScr, ReserveXNACK);
4291   return false;
4292 }
4293 
4294 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4295   uint32_t Major;
4296   uint32_t Minor;
4297 
4298   if (ParseDirectiveMajorMinor(Major, Minor))
4299     return true;
4300 
4301   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4302   return false;
4303 }
4304 
4305 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4306   uint32_t Major;
4307   uint32_t Minor;
4308   uint32_t Stepping;
4309   StringRef VendorName;
4310   StringRef ArchName;
4311 
4312   // If this directive has no arguments, then use the ISA version for the
4313   // targeted GPU.
4314   if (getLexer().is(AsmToken::EndOfStatement)) {
4315     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4316     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
4317                                                       ISA.Stepping,
4318                                                       "AMD", "AMDGPU");
4319     return false;
4320   }
4321 
4322   if (ParseDirectiveMajorMinor(Major, Minor))
4323     return true;
4324 
4325   if (getLexer().isNot(AsmToken::Comma))
4326     return TokError("stepping version number required, comma expected");
4327   Lex();
4328 
4329   if (ParseAsAbsoluteExpression(Stepping))
4330     return TokError("invalid stepping version");
4331 
4332   if (getLexer().isNot(AsmToken::Comma))
4333     return TokError("vendor name required, comma expected");
4334   Lex();
4335 
4336   if (getLexer().isNot(AsmToken::String))
4337     return TokError("invalid vendor name");
4338 
4339   VendorName = getLexer().getTok().getStringContents();
4340   Lex();
4341 
4342   if (getLexer().isNot(AsmToken::Comma))
4343     return TokError("arch name required, comma expected");
4344   Lex();
4345 
4346   if (getLexer().isNot(AsmToken::String))
4347     return TokError("invalid arch name");
4348 
4349   ArchName = getLexer().getTok().getStringContents();
4350   Lex();
4351 
4352   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
4353                                                     VendorName, ArchName);
4354   return false;
4355 }
4356 
4357 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4358                                                amd_kernel_code_t &Header) {
4359   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4360   // assembly for backwards compatibility.
4361   if (ID == "max_scratch_backing_memory_byte_size") {
4362     Parser.eatToEndOfStatement();
4363     return false;
4364   }
4365 
4366   SmallString<40> ErrStr;
4367   raw_svector_ostream Err(ErrStr);
4368   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4369     return TokError(Err.str());
4370   }
4371   Lex();
4372 
4373   if (ID == "enable_wavefront_size32") {
4374     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4375       if (!isGFX10())
4376         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4377       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4378         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4379     } else {
4380       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4381         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4382     }
4383   }
4384 
4385   if (ID == "wavefront_size") {
4386     if (Header.wavefront_size == 5) {
4387       if (!isGFX10())
4388         return TokError("wavefront_size=5 is only allowed on GFX10+");
4389       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4390         return TokError("wavefront_size=5 requires +WavefrontSize32");
4391     } else if (Header.wavefront_size == 6) {
4392       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4393         return TokError("wavefront_size=6 requires +WavefrontSize64");
4394     }
4395   }
4396 
4397   if (ID == "enable_wgp_mode") {
4398     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
4399       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4400   }
4401 
4402   if (ID == "enable_mem_ordered") {
4403     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
4404       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4405   }
4406 
4407   if (ID == "enable_fwd_progress") {
4408     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
4409       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4410   }
4411 
4412   return false;
4413 }
4414 
4415 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4416   amd_kernel_code_t Header;
4417   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4418 
4419   while (true) {
4420     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4421     // will set the current token to EndOfStatement.
4422     while(getLexer().is(AsmToken::EndOfStatement))
4423       Lex();
4424 
4425     if (getLexer().isNot(AsmToken::Identifier))
4426       return TokError("expected value identifier or .end_amd_kernel_code_t");
4427 
4428     StringRef ID = getLexer().getTok().getIdentifier();
4429     Lex();
4430 
4431     if (ID == ".end_amd_kernel_code_t")
4432       break;
4433 
4434     if (ParseAMDKernelCodeTValue(ID, Header))
4435       return true;
4436   }
4437 
4438   getTargetStreamer().EmitAMDKernelCodeT(Header);
4439 
4440   return false;
4441 }
4442 
4443 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4444   if (getLexer().isNot(AsmToken::Identifier))
4445     return TokError("expected symbol name");
4446 
4447   StringRef KernelName = Parser.getTok().getString();
4448 
4449   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4450                                            ELF::STT_AMDGPU_HSA_KERNEL);
4451   Lex();
4452 
4453   KernelScope.initialize(getContext());
4454   return false;
4455 }
4456 
4457 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4458   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4459     return Error(getParser().getTok().getLoc(),
4460                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4461                  "architectures");
4462   }
4463 
4464   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
4465 
4466   std::string ISAVersionStringFromSTI;
4467   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4468   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4469 
4470   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4471     return Error(getParser().getTok().getLoc(),
4472                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4473                  "arguments specified through the command line");
4474   }
4475 
4476   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4477   Lex();
4478 
4479   return false;
4480 }
4481 
4482 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4483   const char *AssemblerDirectiveBegin;
4484   const char *AssemblerDirectiveEnd;
4485   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4486       isHsaAbiVersion3(&getSTI())
4487           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4488                             HSAMD::V3::AssemblerDirectiveEnd)
4489           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4490                             HSAMD::AssemblerDirectiveEnd);
4491 
4492   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4493     return Error(getParser().getTok().getLoc(),
4494                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4495                  "not available on non-amdhsa OSes")).str());
4496   }
4497 
4498   std::string HSAMetadataString;
4499   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4500                           HSAMetadataString))
4501     return true;
4502 
4503   if (isHsaAbiVersion3(&getSTI())) {
4504     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4505       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4506   } else {
4507     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4508       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4509   }
4510 
4511   return false;
4512 }
4513 
4514 /// Common code to parse out a block of text (typically YAML) between start and
4515 /// end directives.
4516 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4517                                           const char *AssemblerDirectiveEnd,
4518                                           std::string &CollectString) {
4519 
4520   raw_string_ostream CollectStream(CollectString);
4521 
4522   getLexer().setSkipSpace(false);
4523 
4524   bool FoundEnd = false;
4525   while (!getLexer().is(AsmToken::Eof)) {
4526     while (getLexer().is(AsmToken::Space)) {
4527       CollectStream << getLexer().getTok().getString();
4528       Lex();
4529     }
4530 
4531     if (getLexer().is(AsmToken::Identifier)) {
4532       StringRef ID = getLexer().getTok().getIdentifier();
4533       if (ID == AssemblerDirectiveEnd) {
4534         Lex();
4535         FoundEnd = true;
4536         break;
4537       }
4538     }
4539 
4540     CollectStream << Parser.parseStringToEndOfStatement()
4541                   << getContext().getAsmInfo()->getSeparatorString();
4542 
4543     Parser.eatToEndOfStatement();
4544   }
4545 
4546   getLexer().setSkipSpace(true);
4547 
4548   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
4549     return TokError(Twine("expected directive ") +
4550                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4551   }
4552 
4553   CollectStream.flush();
4554   return false;
4555 }
4556 
4557 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4558 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4559   std::string String;
4560   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4561                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4562     return true;
4563 
4564   auto PALMetadata = getTargetStreamer().getPALMetadata();
4565   if (!PALMetadata->setFromString(String))
4566     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4567   return false;
4568 }
4569 
4570 /// Parse the assembler directive for old linear-format PAL metadata.
4571 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4572   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4573     return Error(getParser().getTok().getLoc(),
4574                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4575                  "not available on non-amdpal OSes")).str());
4576   }
4577 
4578   auto PALMetadata = getTargetStreamer().getPALMetadata();
4579   PALMetadata->setLegacy();
4580   for (;;) {
4581     uint32_t Key, Value;
4582     if (ParseAsAbsoluteExpression(Key)) {
4583       return TokError(Twine("invalid value in ") +
4584                       Twine(PALMD::AssemblerDirective));
4585     }
4586     if (getLexer().isNot(AsmToken::Comma)) {
4587       return TokError(Twine("expected an even number of values in ") +
4588                       Twine(PALMD::AssemblerDirective));
4589     }
4590     Lex();
4591     if (ParseAsAbsoluteExpression(Value)) {
4592       return TokError(Twine("invalid value in ") +
4593                       Twine(PALMD::AssemblerDirective));
4594     }
4595     PALMetadata->setRegister(Key, Value);
4596     if (getLexer().isNot(AsmToken::Comma))
4597       break;
4598     Lex();
4599   }
4600   return false;
4601 }
4602 
4603 /// ParseDirectiveAMDGPULDS
4604 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
4605 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4606   if (getParser().checkForValidSection())
4607     return true;
4608 
4609   StringRef Name;
4610   SMLoc NameLoc = getLexer().getLoc();
4611   if (getParser().parseIdentifier(Name))
4612     return TokError("expected identifier in directive");
4613 
4614   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4615   if (parseToken(AsmToken::Comma, "expected ','"))
4616     return true;
4617 
4618   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4619 
4620   int64_t Size;
4621   SMLoc SizeLoc = getLexer().getLoc();
4622   if (getParser().parseAbsoluteExpression(Size))
4623     return true;
4624   if (Size < 0)
4625     return Error(SizeLoc, "size must be non-negative");
4626   if (Size > LocalMemorySize)
4627     return Error(SizeLoc, "size is too large");
4628 
4629   int64_t Alignment = 4;
4630   if (getLexer().is(AsmToken::Comma)) {
4631     Lex();
4632     SMLoc AlignLoc = getLexer().getLoc();
4633     if (getParser().parseAbsoluteExpression(Alignment))
4634       return true;
4635     if (Alignment < 0 || !isPowerOf2_64(Alignment))
4636       return Error(AlignLoc, "alignment must be a power of two");
4637 
4638     // Alignment larger than the size of LDS is possible in theory, as long
4639     // as the linker manages to place to symbol at address 0, but we do want
4640     // to make sure the alignment fits nicely into a 32-bit integer.
4641     if (Alignment >= 1u << 31)
4642       return Error(AlignLoc, "alignment is too large");
4643   }
4644 
4645   if (parseToken(AsmToken::EndOfStatement,
4646                  "unexpected token in '.amdgpu_lds' directive"))
4647     return true;
4648 
4649   Symbol->redefineIfPossible();
4650   if (!Symbol->isUndefined())
4651     return Error(NameLoc, "invalid symbol redefinition");
4652 
4653   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
4654   return false;
4655 }
4656 
4657 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4658   StringRef IDVal = DirectiveID.getString();
4659 
4660   if (isHsaAbiVersion3(&getSTI())) {
4661     if (IDVal == ".amdgcn_target")
4662       return ParseDirectiveAMDGCNTarget();
4663 
4664     if (IDVal == ".amdhsa_kernel")
4665       return ParseDirectiveAMDHSAKernel();
4666 
4667     // TODO: Restructure/combine with PAL metadata directive.
4668     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4669       return ParseDirectiveHSAMetadata();
4670   } else {
4671     if (IDVal == ".hsa_code_object_version")
4672       return ParseDirectiveHSACodeObjectVersion();
4673 
4674     if (IDVal == ".hsa_code_object_isa")
4675       return ParseDirectiveHSACodeObjectISA();
4676 
4677     if (IDVal == ".amd_kernel_code_t")
4678       return ParseDirectiveAMDKernelCodeT();
4679 
4680     if (IDVal == ".amdgpu_hsa_kernel")
4681       return ParseDirectiveAMDGPUHsaKernel();
4682 
4683     if (IDVal == ".amd_amdgpu_isa")
4684       return ParseDirectiveISAVersion();
4685 
4686     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4687       return ParseDirectiveHSAMetadata();
4688   }
4689 
4690   if (IDVal == ".amdgpu_lds")
4691     return ParseDirectiveAMDGPULDS();
4692 
4693   if (IDVal == PALMD::AssemblerDirectiveBegin)
4694     return ParseDirectivePALMetadataBegin();
4695 
4696   if (IDVal == PALMD::AssemblerDirective)
4697     return ParseDirectivePALMetadata();
4698 
4699   return true;
4700 }
4701 
4702 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4703                                            unsigned RegNo) const {
4704 
4705   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4706        R.isValid(); ++R) {
4707     if (*R == RegNo)
4708       return isGFX9Plus();
4709   }
4710 
4711   // GFX10 has 2 more SGPRs 104 and 105.
4712   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4713        R.isValid(); ++R) {
4714     if (*R == RegNo)
4715       return hasSGPR104_SGPR105();
4716   }
4717 
4718   switch (RegNo) {
4719   case AMDGPU::SRC_SHARED_BASE:
4720   case AMDGPU::SRC_SHARED_LIMIT:
4721   case AMDGPU::SRC_PRIVATE_BASE:
4722   case AMDGPU::SRC_PRIVATE_LIMIT:
4723   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4724     return !isCI() && !isSI() && !isVI();
4725   case AMDGPU::TBA:
4726   case AMDGPU::TBA_LO:
4727   case AMDGPU::TBA_HI:
4728   case AMDGPU::TMA:
4729   case AMDGPU::TMA_LO:
4730   case AMDGPU::TMA_HI:
4731     return !isGFX9() && !isGFX10();
4732   case AMDGPU::XNACK_MASK:
4733   case AMDGPU::XNACK_MASK_LO:
4734   case AMDGPU::XNACK_MASK_HI:
4735     return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4736   case AMDGPU::SGPR_NULL:
4737     return isGFX10();
4738   default:
4739     break;
4740   }
4741 
4742   if (isCI())
4743     return true;
4744 
4745   if (isSI() || isGFX10()) {
4746     // No flat_scr on SI.
4747     // On GFX10 flat scratch is not a valid register operand and can only be
4748     // accessed with s_setreg/s_getreg.
4749     switch (RegNo) {
4750     case AMDGPU::FLAT_SCR:
4751     case AMDGPU::FLAT_SCR_LO:
4752     case AMDGPU::FLAT_SCR_HI:
4753       return false;
4754     default:
4755       return true;
4756     }
4757   }
4758 
4759   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4760   // SI/CI have.
4761   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4762        R.isValid(); ++R) {
4763     if (*R == RegNo)
4764       return hasSGPR102_SGPR103();
4765   }
4766 
4767   return true;
4768 }
4769 
4770 OperandMatchResultTy
4771 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4772                               OperandMode Mode) {
4773   // Try to parse with a custom parser
4774   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4775 
4776   // If we successfully parsed the operand or if there as an error parsing,
4777   // we are done.
4778   //
4779   // If we are parsing after we reach EndOfStatement then this means we
4780   // are appending default values to the Operands list.  This is only done
4781   // by custom parser, so we shouldn't continue on to the generic parsing.
4782   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4783       getLexer().is(AsmToken::EndOfStatement))
4784     return ResTy;
4785 
4786   if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4787     unsigned Prefix = Operands.size();
4788     SMLoc LBraceLoc = getTok().getLoc();
4789     Parser.Lex(); // eat the '['
4790 
4791     for (;;) {
4792       ResTy = parseReg(Operands);
4793       if (ResTy != MatchOperand_Success)
4794         return ResTy;
4795 
4796       if (getLexer().is(AsmToken::RBrac))
4797         break;
4798 
4799       if (getLexer().isNot(AsmToken::Comma))
4800         return MatchOperand_ParseFail;
4801       Parser.Lex();
4802     }
4803 
4804     if (Operands.size() - Prefix > 1) {
4805       Operands.insert(Operands.begin() + Prefix,
4806                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4807       Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4808                                                     getTok().getLoc()));
4809     }
4810 
4811     Parser.Lex(); // eat the ']'
4812     return MatchOperand_Success;
4813   }
4814 
4815   return parseRegOrImm(Operands);
4816 }
4817 
4818 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4819   // Clear any forced encodings from the previous instruction.
4820   setForcedEncodingSize(0);
4821   setForcedDPP(false);
4822   setForcedSDWA(false);
4823 
4824   if (Name.endswith("_e64")) {
4825     setForcedEncodingSize(64);
4826     return Name.substr(0, Name.size() - 4);
4827   } else if (Name.endswith("_e32")) {
4828     setForcedEncodingSize(32);
4829     return Name.substr(0, Name.size() - 4);
4830   } else if (Name.endswith("_dpp")) {
4831     setForcedDPP(true);
4832     return Name.substr(0, Name.size() - 4);
4833   } else if (Name.endswith("_sdwa")) {
4834     setForcedSDWA(true);
4835     return Name.substr(0, Name.size() - 5);
4836   }
4837   return Name;
4838 }
4839 
4840 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4841                                        StringRef Name,
4842                                        SMLoc NameLoc, OperandVector &Operands) {
4843   // Add the instruction mnemonic
4844   Name = parseMnemonicSuffix(Name);
4845   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4846 
4847   bool IsMIMG = Name.startswith("image_");
4848 
4849   while (!getLexer().is(AsmToken::EndOfStatement)) {
4850     OperandMode Mode = OperandMode_Default;
4851     if (IsMIMG && isGFX10() && Operands.size() == 2)
4852       Mode = OperandMode_NSA;
4853     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4854 
4855     // Eat the comma or space if there is one.
4856     if (getLexer().is(AsmToken::Comma))
4857       Parser.Lex();
4858 
4859     if (Res != MatchOperand_Success) {
4860       checkUnsupportedInstruction(Name, NameLoc);
4861       if (!Parser.hasPendingError()) {
4862         // FIXME: use real operand location rather than the current location.
4863         StringRef Msg =
4864           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
4865                                             "not a valid operand.";
4866         Error(getLexer().getLoc(), Msg);
4867       }
4868       while (!getLexer().is(AsmToken::EndOfStatement)) {
4869         Parser.Lex();
4870       }
4871       return true;
4872     }
4873   }
4874 
4875   return false;
4876 }
4877 
4878 //===----------------------------------------------------------------------===//
4879 // Utility functions
4880 //===----------------------------------------------------------------------===//
4881 
4882 OperandMatchResultTy
4883 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4884 
4885   if (!trySkipId(Prefix, AsmToken::Colon))
4886     return MatchOperand_NoMatch;
4887 
4888   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4889 }
4890 
4891 OperandMatchResultTy
4892 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4893                                     AMDGPUOperand::ImmTy ImmTy,
4894                                     bool (*ConvertResult)(int64_t&)) {
4895   SMLoc S = getLoc();
4896   int64_t Value = 0;
4897 
4898   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4899   if (Res != MatchOperand_Success)
4900     return Res;
4901 
4902   if (ConvertResult && !ConvertResult(Value)) {
4903     Error(S, "invalid " + StringRef(Prefix) + " value.");
4904   }
4905 
4906   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4907   return MatchOperand_Success;
4908 }
4909 
4910 OperandMatchResultTy
4911 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4912                                              OperandVector &Operands,
4913                                              AMDGPUOperand::ImmTy ImmTy,
4914                                              bool (*ConvertResult)(int64_t&)) {
4915   SMLoc S = getLoc();
4916   if (!trySkipId(Prefix, AsmToken::Colon))
4917     return MatchOperand_NoMatch;
4918 
4919   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4920     return MatchOperand_ParseFail;
4921 
4922   unsigned Val = 0;
4923   const unsigned MaxSize = 4;
4924 
4925   // FIXME: How to verify the number of elements matches the number of src
4926   // operands?
4927   for (int I = 0; ; ++I) {
4928     int64_t Op;
4929     SMLoc Loc = getLoc();
4930     if (!parseExpr(Op))
4931       return MatchOperand_ParseFail;
4932 
4933     if (Op != 0 && Op != 1) {
4934       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4935       return MatchOperand_ParseFail;
4936     }
4937 
4938     Val |= (Op << I);
4939 
4940     if (trySkipToken(AsmToken::RBrac))
4941       break;
4942 
4943     if (I + 1 == MaxSize) {
4944       Error(getLoc(), "expected a closing square bracket");
4945       return MatchOperand_ParseFail;
4946     }
4947 
4948     if (!skipToken(AsmToken::Comma, "expected a comma"))
4949       return MatchOperand_ParseFail;
4950   }
4951 
4952   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4953   return MatchOperand_Success;
4954 }
4955 
4956 OperandMatchResultTy
4957 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4958                                AMDGPUOperand::ImmTy ImmTy) {
4959   int64_t Bit = 0;
4960   SMLoc S = Parser.getTok().getLoc();
4961 
4962   // We are at the end of the statement, and this is a default argument, so
4963   // use a default value.
4964   if (getLexer().isNot(AsmToken::EndOfStatement)) {
4965     switch(getLexer().getKind()) {
4966       case AsmToken::Identifier: {
4967         StringRef Tok = Parser.getTok().getString();
4968         if (Tok == Name) {
4969           if (Tok == "r128" && !hasMIMG_R128())
4970             Error(S, "r128 modifier is not supported on this GPU");
4971           if (Tok == "a16" && !isGFX9() && !hasGFX10A16())
4972             Error(S, "a16 modifier is not supported on this GPU");
4973           Bit = 1;
4974           Parser.Lex();
4975         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4976           Bit = 0;
4977           Parser.Lex();
4978         } else {
4979           return MatchOperand_NoMatch;
4980         }
4981         break;
4982       }
4983       default:
4984         return MatchOperand_NoMatch;
4985     }
4986   }
4987 
4988   if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4989     return MatchOperand_ParseFail;
4990 
4991   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
4992     ImmTy = AMDGPUOperand::ImmTyR128A16;
4993 
4994   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4995   return MatchOperand_Success;
4996 }
4997 
4998 static void addOptionalImmOperand(
4999   MCInst& Inst, const OperandVector& Operands,
5000   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5001   AMDGPUOperand::ImmTy ImmT,
5002   int64_t Default = 0) {
5003   auto i = OptionalIdx.find(ImmT);
5004   if (i != OptionalIdx.end()) {
5005     unsigned Idx = i->second;
5006     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5007   } else {
5008     Inst.addOperand(MCOperand::createImm(Default));
5009   }
5010 }
5011 
5012 OperandMatchResultTy
5013 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
5014   if (getLexer().isNot(AsmToken::Identifier)) {
5015     return MatchOperand_NoMatch;
5016   }
5017   StringRef Tok = Parser.getTok().getString();
5018   if (Tok != Prefix) {
5019     return MatchOperand_NoMatch;
5020   }
5021 
5022   Parser.Lex();
5023   if (getLexer().isNot(AsmToken::Colon)) {
5024     return MatchOperand_ParseFail;
5025   }
5026 
5027   Parser.Lex();
5028   if (getLexer().isNot(AsmToken::Identifier)) {
5029     return MatchOperand_ParseFail;
5030   }
5031 
5032   Value = Parser.getTok().getString();
5033   return MatchOperand_Success;
5034 }
5035 
5036 //===----------------------------------------------------------------------===//
5037 // MTBUF format
5038 //===----------------------------------------------------------------------===//
5039 
5040 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5041                                   int64_t MaxVal,
5042                                   int64_t &Fmt) {
5043   int64_t Val;
5044   SMLoc Loc = getLoc();
5045 
5046   auto Res = parseIntWithPrefix(Pref, Val);
5047   if (Res == MatchOperand_ParseFail)
5048     return false;
5049   if (Res == MatchOperand_NoMatch)
5050     return true;
5051 
5052   if (Val < 0 || Val > MaxVal) {
5053     Error(Loc, Twine("out of range ", StringRef(Pref)));
5054     return false;
5055   }
5056 
5057   Fmt = Val;
5058   return true;
5059 }
5060 
5061 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5062 // values to live in a joint format operand in the MCInst encoding.
5063 OperandMatchResultTy
5064 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5065   using namespace llvm::AMDGPU::MTBUFFormat;
5066 
5067   int64_t Dfmt = DFMT_UNDEF;
5068   int64_t Nfmt = NFMT_UNDEF;
5069 
5070   // dfmt and nfmt can appear in either order, and each is optional.
5071   for (int I = 0; I < 2; ++I) {
5072     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5073       return MatchOperand_ParseFail;
5074 
5075     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5076       return MatchOperand_ParseFail;
5077     }
5078     // Skip optional comma between dfmt/nfmt
5079     // but guard against 2 commas following each other.
5080     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5081         !peekToken().is(AsmToken::Comma)) {
5082       trySkipToken(AsmToken::Comma);
5083     }
5084   }
5085 
5086   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5087     return MatchOperand_NoMatch;
5088 
5089   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5090   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5091 
5092   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5093   return MatchOperand_Success;
5094 }
5095 
5096 OperandMatchResultTy
5097 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5098   using namespace llvm::AMDGPU::MTBUFFormat;
5099 
5100   int64_t Fmt = UFMT_UNDEF;
5101 
5102   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5103     return MatchOperand_ParseFail;
5104 
5105   if (Fmt == UFMT_UNDEF)
5106     return MatchOperand_NoMatch;
5107 
5108   Format = Fmt;
5109   return MatchOperand_Success;
5110 }
5111 
5112 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5113                                     int64_t &Nfmt,
5114                                     StringRef FormatStr,
5115                                     SMLoc Loc) {
5116   using namespace llvm::AMDGPU::MTBUFFormat;
5117   int64_t Format;
5118 
5119   Format = getDfmt(FormatStr);
5120   if (Format != DFMT_UNDEF) {
5121     Dfmt = Format;
5122     return true;
5123   }
5124 
5125   Format = getNfmt(FormatStr, getSTI());
5126   if (Format != NFMT_UNDEF) {
5127     Nfmt = Format;
5128     return true;
5129   }
5130 
5131   Error(Loc, "unsupported format");
5132   return false;
5133 }
5134 
5135 OperandMatchResultTy
5136 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5137                                           SMLoc FormatLoc,
5138                                           int64_t &Format) {
5139   using namespace llvm::AMDGPU::MTBUFFormat;
5140 
5141   int64_t Dfmt = DFMT_UNDEF;
5142   int64_t Nfmt = NFMT_UNDEF;
5143   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5144     return MatchOperand_ParseFail;
5145 
5146   if (trySkipToken(AsmToken::Comma)) {
5147     StringRef Str;
5148     SMLoc Loc = getLoc();
5149     if (!parseId(Str, "expected a format string") ||
5150         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5151       return MatchOperand_ParseFail;
5152     }
5153     if (Dfmt == DFMT_UNDEF) {
5154       Error(Loc, "duplicate numeric format");
5155       return MatchOperand_ParseFail;
5156     } else if (Nfmt == NFMT_UNDEF) {
5157       Error(Loc, "duplicate data format");
5158       return MatchOperand_ParseFail;
5159     }
5160   }
5161 
5162   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5163   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5164 
5165   if (isGFX10()) {
5166     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5167     if (Ufmt == UFMT_UNDEF) {
5168       Error(FormatLoc, "unsupported format");
5169       return MatchOperand_ParseFail;
5170     }
5171     Format = Ufmt;
5172   } else {
5173     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5174   }
5175 
5176   return MatchOperand_Success;
5177 }
5178 
5179 OperandMatchResultTy
5180 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5181                                             SMLoc Loc,
5182                                             int64_t &Format) {
5183   using namespace llvm::AMDGPU::MTBUFFormat;
5184 
5185   auto Id = getUnifiedFormat(FormatStr);
5186   if (Id == UFMT_UNDEF)
5187     return MatchOperand_NoMatch;
5188 
5189   if (!isGFX10()) {
5190     Error(Loc, "unified format is not supported on this GPU");
5191     return MatchOperand_ParseFail;
5192   }
5193 
5194   Format = Id;
5195   return MatchOperand_Success;
5196 }
5197 
5198 OperandMatchResultTy
5199 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5200   using namespace llvm::AMDGPU::MTBUFFormat;
5201   SMLoc Loc = getLoc();
5202 
5203   if (!parseExpr(Format))
5204     return MatchOperand_ParseFail;
5205   if (!isValidFormatEncoding(Format, getSTI())) {
5206     Error(Loc, "out of range format");
5207     return MatchOperand_ParseFail;
5208   }
5209 
5210   return MatchOperand_Success;
5211 }
5212 
5213 OperandMatchResultTy
5214 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5215   using namespace llvm::AMDGPU::MTBUFFormat;
5216 
5217   if (!trySkipId("format", AsmToken::Colon))
5218     return MatchOperand_NoMatch;
5219 
5220   if (trySkipToken(AsmToken::LBrac)) {
5221     StringRef FormatStr;
5222     SMLoc Loc = getLoc();
5223     if (!parseId(FormatStr, "expected a format string"))
5224       return MatchOperand_ParseFail;
5225 
5226     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5227     if (Res == MatchOperand_NoMatch)
5228       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5229     if (Res != MatchOperand_Success)
5230       return Res;
5231 
5232     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5233       return MatchOperand_ParseFail;
5234 
5235     return MatchOperand_Success;
5236   }
5237 
5238   return parseNumericFormat(Format);
5239 }
5240 
5241 OperandMatchResultTy
5242 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5243   using namespace llvm::AMDGPU::MTBUFFormat;
5244 
5245   int64_t Format = getDefaultFormatEncoding(getSTI());
5246   OperandMatchResultTy Res;
5247   SMLoc Loc = getLoc();
5248 
5249   // Parse legacy format syntax.
5250   Res = isGFX10() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5251   if (Res == MatchOperand_ParseFail)
5252     return Res;
5253 
5254   bool FormatFound = (Res == MatchOperand_Success);
5255 
5256   Operands.push_back(
5257     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5258 
5259   if (FormatFound)
5260     trySkipToken(AsmToken::Comma);
5261 
5262   if (isToken(AsmToken::EndOfStatement)) {
5263     // We are expecting an soffset operand,
5264     // but let matcher handle the error.
5265     return MatchOperand_Success;
5266   }
5267 
5268   // Parse soffset.
5269   Res = parseRegOrImm(Operands);
5270   if (Res != MatchOperand_Success)
5271     return Res;
5272 
5273   trySkipToken(AsmToken::Comma);
5274 
5275   if (!FormatFound) {
5276     Res = parseSymbolicOrNumericFormat(Format);
5277     if (Res == MatchOperand_ParseFail)
5278       return Res;
5279     if (Res == MatchOperand_Success) {
5280       auto Size = Operands.size();
5281       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5282       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5283       Op.setImm(Format);
5284     }
5285     return MatchOperand_Success;
5286   }
5287 
5288   if (isId("format") && peekToken().is(AsmToken::Colon)) {
5289     Error(getLoc(), "duplicate format");
5290     return MatchOperand_ParseFail;
5291   }
5292   return MatchOperand_Success;
5293 }
5294 
5295 //===----------------------------------------------------------------------===//
5296 // ds
5297 //===----------------------------------------------------------------------===//
5298 
5299 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5300                                     const OperandVector &Operands) {
5301   OptionalImmIndexMap OptionalIdx;
5302 
5303   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5304     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5305 
5306     // Add the register arguments
5307     if (Op.isReg()) {
5308       Op.addRegOperands(Inst, 1);
5309       continue;
5310     }
5311 
5312     // Handle optional arguments
5313     OptionalIdx[Op.getImmTy()] = i;
5314   }
5315 
5316   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5317   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5318   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5319 
5320   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5321 }
5322 
5323 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5324                                 bool IsGdsHardcoded) {
5325   OptionalImmIndexMap OptionalIdx;
5326 
5327   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5328     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5329 
5330     // Add the register arguments
5331     if (Op.isReg()) {
5332       Op.addRegOperands(Inst, 1);
5333       continue;
5334     }
5335 
5336     if (Op.isToken() && Op.getToken() == "gds") {
5337       IsGdsHardcoded = true;
5338       continue;
5339     }
5340 
5341     // Handle optional arguments
5342     OptionalIdx[Op.getImmTy()] = i;
5343   }
5344 
5345   AMDGPUOperand::ImmTy OffsetType =
5346     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5347      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5348      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5349                                                       AMDGPUOperand::ImmTyOffset;
5350 
5351   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5352 
5353   if (!IsGdsHardcoded) {
5354     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5355   }
5356   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5357 }
5358 
5359 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
5360   OptionalImmIndexMap OptionalIdx;
5361 
5362   unsigned OperandIdx[4];
5363   unsigned EnMask = 0;
5364   int SrcIdx = 0;
5365 
5366   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5367     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5368 
5369     // Add the register arguments
5370     if (Op.isReg()) {
5371       assert(SrcIdx < 4);
5372       OperandIdx[SrcIdx] = Inst.size();
5373       Op.addRegOperands(Inst, 1);
5374       ++SrcIdx;
5375       continue;
5376     }
5377 
5378     if (Op.isOff()) {
5379       assert(SrcIdx < 4);
5380       OperandIdx[SrcIdx] = Inst.size();
5381       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
5382       ++SrcIdx;
5383       continue;
5384     }
5385 
5386     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
5387       Op.addImmOperands(Inst, 1);
5388       continue;
5389     }
5390 
5391     if (Op.isToken() && Op.getToken() == "done")
5392       continue;
5393 
5394     // Handle optional arguments
5395     OptionalIdx[Op.getImmTy()] = i;
5396   }
5397 
5398   assert(SrcIdx == 4);
5399 
5400   bool Compr = false;
5401   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
5402     Compr = true;
5403     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
5404     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
5405     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
5406   }
5407 
5408   for (auto i = 0; i < SrcIdx; ++i) {
5409     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
5410       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
5411     }
5412   }
5413 
5414   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
5415   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
5416 
5417   Inst.addOperand(MCOperand::createImm(EnMask));
5418 }
5419 
5420 //===----------------------------------------------------------------------===//
5421 // s_waitcnt
5422 //===----------------------------------------------------------------------===//
5423 
5424 static bool
5425 encodeCnt(
5426   const AMDGPU::IsaVersion ISA,
5427   int64_t &IntVal,
5428   int64_t CntVal,
5429   bool Saturate,
5430   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
5431   unsigned (*decode)(const IsaVersion &Version, unsigned))
5432 {
5433   bool Failed = false;
5434 
5435   IntVal = encode(ISA, IntVal, CntVal);
5436   if (CntVal != decode(ISA, IntVal)) {
5437     if (Saturate) {
5438       IntVal = encode(ISA, IntVal, -1);
5439     } else {
5440       Failed = true;
5441     }
5442   }
5443   return Failed;
5444 }
5445 
5446 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
5447 
5448   SMLoc CntLoc = getLoc();
5449   StringRef CntName = getTokenStr();
5450 
5451   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
5452       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
5453     return false;
5454 
5455   int64_t CntVal;
5456   SMLoc ValLoc = getLoc();
5457   if (!parseExpr(CntVal))
5458     return false;
5459 
5460   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5461 
5462   bool Failed = true;
5463   bool Sat = CntName.endswith("_sat");
5464 
5465   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
5466     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
5467   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
5468     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
5469   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
5470     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
5471   } else {
5472     Error(CntLoc, "invalid counter name " + CntName);
5473     return false;
5474   }
5475 
5476   if (Failed) {
5477     Error(ValLoc, "too large value for " + CntName);
5478     return false;
5479   }
5480 
5481   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
5482     return false;
5483 
5484   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
5485     if (isToken(AsmToken::EndOfStatement)) {
5486       Error(getLoc(), "expected a counter name");
5487       return false;
5488     }
5489   }
5490 
5491   return true;
5492 }
5493 
5494 OperandMatchResultTy
5495 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
5496   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5497   int64_t Waitcnt = getWaitcntBitMask(ISA);
5498   SMLoc S = getLoc();
5499 
5500   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
5501     while (!isToken(AsmToken::EndOfStatement)) {
5502       if (!parseCnt(Waitcnt))
5503         return MatchOperand_ParseFail;
5504     }
5505   } else {
5506     if (!parseExpr(Waitcnt))
5507       return MatchOperand_ParseFail;
5508   }
5509 
5510   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
5511   return MatchOperand_Success;
5512 }
5513 
5514 bool
5515 AMDGPUOperand::isSWaitCnt() const {
5516   return isImm();
5517 }
5518 
5519 //===----------------------------------------------------------------------===//
5520 // hwreg
5521 //===----------------------------------------------------------------------===//
5522 
5523 bool
5524 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
5525                                 int64_t &Offset,
5526                                 int64_t &Width) {
5527   using namespace llvm::AMDGPU::Hwreg;
5528 
5529   // The register may be specified by name or using a numeric code
5530   if (isToken(AsmToken::Identifier) &&
5531       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
5532     HwReg.IsSymbolic = true;
5533     lex(); // skip message name
5534   } else if (!parseExpr(HwReg.Id)) {
5535     return false;
5536   }
5537 
5538   if (trySkipToken(AsmToken::RParen))
5539     return true;
5540 
5541   // parse optional params
5542   return
5543     skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
5544     parseExpr(Offset) &&
5545     skipToken(AsmToken::Comma, "expected a comma") &&
5546     parseExpr(Width) &&
5547     skipToken(AsmToken::RParen, "expected a closing parenthesis");
5548 }
5549 
5550 bool
5551 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
5552                                const int64_t Offset,
5553                                const int64_t Width,
5554                                const SMLoc Loc) {
5555 
5556   using namespace llvm::AMDGPU::Hwreg;
5557 
5558   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
5559     Error(Loc, "specified hardware register is not supported on this GPU");
5560     return false;
5561   } else if (!isValidHwreg(HwReg.Id)) {
5562     Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
5563     return false;
5564   } else if (!isValidHwregOffset(Offset)) {
5565     Error(Loc, "invalid bit offset: only 5-bit values are legal");
5566     return false;
5567   } else if (!isValidHwregWidth(Width)) {
5568     Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
5569     return false;
5570   }
5571   return true;
5572 }
5573 
5574 OperandMatchResultTy
5575 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
5576   using namespace llvm::AMDGPU::Hwreg;
5577 
5578   int64_t ImmVal = 0;
5579   SMLoc Loc = getLoc();
5580 
5581   if (trySkipId("hwreg", AsmToken::LParen)) {
5582     OperandInfoTy HwReg(ID_UNKNOWN_);
5583     int64_t Offset = OFFSET_DEFAULT_;
5584     int64_t Width = WIDTH_DEFAULT_;
5585     if (parseHwregBody(HwReg, Offset, Width) &&
5586         validateHwreg(HwReg, Offset, Width, Loc)) {
5587       ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
5588     } else {
5589       return MatchOperand_ParseFail;
5590     }
5591   } else if (parseExpr(ImmVal)) {
5592     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
5593       Error(Loc, "invalid immediate: only 16-bit values are legal");
5594       return MatchOperand_ParseFail;
5595     }
5596   } else {
5597     return MatchOperand_ParseFail;
5598   }
5599 
5600   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
5601   return MatchOperand_Success;
5602 }
5603 
5604 bool AMDGPUOperand::isHwreg() const {
5605   return isImmTy(ImmTyHwreg);
5606 }
5607 
5608 //===----------------------------------------------------------------------===//
5609 // sendmsg
5610 //===----------------------------------------------------------------------===//
5611 
5612 bool
5613 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
5614                                   OperandInfoTy &Op,
5615                                   OperandInfoTy &Stream) {
5616   using namespace llvm::AMDGPU::SendMsg;
5617 
5618   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
5619     Msg.IsSymbolic = true;
5620     lex(); // skip message name
5621   } else if (!parseExpr(Msg.Id)) {
5622     return false;
5623   }
5624 
5625   if (trySkipToken(AsmToken::Comma)) {
5626     Op.IsDefined = true;
5627     if (isToken(AsmToken::Identifier) &&
5628         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
5629       lex(); // skip operation name
5630     } else if (!parseExpr(Op.Id)) {
5631       return false;
5632     }
5633 
5634     if (trySkipToken(AsmToken::Comma)) {
5635       Stream.IsDefined = true;
5636       if (!parseExpr(Stream.Id))
5637         return false;
5638     }
5639   }
5640 
5641   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
5642 }
5643 
5644 bool
5645 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
5646                                  const OperandInfoTy &Op,
5647                                  const OperandInfoTy &Stream,
5648                                  const SMLoc S) {
5649   using namespace llvm::AMDGPU::SendMsg;
5650 
5651   // Validation strictness depends on whether message is specified
5652   // in a symbolc or in a numeric form. In the latter case
5653   // only encoding possibility is checked.
5654   bool Strict = Msg.IsSymbolic;
5655 
5656   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
5657     Error(S, "invalid message id");
5658     return false;
5659   } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
5660     Error(S, Op.IsDefined ?
5661              "message does not support operations" :
5662              "missing message operation");
5663     return false;
5664   } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
5665     Error(S, "invalid operation id");
5666     return false;
5667   } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
5668     Error(S, "message operation does not support streams");
5669     return false;
5670   } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
5671     Error(S, "invalid message stream id");
5672     return false;
5673   }
5674   return true;
5675 }
5676 
5677 OperandMatchResultTy
5678 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
5679   using namespace llvm::AMDGPU::SendMsg;
5680 
5681   int64_t ImmVal = 0;
5682   SMLoc Loc = getLoc();
5683 
5684   if (trySkipId("sendmsg", AsmToken::LParen)) {
5685     OperandInfoTy Msg(ID_UNKNOWN_);
5686     OperandInfoTy Op(OP_NONE_);
5687     OperandInfoTy Stream(STREAM_ID_NONE_);
5688     if (parseSendMsgBody(Msg, Op, Stream) &&
5689         validateSendMsg(Msg, Op, Stream, Loc)) {
5690       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5691     } else {
5692       return MatchOperand_ParseFail;
5693     }
5694   } else if (parseExpr(ImmVal)) {
5695     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
5696       Error(Loc, "invalid immediate: only 16-bit values are legal");
5697       return MatchOperand_ParseFail;
5698     }
5699   } else {
5700     return MatchOperand_ParseFail;
5701   }
5702 
5703   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5704   return MatchOperand_Success;
5705 }
5706 
5707 bool AMDGPUOperand::isSendMsg() const {
5708   return isImmTy(ImmTySendMsg);
5709 }
5710 
5711 //===----------------------------------------------------------------------===//
5712 // v_interp
5713 //===----------------------------------------------------------------------===//
5714 
5715 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5716   if (getLexer().getKind() != AsmToken::Identifier)
5717     return MatchOperand_NoMatch;
5718 
5719   StringRef Str = Parser.getTok().getString();
5720   int Slot = StringSwitch<int>(Str)
5721     .Case("p10", 0)
5722     .Case("p20", 1)
5723     .Case("p0", 2)
5724     .Default(-1);
5725 
5726   SMLoc S = Parser.getTok().getLoc();
5727   if (Slot == -1)
5728     return MatchOperand_ParseFail;
5729 
5730   Parser.Lex();
5731   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5732                                               AMDGPUOperand::ImmTyInterpSlot));
5733   return MatchOperand_Success;
5734 }
5735 
5736 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5737   if (getLexer().getKind() != AsmToken::Identifier)
5738     return MatchOperand_NoMatch;
5739 
5740   StringRef Str = Parser.getTok().getString();
5741   if (!Str.startswith("attr"))
5742     return MatchOperand_NoMatch;
5743 
5744   StringRef Chan = Str.take_back(2);
5745   int AttrChan = StringSwitch<int>(Chan)
5746     .Case(".x", 0)
5747     .Case(".y", 1)
5748     .Case(".z", 2)
5749     .Case(".w", 3)
5750     .Default(-1);
5751   if (AttrChan == -1)
5752     return MatchOperand_ParseFail;
5753 
5754   Str = Str.drop_back(2).drop_front(4);
5755 
5756   uint8_t Attr;
5757   if (Str.getAsInteger(10, Attr))
5758     return MatchOperand_ParseFail;
5759 
5760   SMLoc S = Parser.getTok().getLoc();
5761   Parser.Lex();
5762   if (Attr > 63) {
5763     Error(S, "out of bounds attr");
5764     return MatchOperand_ParseFail;
5765   }
5766 
5767   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5768 
5769   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5770                                               AMDGPUOperand::ImmTyInterpAttr));
5771   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5772                                               AMDGPUOperand::ImmTyAttrChan));
5773   return MatchOperand_Success;
5774 }
5775 
5776 //===----------------------------------------------------------------------===//
5777 // exp
5778 //===----------------------------------------------------------------------===//
5779 
5780 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5781                                                       uint8_t &Val) {
5782   if (Str == "null") {
5783     Val = 9;
5784     return MatchOperand_Success;
5785   }
5786 
5787   if (Str.startswith("mrt")) {
5788     Str = Str.drop_front(3);
5789     if (Str == "z") { // == mrtz
5790       Val = 8;
5791       return MatchOperand_Success;
5792     }
5793 
5794     if (Str.getAsInteger(10, Val))
5795       return MatchOperand_ParseFail;
5796 
5797     if (Val > 7) {
5798       Error(getLoc(), "invalid exp target");
5799       return MatchOperand_ParseFail;
5800     }
5801 
5802     return MatchOperand_Success;
5803   }
5804 
5805   if (Str.startswith("pos")) {
5806     Str = Str.drop_front(3);
5807     if (Str.getAsInteger(10, Val))
5808       return MatchOperand_ParseFail;
5809 
5810     if (Val > 4 || (Val == 4 && !isGFX10())) {
5811       Error(getLoc(), "invalid exp target");
5812       return MatchOperand_ParseFail;
5813     }
5814 
5815     Val += 12;
5816     return MatchOperand_Success;
5817   }
5818 
5819   if (isGFX10() && Str == "prim") {
5820     Val = 20;
5821     return MatchOperand_Success;
5822   }
5823 
5824   if (Str.startswith("param")) {
5825     Str = Str.drop_front(5);
5826     if (Str.getAsInteger(10, Val))
5827       return MatchOperand_ParseFail;
5828 
5829     if (Val >= 32) {
5830       Error(getLoc(), "invalid exp target");
5831       return MatchOperand_ParseFail;
5832     }
5833 
5834     Val += 32;
5835     return MatchOperand_Success;
5836   }
5837 
5838   if (Str.startswith("invalid_target_")) {
5839     Str = Str.drop_front(15);
5840     if (Str.getAsInteger(10, Val))
5841       return MatchOperand_ParseFail;
5842 
5843     Error(getLoc(), "invalid exp target");
5844     return MatchOperand_ParseFail;
5845   }
5846 
5847   return MatchOperand_NoMatch;
5848 }
5849 
5850 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5851   uint8_t Val;
5852   StringRef Str = Parser.getTok().getString();
5853 
5854   auto Res = parseExpTgtImpl(Str, Val);
5855   if (Res != MatchOperand_Success)
5856     return Res;
5857 
5858   SMLoc S = Parser.getTok().getLoc();
5859   Parser.Lex();
5860 
5861   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5862                                               AMDGPUOperand::ImmTyExpTgt));
5863   return MatchOperand_Success;
5864 }
5865 
5866 //===----------------------------------------------------------------------===//
5867 // parser helpers
5868 //===----------------------------------------------------------------------===//
5869 
5870 bool
5871 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5872   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5873 }
5874 
5875 bool
5876 AMDGPUAsmParser::isId(const StringRef Id) const {
5877   return isId(getToken(), Id);
5878 }
5879 
5880 bool
5881 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5882   return getTokenKind() == Kind;
5883 }
5884 
5885 bool
5886 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5887   if (isId(Id)) {
5888     lex();
5889     return true;
5890   }
5891   return false;
5892 }
5893 
5894 bool
5895 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5896   if (isId(Id) && peekToken().is(Kind)) {
5897     lex();
5898     lex();
5899     return true;
5900   }
5901   return false;
5902 }
5903 
5904 bool
5905 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5906   if (isToken(Kind)) {
5907     lex();
5908     return true;
5909   }
5910   return false;
5911 }
5912 
5913 bool
5914 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5915                            const StringRef ErrMsg) {
5916   if (!trySkipToken(Kind)) {
5917     Error(getLoc(), ErrMsg);
5918     return false;
5919   }
5920   return true;
5921 }
5922 
5923 bool
5924 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5925   return !getParser().parseAbsoluteExpression(Imm);
5926 }
5927 
5928 bool
5929 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
5930   SMLoc S = getLoc();
5931 
5932   const MCExpr *Expr;
5933   if (Parser.parseExpression(Expr))
5934     return false;
5935 
5936   int64_t IntVal;
5937   if (Expr->evaluateAsAbsolute(IntVal)) {
5938     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
5939   } else {
5940     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
5941   }
5942   return true;
5943 }
5944 
5945 bool
5946 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5947   if (isToken(AsmToken::String)) {
5948     Val = getToken().getStringContents();
5949     lex();
5950     return true;
5951   } else {
5952     Error(getLoc(), ErrMsg);
5953     return false;
5954   }
5955 }
5956 
5957 bool
5958 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
5959   if (isToken(AsmToken::Identifier)) {
5960     Val = getTokenStr();
5961     lex();
5962     return true;
5963   } else {
5964     Error(getLoc(), ErrMsg);
5965     return false;
5966   }
5967 }
5968 
5969 AsmToken
5970 AMDGPUAsmParser::getToken() const {
5971   return Parser.getTok();
5972 }
5973 
5974 AsmToken
5975 AMDGPUAsmParser::peekToken() {
5976   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
5977 }
5978 
5979 void
5980 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
5981   auto TokCount = getLexer().peekTokens(Tokens);
5982 
5983   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
5984     Tokens[Idx] = AsmToken(AsmToken::Error, "");
5985 }
5986 
5987 AsmToken::TokenKind
5988 AMDGPUAsmParser::getTokenKind() const {
5989   return getLexer().getKind();
5990 }
5991 
5992 SMLoc
5993 AMDGPUAsmParser::getLoc() const {
5994   return getToken().getLoc();
5995 }
5996 
5997 StringRef
5998 AMDGPUAsmParser::getTokenStr() const {
5999   return getToken().getString();
6000 }
6001 
6002 void
6003 AMDGPUAsmParser::lex() {
6004   Parser.Lex();
6005 }
6006 
6007 //===----------------------------------------------------------------------===//
6008 // swizzle
6009 //===----------------------------------------------------------------------===//
6010 
6011 LLVM_READNONE
6012 static unsigned
6013 encodeBitmaskPerm(const unsigned AndMask,
6014                   const unsigned OrMask,
6015                   const unsigned XorMask) {
6016   using namespace llvm::AMDGPU::Swizzle;
6017 
6018   return BITMASK_PERM_ENC |
6019          (AndMask << BITMASK_AND_SHIFT) |
6020          (OrMask  << BITMASK_OR_SHIFT)  |
6021          (XorMask << BITMASK_XOR_SHIFT);
6022 }
6023 
6024 bool
6025 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6026                                       const unsigned MinVal,
6027                                       const unsigned MaxVal,
6028                                       const StringRef ErrMsg) {
6029   for (unsigned i = 0; i < OpNum; ++i) {
6030     if (!skipToken(AsmToken::Comma, "expected a comma")){
6031       return false;
6032     }
6033     SMLoc ExprLoc = Parser.getTok().getLoc();
6034     if (!parseExpr(Op[i])) {
6035       return false;
6036     }
6037     if (Op[i] < MinVal || Op[i] > MaxVal) {
6038       Error(ExprLoc, ErrMsg);
6039       return false;
6040     }
6041   }
6042 
6043   return true;
6044 }
6045 
6046 bool
6047 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6048   using namespace llvm::AMDGPU::Swizzle;
6049 
6050   int64_t Lane[LANE_NUM];
6051   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6052                            "expected a 2-bit lane id")) {
6053     Imm = QUAD_PERM_ENC;
6054     for (unsigned I = 0; I < LANE_NUM; ++I) {
6055       Imm |= Lane[I] << (LANE_SHIFT * I);
6056     }
6057     return true;
6058   }
6059   return false;
6060 }
6061 
6062 bool
6063 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6064   using namespace llvm::AMDGPU::Swizzle;
6065 
6066   SMLoc S = Parser.getTok().getLoc();
6067   int64_t GroupSize;
6068   int64_t LaneIdx;
6069 
6070   if (!parseSwizzleOperands(1, &GroupSize,
6071                             2, 32,
6072                             "group size must be in the interval [2,32]")) {
6073     return false;
6074   }
6075   if (!isPowerOf2_64(GroupSize)) {
6076     Error(S, "group size must be a power of two");
6077     return false;
6078   }
6079   if (parseSwizzleOperands(1, &LaneIdx,
6080                            0, GroupSize - 1,
6081                            "lane id must be in the interval [0,group size - 1]")) {
6082     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6083     return true;
6084   }
6085   return false;
6086 }
6087 
6088 bool
6089 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6090   using namespace llvm::AMDGPU::Swizzle;
6091 
6092   SMLoc S = Parser.getTok().getLoc();
6093   int64_t GroupSize;
6094 
6095   if (!parseSwizzleOperands(1, &GroupSize,
6096       2, 32, "group size must be in the interval [2,32]")) {
6097     return false;
6098   }
6099   if (!isPowerOf2_64(GroupSize)) {
6100     Error(S, "group size must be a power of two");
6101     return false;
6102   }
6103 
6104   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6105   return true;
6106 }
6107 
6108 bool
6109 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6110   using namespace llvm::AMDGPU::Swizzle;
6111 
6112   SMLoc S = Parser.getTok().getLoc();
6113   int64_t GroupSize;
6114 
6115   if (!parseSwizzleOperands(1, &GroupSize,
6116       1, 16, "group size must be in the interval [1,16]")) {
6117     return false;
6118   }
6119   if (!isPowerOf2_64(GroupSize)) {
6120     Error(S, "group size must be a power of two");
6121     return false;
6122   }
6123 
6124   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6125   return true;
6126 }
6127 
6128 bool
6129 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6130   using namespace llvm::AMDGPU::Swizzle;
6131 
6132   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6133     return false;
6134   }
6135 
6136   StringRef Ctl;
6137   SMLoc StrLoc = Parser.getTok().getLoc();
6138   if (!parseString(Ctl)) {
6139     return false;
6140   }
6141   if (Ctl.size() != BITMASK_WIDTH) {
6142     Error(StrLoc, "expected a 5-character mask");
6143     return false;
6144   }
6145 
6146   unsigned AndMask = 0;
6147   unsigned OrMask = 0;
6148   unsigned XorMask = 0;
6149 
6150   for (size_t i = 0; i < Ctl.size(); ++i) {
6151     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6152     switch(Ctl[i]) {
6153     default:
6154       Error(StrLoc, "invalid mask");
6155       return false;
6156     case '0':
6157       break;
6158     case '1':
6159       OrMask |= Mask;
6160       break;
6161     case 'p':
6162       AndMask |= Mask;
6163       break;
6164     case 'i':
6165       AndMask |= Mask;
6166       XorMask |= Mask;
6167       break;
6168     }
6169   }
6170 
6171   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6172   return true;
6173 }
6174 
6175 bool
6176 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6177 
6178   SMLoc OffsetLoc = Parser.getTok().getLoc();
6179 
6180   if (!parseExpr(Imm)) {
6181     return false;
6182   }
6183   if (!isUInt<16>(Imm)) {
6184     Error(OffsetLoc, "expected a 16-bit offset");
6185     return false;
6186   }
6187   return true;
6188 }
6189 
6190 bool
6191 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6192   using namespace llvm::AMDGPU::Swizzle;
6193 
6194   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
6195 
6196     SMLoc ModeLoc = Parser.getTok().getLoc();
6197     bool Ok = false;
6198 
6199     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6200       Ok = parseSwizzleQuadPerm(Imm);
6201     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6202       Ok = parseSwizzleBitmaskPerm(Imm);
6203     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6204       Ok = parseSwizzleBroadcast(Imm);
6205     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6206       Ok = parseSwizzleSwap(Imm);
6207     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6208       Ok = parseSwizzleReverse(Imm);
6209     } else {
6210       Error(ModeLoc, "expected a swizzle mode");
6211     }
6212 
6213     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
6214   }
6215 
6216   return false;
6217 }
6218 
6219 OperandMatchResultTy
6220 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6221   SMLoc S = Parser.getTok().getLoc();
6222   int64_t Imm = 0;
6223 
6224   if (trySkipId("offset")) {
6225 
6226     bool Ok = false;
6227     if (skipToken(AsmToken::Colon, "expected a colon")) {
6228       if (trySkipId("swizzle")) {
6229         Ok = parseSwizzleMacro(Imm);
6230       } else {
6231         Ok = parseSwizzleOffset(Imm);
6232       }
6233     }
6234 
6235     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6236 
6237     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
6238   } else {
6239     // Swizzle "offset" operand is optional.
6240     // If it is omitted, try parsing other optional operands.
6241     return parseOptionalOpr(Operands);
6242   }
6243 }
6244 
6245 bool
6246 AMDGPUOperand::isSwizzle() const {
6247   return isImmTy(ImmTySwizzle);
6248 }
6249 
6250 //===----------------------------------------------------------------------===//
6251 // VGPR Index Mode
6252 //===----------------------------------------------------------------------===//
6253 
6254 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
6255 
6256   using namespace llvm::AMDGPU::VGPRIndexMode;
6257 
6258   if (trySkipToken(AsmToken::RParen)) {
6259     return OFF;
6260   }
6261 
6262   int64_t Imm = 0;
6263 
6264   while (true) {
6265     unsigned Mode = 0;
6266     SMLoc S = Parser.getTok().getLoc();
6267 
6268     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
6269       if (trySkipId(IdSymbolic[ModeId])) {
6270         Mode = 1 << ModeId;
6271         break;
6272       }
6273     }
6274 
6275     if (Mode == 0) {
6276       Error(S, (Imm == 0)?
6277                "expected a VGPR index mode or a closing parenthesis" :
6278                "expected a VGPR index mode");
6279       return UNDEF;
6280     }
6281 
6282     if (Imm & Mode) {
6283       Error(S, "duplicate VGPR index mode");
6284       return UNDEF;
6285     }
6286     Imm |= Mode;
6287 
6288     if (trySkipToken(AsmToken::RParen))
6289       break;
6290     if (!skipToken(AsmToken::Comma,
6291                    "expected a comma or a closing parenthesis"))
6292       return UNDEF;
6293   }
6294 
6295   return Imm;
6296 }
6297 
6298 OperandMatchResultTy
6299 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
6300 
6301   using namespace llvm::AMDGPU::VGPRIndexMode;
6302 
6303   int64_t Imm = 0;
6304   SMLoc S = Parser.getTok().getLoc();
6305 
6306   if (getLexer().getKind() == AsmToken::Identifier &&
6307       Parser.getTok().getString() == "gpr_idx" &&
6308       getLexer().peekTok().is(AsmToken::LParen)) {
6309 
6310     Parser.Lex();
6311     Parser.Lex();
6312 
6313     Imm = parseGPRIdxMacro();
6314     if (Imm == UNDEF)
6315       return MatchOperand_ParseFail;
6316 
6317   } else {
6318     if (getParser().parseAbsoluteExpression(Imm))
6319       return MatchOperand_ParseFail;
6320     if (Imm < 0 || !isUInt<4>(Imm)) {
6321       Error(S, "invalid immediate: only 4-bit values are legal");
6322       return MatchOperand_ParseFail;
6323     }
6324   }
6325 
6326   Operands.push_back(
6327       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
6328   return MatchOperand_Success;
6329 }
6330 
6331 bool AMDGPUOperand::isGPRIdxMode() const {
6332   return isImmTy(ImmTyGprIdxMode);
6333 }
6334 
6335 //===----------------------------------------------------------------------===//
6336 // sopp branch targets
6337 //===----------------------------------------------------------------------===//
6338 
6339 OperandMatchResultTy
6340 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
6341 
6342   // Make sure we are not parsing something
6343   // that looks like a label or an expression but is not.
6344   // This will improve error messages.
6345   if (isRegister() || isModifier())
6346     return MatchOperand_NoMatch;
6347 
6348   if (!parseExpr(Operands))
6349     return MatchOperand_ParseFail;
6350 
6351   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
6352   assert(Opr.isImm() || Opr.isExpr());
6353   SMLoc Loc = Opr.getStartLoc();
6354 
6355   // Currently we do not support arbitrary expressions as branch targets.
6356   // Only labels and absolute expressions are accepted.
6357   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
6358     Error(Loc, "expected an absolute expression or a label");
6359   } else if (Opr.isImm() && !Opr.isS16Imm()) {
6360     Error(Loc, "expected a 16-bit signed jump offset");
6361   }
6362 
6363   return MatchOperand_Success;
6364 }
6365 
6366 //===----------------------------------------------------------------------===//
6367 // Boolean holding registers
6368 //===----------------------------------------------------------------------===//
6369 
6370 OperandMatchResultTy
6371 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
6372   return parseReg(Operands);
6373 }
6374 
6375 //===----------------------------------------------------------------------===//
6376 // mubuf
6377 //===----------------------------------------------------------------------===//
6378 
6379 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
6380   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
6381 }
6382 
6383 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
6384   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
6385 }
6386 
6387 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
6388   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
6389 }
6390 
6391 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
6392                                const OperandVector &Operands,
6393                                bool IsAtomic,
6394                                bool IsAtomicReturn,
6395                                bool IsLds) {
6396   bool IsLdsOpcode = IsLds;
6397   bool HasLdsModifier = false;
6398   OptionalImmIndexMap OptionalIdx;
6399   assert(IsAtomicReturn ? IsAtomic : true);
6400   unsigned FirstOperandIdx = 1;
6401 
6402   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
6403     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6404 
6405     // Add the register arguments
6406     if (Op.isReg()) {
6407       Op.addRegOperands(Inst, 1);
6408       // Insert a tied src for atomic return dst.
6409       // This cannot be postponed as subsequent calls to
6410       // addImmOperands rely on correct number of MC operands.
6411       if (IsAtomicReturn && i == FirstOperandIdx)
6412         Op.addRegOperands(Inst, 1);
6413       continue;
6414     }
6415 
6416     // Handle the case where soffset is an immediate
6417     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6418       Op.addImmOperands(Inst, 1);
6419       continue;
6420     }
6421 
6422     HasLdsModifier |= Op.isLDS();
6423 
6424     // Handle tokens like 'offen' which are sometimes hard-coded into the
6425     // asm string.  There are no MCInst operands for these.
6426     if (Op.isToken()) {
6427       continue;
6428     }
6429     assert(Op.isImm());
6430 
6431     // Handle optional arguments
6432     OptionalIdx[Op.getImmTy()] = i;
6433   }
6434 
6435   // This is a workaround for an llvm quirk which may result in an
6436   // incorrect instruction selection. Lds and non-lds versions of
6437   // MUBUF instructions are identical except that lds versions
6438   // have mandatory 'lds' modifier. However this modifier follows
6439   // optional modifiers and llvm asm matcher regards this 'lds'
6440   // modifier as an optional one. As a result, an lds version
6441   // of opcode may be selected even if it has no 'lds' modifier.
6442   if (IsLdsOpcode && !HasLdsModifier) {
6443     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
6444     if (NoLdsOpcode != -1) { // Got lds version - correct it.
6445       Inst.setOpcode(NoLdsOpcode);
6446       IsLdsOpcode = false;
6447     }
6448   }
6449 
6450   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
6451   if (!IsAtomic) { // glc is hard-coded.
6452     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6453   }
6454   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6455 
6456   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
6457     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6458   }
6459 
6460   if (isGFX10())
6461     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6462 }
6463 
6464 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
6465   OptionalImmIndexMap OptionalIdx;
6466 
6467   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6468     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6469 
6470     // Add the register arguments
6471     if (Op.isReg()) {
6472       Op.addRegOperands(Inst, 1);
6473       continue;
6474     }
6475 
6476     // Handle the case where soffset is an immediate
6477     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6478       Op.addImmOperands(Inst, 1);
6479       continue;
6480     }
6481 
6482     // Handle tokens like 'offen' which are sometimes hard-coded into the
6483     // asm string.  There are no MCInst operands for these.
6484     if (Op.isToken()) {
6485       continue;
6486     }
6487     assert(Op.isImm());
6488 
6489     // Handle optional arguments
6490     OptionalIdx[Op.getImmTy()] = i;
6491   }
6492 
6493   addOptionalImmOperand(Inst, Operands, OptionalIdx,
6494                         AMDGPUOperand::ImmTyOffset);
6495   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
6496   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6497   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6498   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6499 
6500   if (isGFX10())
6501     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6502 }
6503 
6504 //===----------------------------------------------------------------------===//
6505 // mimg
6506 //===----------------------------------------------------------------------===//
6507 
6508 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
6509                               bool IsAtomic) {
6510   unsigned I = 1;
6511   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6512   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6513     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6514   }
6515 
6516   if (IsAtomic) {
6517     // Add src, same as dst
6518     assert(Desc.getNumDefs() == 1);
6519     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
6520   }
6521 
6522   OptionalImmIndexMap OptionalIdx;
6523 
6524   for (unsigned E = Operands.size(); I != E; ++I) {
6525     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6526 
6527     // Add the register arguments
6528     if (Op.isReg()) {
6529       Op.addRegOperands(Inst, 1);
6530     } else if (Op.isImmModifier()) {
6531       OptionalIdx[Op.getImmTy()] = I;
6532     } else if (!Op.isToken()) {
6533       llvm_unreachable("unexpected operand type");
6534     }
6535   }
6536 
6537   bool IsGFX10 = isGFX10();
6538 
6539   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
6540   if (IsGFX10)
6541     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
6542   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
6543   if (IsGFX10)
6544     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6545   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6546   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6547   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
6548   if (IsGFX10)
6549     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
6550   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6551   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
6552   if (!IsGFX10)
6553     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
6554   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
6555 }
6556 
6557 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
6558   cvtMIMG(Inst, Operands, true);
6559 }
6560 
6561 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
6562                                       const OperandVector &Operands) {
6563   for (unsigned I = 1; I < Operands.size(); ++I) {
6564     auto &Operand = (AMDGPUOperand &)*Operands[I];
6565     if (Operand.isReg())
6566       Operand.addRegOperands(Inst, 1);
6567   }
6568 
6569   Inst.addOperand(MCOperand::createImm(1)); // a16
6570 }
6571 
6572 //===----------------------------------------------------------------------===//
6573 // smrd
6574 //===----------------------------------------------------------------------===//
6575 
6576 bool AMDGPUOperand::isSMRDOffset8() const {
6577   return isImm() && isUInt<8>(getImm());
6578 }
6579 
6580 bool AMDGPUOperand::isSMEMOffset() const {
6581   return isImm(); // Offset range is checked later by validator.
6582 }
6583 
6584 bool AMDGPUOperand::isSMRDLiteralOffset() const {
6585   // 32-bit literals are only supported on CI and we only want to use them
6586   // when the offset is > 8-bits.
6587   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
6588 }
6589 
6590 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
6591   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6592 }
6593 
6594 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
6595   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6596 }
6597 
6598 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
6599   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6600 }
6601 
6602 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
6603   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6604 }
6605 
6606 //===----------------------------------------------------------------------===//
6607 // vop3
6608 //===----------------------------------------------------------------------===//
6609 
6610 static bool ConvertOmodMul(int64_t &Mul) {
6611   if (Mul != 1 && Mul != 2 && Mul != 4)
6612     return false;
6613 
6614   Mul >>= 1;
6615   return true;
6616 }
6617 
6618 static bool ConvertOmodDiv(int64_t &Div) {
6619   if (Div == 1) {
6620     Div = 0;
6621     return true;
6622   }
6623 
6624   if (Div == 2) {
6625     Div = 3;
6626     return true;
6627   }
6628 
6629   return false;
6630 }
6631 
6632 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
6633   if (BoundCtrl == 0) {
6634     BoundCtrl = 1;
6635     return true;
6636   }
6637 
6638   if (BoundCtrl == -1) {
6639     BoundCtrl = 0;
6640     return true;
6641   }
6642 
6643   return false;
6644 }
6645 
6646 // Note: the order in this table matches the order of operands in AsmString.
6647 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
6648   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
6649   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
6650   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
6651   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
6652   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
6653   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
6654   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
6655   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
6656   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
6657   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
6658   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
6659   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
6660   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
6661   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
6662   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
6663   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
6664   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
6665   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
6666   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
6667   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
6668   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
6669   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
6670   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
6671   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
6672   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
6673   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
6674   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
6675   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
6676   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
6677   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
6678   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
6679   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
6680   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
6681   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
6682   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
6683   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
6684   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
6685   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
6686   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
6687   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
6688   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
6689   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
6690   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
6691 };
6692 
6693 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
6694 
6695   OperandMatchResultTy res = parseOptionalOpr(Operands);
6696 
6697   // This is a hack to enable hardcoded mandatory operands which follow
6698   // optional operands.
6699   //
6700   // Current design assumes that all operands after the first optional operand
6701   // are also optional. However implementation of some instructions violates
6702   // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
6703   //
6704   // To alleviate this problem, we have to (implicitly) parse extra operands
6705   // to make sure autogenerated parser of custom operands never hit hardcoded
6706   // mandatory operands.
6707 
6708   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
6709     if (res != MatchOperand_Success ||
6710         isToken(AsmToken::EndOfStatement))
6711       break;
6712 
6713     trySkipToken(AsmToken::Comma);
6714     res = parseOptionalOpr(Operands);
6715   }
6716 
6717   return res;
6718 }
6719 
6720 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6721   OperandMatchResultTy res;
6722   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6723     // try to parse any optional operand here
6724     if (Op.IsBit) {
6725       res = parseNamedBit(Op.Name, Operands, Op.Type);
6726     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6727       res = parseOModOperand(Operands);
6728     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6729                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6730                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6731       res = parseSDWASel(Operands, Op.Name, Op.Type);
6732     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6733       res = parseSDWADstUnused(Operands);
6734     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6735                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6736                Op.Type == AMDGPUOperand::ImmTyNegLo ||
6737                Op.Type == AMDGPUOperand::ImmTyNegHi) {
6738       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6739                                         Op.ConvertResult);
6740     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6741       res = parseDim(Operands);
6742     } else {
6743       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6744     }
6745     if (res != MatchOperand_NoMatch) {
6746       return res;
6747     }
6748   }
6749   return MatchOperand_NoMatch;
6750 }
6751 
6752 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6753   StringRef Name = Parser.getTok().getString();
6754   if (Name == "mul") {
6755     return parseIntWithPrefix("mul", Operands,
6756                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6757   }
6758 
6759   if (Name == "div") {
6760     return parseIntWithPrefix("div", Operands,
6761                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6762   }
6763 
6764   return MatchOperand_NoMatch;
6765 }
6766 
6767 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6768   cvtVOP3P(Inst, Operands);
6769 
6770   int Opc = Inst.getOpcode();
6771 
6772   int SrcNum;
6773   const int Ops[] = { AMDGPU::OpName::src0,
6774                       AMDGPU::OpName::src1,
6775                       AMDGPU::OpName::src2 };
6776   for (SrcNum = 0;
6777        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6778        ++SrcNum);
6779   assert(SrcNum > 0);
6780 
6781   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6782   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6783 
6784   if ((OpSel & (1 << SrcNum)) != 0) {
6785     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6786     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6787     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6788   }
6789 }
6790 
6791 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
6792       // 1. This operand is input modifiers
6793   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
6794       // 2. This is not last operand
6795       && Desc.NumOperands > (OpNum + 1)
6796       // 3. Next operand is register class
6797       && Desc.OpInfo[OpNum + 1].RegClass != -1
6798       // 4. Next register is not tied to any other operand
6799       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6800 }
6801 
6802 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6803 {
6804   OptionalImmIndexMap OptionalIdx;
6805   unsigned Opc = Inst.getOpcode();
6806 
6807   unsigned I = 1;
6808   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6809   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6810     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6811   }
6812 
6813   for (unsigned E = Operands.size(); I != E; ++I) {
6814     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6815     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6816       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6817     } else if (Op.isInterpSlot() ||
6818                Op.isInterpAttr() ||
6819                Op.isAttrChan()) {
6820       Inst.addOperand(MCOperand::createImm(Op.getImm()));
6821     } else if (Op.isImmModifier()) {
6822       OptionalIdx[Op.getImmTy()] = I;
6823     } else {
6824       llvm_unreachable("unhandled operand type");
6825     }
6826   }
6827 
6828   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6829     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6830   }
6831 
6832   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6833     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6834   }
6835 
6836   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6837     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6838   }
6839 }
6840 
6841 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
6842                               OptionalImmIndexMap &OptionalIdx) {
6843   unsigned Opc = Inst.getOpcode();
6844 
6845   unsigned I = 1;
6846   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6847   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6848     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6849   }
6850 
6851   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
6852     // This instruction has src modifiers
6853     for (unsigned E = Operands.size(); I != E; ++I) {
6854       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6855       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6856         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6857       } else if (Op.isImmModifier()) {
6858         OptionalIdx[Op.getImmTy()] = I;
6859       } else if (Op.isRegOrImm()) {
6860         Op.addRegOrImmOperands(Inst, 1);
6861       } else {
6862         llvm_unreachable("unhandled operand type");
6863       }
6864     }
6865   } else {
6866     // No src modifiers
6867     for (unsigned E = Operands.size(); I != E; ++I) {
6868       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6869       if (Op.isMod()) {
6870         OptionalIdx[Op.getImmTy()] = I;
6871       } else {
6872         Op.addRegOrImmOperands(Inst, 1);
6873       }
6874     }
6875   }
6876 
6877   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6878     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6879   }
6880 
6881   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6882     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6883   }
6884 
6885   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
6886   // it has src2 register operand that is tied to dst operand
6887   // we don't allow modifiers for this operand in assembler so src2_modifiers
6888   // should be 0.
6889   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
6890       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
6891       Opc == AMDGPU::V_MAC_F32_e64_vi ||
6892       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
6893       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
6894       Opc == AMDGPU::V_MAC_F16_e64_vi ||
6895       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
6896       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
6897       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
6898       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
6899     auto it = Inst.begin();
6900     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
6901     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
6902     ++it;
6903     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6904   }
6905 }
6906 
6907 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
6908   OptionalImmIndexMap OptionalIdx;
6909   cvtVOP3(Inst, Operands, OptionalIdx);
6910 }
6911 
6912 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
6913                                const OperandVector &Operands) {
6914   OptionalImmIndexMap OptIdx;
6915   const int Opc = Inst.getOpcode();
6916   const MCInstrDesc &Desc = MII.get(Opc);
6917 
6918   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
6919 
6920   cvtVOP3(Inst, Operands, OptIdx);
6921 
6922   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
6923     assert(!IsPacked);
6924     Inst.addOperand(Inst.getOperand(0));
6925   }
6926 
6927   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
6928   // instruction, and then figure out where to actually put the modifiers
6929 
6930   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
6931 
6932   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
6933   if (OpSelHiIdx != -1) {
6934     int DefaultVal = IsPacked ? -1 : 0;
6935     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
6936                           DefaultVal);
6937   }
6938 
6939   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
6940   if (NegLoIdx != -1) {
6941     assert(IsPacked);
6942     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
6943     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
6944   }
6945 
6946   const int Ops[] = { AMDGPU::OpName::src0,
6947                       AMDGPU::OpName::src1,
6948                       AMDGPU::OpName::src2 };
6949   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
6950                          AMDGPU::OpName::src1_modifiers,
6951                          AMDGPU::OpName::src2_modifiers };
6952 
6953   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6954 
6955   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6956   unsigned OpSelHi = 0;
6957   unsigned NegLo = 0;
6958   unsigned NegHi = 0;
6959 
6960   if (OpSelHiIdx != -1) {
6961     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
6962   }
6963 
6964   if (NegLoIdx != -1) {
6965     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
6966     NegLo = Inst.getOperand(NegLoIdx).getImm();
6967     NegHi = Inst.getOperand(NegHiIdx).getImm();
6968   }
6969 
6970   for (int J = 0; J < 3; ++J) {
6971     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
6972     if (OpIdx == -1)
6973       break;
6974 
6975     uint32_t ModVal = 0;
6976 
6977     if ((OpSel & (1 << J)) != 0)
6978       ModVal |= SISrcMods::OP_SEL_0;
6979 
6980     if ((OpSelHi & (1 << J)) != 0)
6981       ModVal |= SISrcMods::OP_SEL_1;
6982 
6983     if ((NegLo & (1 << J)) != 0)
6984       ModVal |= SISrcMods::NEG;
6985 
6986     if ((NegHi & (1 << J)) != 0)
6987       ModVal |= SISrcMods::NEG_HI;
6988 
6989     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
6990 
6991     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
6992   }
6993 }
6994 
6995 //===----------------------------------------------------------------------===//
6996 // dpp
6997 //===----------------------------------------------------------------------===//
6998 
6999 bool AMDGPUOperand::isDPP8() const {
7000   return isImmTy(ImmTyDPP8);
7001 }
7002 
7003 bool AMDGPUOperand::isDPPCtrl() const {
7004   using namespace AMDGPU::DPP;
7005 
7006   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
7007   if (result) {
7008     int64_t Imm = getImm();
7009     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
7010            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
7011            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
7012            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
7013            (Imm == DppCtrl::WAVE_SHL1) ||
7014            (Imm == DppCtrl::WAVE_ROL1) ||
7015            (Imm == DppCtrl::WAVE_SHR1) ||
7016            (Imm == DppCtrl::WAVE_ROR1) ||
7017            (Imm == DppCtrl::ROW_MIRROR) ||
7018            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
7019            (Imm == DppCtrl::BCAST15) ||
7020            (Imm == DppCtrl::BCAST31) ||
7021            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
7022            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
7023   }
7024   return false;
7025 }
7026 
7027 //===----------------------------------------------------------------------===//
7028 // mAI
7029 //===----------------------------------------------------------------------===//
7030 
7031 bool AMDGPUOperand::isBLGP() const {
7032   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
7033 }
7034 
7035 bool AMDGPUOperand::isCBSZ() const {
7036   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
7037 }
7038 
7039 bool AMDGPUOperand::isABID() const {
7040   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
7041 }
7042 
7043 bool AMDGPUOperand::isS16Imm() const {
7044   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
7045 }
7046 
7047 bool AMDGPUOperand::isU16Imm() const {
7048   return isImm() && isUInt<16>(getImm());
7049 }
7050 
7051 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
7052   if (!isGFX10())
7053     return MatchOperand_NoMatch;
7054 
7055   SMLoc S = Parser.getTok().getLoc();
7056 
7057   if (getLexer().isNot(AsmToken::Identifier))
7058     return MatchOperand_NoMatch;
7059   if (getLexer().getTok().getString() != "dim")
7060     return MatchOperand_NoMatch;
7061 
7062   Parser.Lex();
7063   if (getLexer().isNot(AsmToken::Colon))
7064     return MatchOperand_ParseFail;
7065 
7066   Parser.Lex();
7067 
7068   // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
7069   // integer.
7070   std::string Token;
7071   if (getLexer().is(AsmToken::Integer)) {
7072     SMLoc Loc = getLexer().getTok().getEndLoc();
7073     Token = std::string(getLexer().getTok().getString());
7074     Parser.Lex();
7075     if (getLexer().getTok().getLoc() != Loc)
7076       return MatchOperand_ParseFail;
7077   }
7078   if (getLexer().isNot(AsmToken::Identifier))
7079     return MatchOperand_ParseFail;
7080   Token += getLexer().getTok().getString();
7081 
7082   StringRef DimId = Token;
7083   if (DimId.startswith("SQ_RSRC_IMG_"))
7084     DimId = DimId.substr(12);
7085 
7086   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
7087   if (!DimInfo)
7088     return MatchOperand_ParseFail;
7089 
7090   Parser.Lex();
7091 
7092   Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
7093                                               AMDGPUOperand::ImmTyDim));
7094   return MatchOperand_Success;
7095 }
7096 
7097 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
7098   SMLoc S = Parser.getTok().getLoc();
7099   StringRef Prefix;
7100 
7101   if (getLexer().getKind() == AsmToken::Identifier) {
7102     Prefix = Parser.getTok().getString();
7103   } else {
7104     return MatchOperand_NoMatch;
7105   }
7106 
7107   if (Prefix != "dpp8")
7108     return parseDPPCtrl(Operands);
7109   if (!isGFX10())
7110     return MatchOperand_NoMatch;
7111 
7112   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
7113 
7114   int64_t Sels[8];
7115 
7116   Parser.Lex();
7117   if (getLexer().isNot(AsmToken::Colon))
7118     return MatchOperand_ParseFail;
7119 
7120   Parser.Lex();
7121   if (getLexer().isNot(AsmToken::LBrac))
7122     return MatchOperand_ParseFail;
7123 
7124   Parser.Lex();
7125   if (getParser().parseAbsoluteExpression(Sels[0]))
7126     return MatchOperand_ParseFail;
7127   if (0 > Sels[0] || 7 < Sels[0])
7128     return MatchOperand_ParseFail;
7129 
7130   for (size_t i = 1; i < 8; ++i) {
7131     if (getLexer().isNot(AsmToken::Comma))
7132       return MatchOperand_ParseFail;
7133 
7134     Parser.Lex();
7135     if (getParser().parseAbsoluteExpression(Sels[i]))
7136       return MatchOperand_ParseFail;
7137     if (0 > Sels[i] || 7 < Sels[i])
7138       return MatchOperand_ParseFail;
7139   }
7140 
7141   if (getLexer().isNot(AsmToken::RBrac))
7142     return MatchOperand_ParseFail;
7143   Parser.Lex();
7144 
7145   unsigned DPP8 = 0;
7146   for (size_t i = 0; i < 8; ++i)
7147     DPP8 |= (Sels[i] << (i * 3));
7148 
7149   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
7150   return MatchOperand_Success;
7151 }
7152 
7153 OperandMatchResultTy
7154 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
7155   using namespace AMDGPU::DPP;
7156 
7157   SMLoc S = Parser.getTok().getLoc();
7158   StringRef Prefix;
7159   int64_t Int;
7160 
7161   if (getLexer().getKind() == AsmToken::Identifier) {
7162     Prefix = Parser.getTok().getString();
7163   } else {
7164     return MatchOperand_NoMatch;
7165   }
7166 
7167   if (Prefix == "row_mirror") {
7168     Int = DppCtrl::ROW_MIRROR;
7169     Parser.Lex();
7170   } else if (Prefix == "row_half_mirror") {
7171     Int = DppCtrl::ROW_HALF_MIRROR;
7172     Parser.Lex();
7173   } else {
7174     // Check to prevent parseDPPCtrlOps from eating invalid tokens
7175     if (Prefix != "quad_perm"
7176         && Prefix != "row_shl"
7177         && Prefix != "row_shr"
7178         && Prefix != "row_ror"
7179         && Prefix != "wave_shl"
7180         && Prefix != "wave_rol"
7181         && Prefix != "wave_shr"
7182         && Prefix != "wave_ror"
7183         && Prefix != "row_bcast"
7184         && Prefix != "row_share"
7185         && Prefix != "row_xmask") {
7186       return MatchOperand_NoMatch;
7187     }
7188 
7189     if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
7190       return MatchOperand_NoMatch;
7191 
7192     if (!isVI() && !isGFX9() &&
7193         (Prefix == "wave_shl" || Prefix == "wave_shr" ||
7194          Prefix == "wave_rol" || Prefix == "wave_ror" ||
7195          Prefix == "row_bcast"))
7196       return MatchOperand_NoMatch;
7197 
7198     Parser.Lex();
7199     if (getLexer().isNot(AsmToken::Colon))
7200       return MatchOperand_ParseFail;
7201 
7202     if (Prefix == "quad_perm") {
7203       // quad_perm:[%d,%d,%d,%d]
7204       Parser.Lex();
7205       if (getLexer().isNot(AsmToken::LBrac))
7206         return MatchOperand_ParseFail;
7207       Parser.Lex();
7208 
7209       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
7210         return MatchOperand_ParseFail;
7211 
7212       for (int i = 0; i < 3; ++i) {
7213         if (getLexer().isNot(AsmToken::Comma))
7214           return MatchOperand_ParseFail;
7215         Parser.Lex();
7216 
7217         int64_t Temp;
7218         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
7219           return MatchOperand_ParseFail;
7220         const int shift = i*2 + 2;
7221         Int += (Temp << shift);
7222       }
7223 
7224       if (getLexer().isNot(AsmToken::RBrac))
7225         return MatchOperand_ParseFail;
7226       Parser.Lex();
7227     } else {
7228       // sel:%d
7229       Parser.Lex();
7230       if (getParser().parseAbsoluteExpression(Int))
7231         return MatchOperand_ParseFail;
7232 
7233       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
7234         Int |= DppCtrl::ROW_SHL0;
7235       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
7236         Int |= DppCtrl::ROW_SHR0;
7237       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
7238         Int |= DppCtrl::ROW_ROR0;
7239       } else if (Prefix == "wave_shl" && 1 == Int) {
7240         Int = DppCtrl::WAVE_SHL1;
7241       } else if (Prefix == "wave_rol" && 1 == Int) {
7242         Int = DppCtrl::WAVE_ROL1;
7243       } else if (Prefix == "wave_shr" && 1 == Int) {
7244         Int = DppCtrl::WAVE_SHR1;
7245       } else if (Prefix == "wave_ror" && 1 == Int) {
7246         Int = DppCtrl::WAVE_ROR1;
7247       } else if (Prefix == "row_bcast") {
7248         if (Int == 15) {
7249           Int = DppCtrl::BCAST15;
7250         } else if (Int == 31) {
7251           Int = DppCtrl::BCAST31;
7252         } else {
7253           return MatchOperand_ParseFail;
7254         }
7255       } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
7256         Int |= DppCtrl::ROW_SHARE_FIRST;
7257       } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
7258         Int |= DppCtrl::ROW_XMASK_FIRST;
7259       } else {
7260         return MatchOperand_ParseFail;
7261       }
7262     }
7263   }
7264 
7265   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
7266   return MatchOperand_Success;
7267 }
7268 
7269 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
7270   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
7271 }
7272 
7273 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
7274   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
7275 }
7276 
7277 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
7278   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
7279 }
7280 
7281 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
7282   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
7283 }
7284 
7285 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
7286   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
7287 }
7288 
7289 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
7290   OptionalImmIndexMap OptionalIdx;
7291 
7292   unsigned I = 1;
7293   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7294   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7295     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7296   }
7297 
7298   int Fi = 0;
7299   for (unsigned E = Operands.size(); I != E; ++I) {
7300     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
7301                                             MCOI::TIED_TO);
7302     if (TiedTo != -1) {
7303       assert((unsigned)TiedTo < Inst.getNumOperands());
7304       // handle tied old or src2 for MAC instructions
7305       Inst.addOperand(Inst.getOperand(TiedTo));
7306     }
7307     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7308     // Add the register arguments
7309     if (Op.isReg() && validateVccOperand(Op.getReg())) {
7310       // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
7311       // Skip it.
7312       continue;
7313     }
7314 
7315     if (IsDPP8) {
7316       if (Op.isDPP8()) {
7317         Op.addImmOperands(Inst, 1);
7318       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7319         Op.addRegWithFPInputModsOperands(Inst, 2);
7320       } else if (Op.isFI()) {
7321         Fi = Op.getImm();
7322       } else if (Op.isReg()) {
7323         Op.addRegOperands(Inst, 1);
7324       } else {
7325         llvm_unreachable("Invalid operand type");
7326       }
7327     } else {
7328       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7329         Op.addRegWithFPInputModsOperands(Inst, 2);
7330       } else if (Op.isDPPCtrl()) {
7331         Op.addImmOperands(Inst, 1);
7332       } else if (Op.isImm()) {
7333         // Handle optional arguments
7334         OptionalIdx[Op.getImmTy()] = I;
7335       } else {
7336         llvm_unreachable("Invalid operand type");
7337       }
7338     }
7339   }
7340 
7341   if (IsDPP8) {
7342     using namespace llvm::AMDGPU::DPP;
7343     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
7344   } else {
7345     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
7346     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
7347     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
7348     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
7349       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
7350     }
7351   }
7352 }
7353 
7354 //===----------------------------------------------------------------------===//
7355 // sdwa
7356 //===----------------------------------------------------------------------===//
7357 
7358 OperandMatchResultTy
7359 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
7360                               AMDGPUOperand::ImmTy Type) {
7361   using namespace llvm::AMDGPU::SDWA;
7362 
7363   SMLoc S = Parser.getTok().getLoc();
7364   StringRef Value;
7365   OperandMatchResultTy res;
7366 
7367   res = parseStringWithPrefix(Prefix, Value);
7368   if (res != MatchOperand_Success) {
7369     return res;
7370   }
7371 
7372   int64_t Int;
7373   Int = StringSwitch<int64_t>(Value)
7374         .Case("BYTE_0", SdwaSel::BYTE_0)
7375         .Case("BYTE_1", SdwaSel::BYTE_1)
7376         .Case("BYTE_2", SdwaSel::BYTE_2)
7377         .Case("BYTE_3", SdwaSel::BYTE_3)
7378         .Case("WORD_0", SdwaSel::WORD_0)
7379         .Case("WORD_1", SdwaSel::WORD_1)
7380         .Case("DWORD", SdwaSel::DWORD)
7381         .Default(0xffffffff);
7382   Parser.Lex(); // eat last token
7383 
7384   if (Int == 0xffffffff) {
7385     return MatchOperand_ParseFail;
7386   }
7387 
7388   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
7389   return MatchOperand_Success;
7390 }
7391 
7392 OperandMatchResultTy
7393 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
7394   using namespace llvm::AMDGPU::SDWA;
7395 
7396   SMLoc S = Parser.getTok().getLoc();
7397   StringRef Value;
7398   OperandMatchResultTy res;
7399 
7400   res = parseStringWithPrefix("dst_unused", Value);
7401   if (res != MatchOperand_Success) {
7402     return res;
7403   }
7404 
7405   int64_t Int;
7406   Int = StringSwitch<int64_t>(Value)
7407         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
7408         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
7409         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
7410         .Default(0xffffffff);
7411   Parser.Lex(); // eat last token
7412 
7413   if (Int == 0xffffffff) {
7414     return MatchOperand_ParseFail;
7415   }
7416 
7417   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
7418   return MatchOperand_Success;
7419 }
7420 
7421 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
7422   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
7423 }
7424 
7425 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
7426   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
7427 }
7428 
7429 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
7430   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
7431 }
7432 
7433 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
7434   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
7435 }
7436 
7437 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
7438   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
7439 }
7440 
7441 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
7442                               uint64_t BasicInstType,
7443                               bool SkipDstVcc,
7444                               bool SkipSrcVcc) {
7445   using namespace llvm::AMDGPU::SDWA;
7446 
7447   OptionalImmIndexMap OptionalIdx;
7448   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
7449   bool SkippedVcc = false;
7450 
7451   unsigned I = 1;
7452   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7453   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7454     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7455   }
7456 
7457   for (unsigned E = Operands.size(); I != E; ++I) {
7458     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7459     if (SkipVcc && !SkippedVcc && Op.isReg() &&
7460         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
7461       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
7462       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
7463       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
7464       // Skip VCC only if we didn't skip it on previous iteration.
7465       // Note that src0 and src1 occupy 2 slots each because of modifiers.
7466       if (BasicInstType == SIInstrFlags::VOP2 &&
7467           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
7468            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
7469         SkippedVcc = true;
7470         continue;
7471       } else if (BasicInstType == SIInstrFlags::VOPC &&
7472                  Inst.getNumOperands() == 0) {
7473         SkippedVcc = true;
7474         continue;
7475       }
7476     }
7477     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7478       Op.addRegOrImmWithInputModsOperands(Inst, 2);
7479     } else if (Op.isImm()) {
7480       // Handle optional arguments
7481       OptionalIdx[Op.getImmTy()] = I;
7482     } else {
7483       llvm_unreachable("Invalid operand type");
7484     }
7485     SkippedVcc = false;
7486   }
7487 
7488   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
7489       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
7490       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
7491     // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
7492     switch (BasicInstType) {
7493     case SIInstrFlags::VOP1:
7494       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7495       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7496         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7497       }
7498       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7499       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7500       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7501       break;
7502 
7503     case SIInstrFlags::VOP2:
7504       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7505       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7506         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7507       }
7508       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7509       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7510       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7511       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7512       break;
7513 
7514     case SIInstrFlags::VOPC:
7515       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
7516         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7517       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7518       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7519       break;
7520 
7521     default:
7522       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
7523     }
7524   }
7525 
7526   // special case v_mac_{f16, f32}:
7527   // it has src2 register operand that is tied to dst operand
7528   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
7529       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
7530     auto it = Inst.begin();
7531     std::advance(
7532       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
7533     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
7534   }
7535 }
7536 
7537 //===----------------------------------------------------------------------===//
7538 // mAI
7539 //===----------------------------------------------------------------------===//
7540 
7541 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
7542   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
7543 }
7544 
7545 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
7546   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
7547 }
7548 
7549 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
7550   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
7551 }
7552 
7553 /// Force static initialization.
7554 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
7555   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
7556   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
7557 }
7558 
7559 #define GET_REGISTER_MATCHER
7560 #define GET_MATCHER_IMPLEMENTATION
7561 #define GET_MNEMONIC_SPELL_CHECKER
7562 #define GET_MNEMONIC_CHECKER
7563 #include "AMDGPUGenAsmMatcher.inc"
7564 
7565 // This fuction should be defined after auto-generated include so that we have
7566 // MatchClassKind enum defined
7567 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
7568                                                      unsigned Kind) {
7569   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
7570   // But MatchInstructionImpl() expects to meet token and fails to validate
7571   // operand. This method checks if we are given immediate operand but expect to
7572   // get corresponding token.
7573   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
7574   switch (Kind) {
7575   case MCK_addr64:
7576     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
7577   case MCK_gds:
7578     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
7579   case MCK_lds:
7580     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
7581   case MCK_glc:
7582     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
7583   case MCK_idxen:
7584     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
7585   case MCK_offen:
7586     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
7587   case MCK_SSrcB32:
7588     // When operands have expression values, they will return true for isToken,
7589     // because it is not possible to distinguish between a token and an
7590     // expression at parse time. MatchInstructionImpl() will always try to
7591     // match an operand as a token, when isToken returns true, and when the
7592     // name of the expression is not a valid token, the match will fail,
7593     // so we need to handle it here.
7594     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
7595   case MCK_SSrcF32:
7596     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
7597   case MCK_SoppBrTarget:
7598     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
7599   case MCK_VReg32OrOff:
7600     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
7601   case MCK_InterpSlot:
7602     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
7603   case MCK_Attr:
7604     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
7605   case MCK_AttrChan:
7606     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
7607   case MCK_ImmSMEMOffset:
7608     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
7609   case MCK_SReg_64:
7610   case MCK_SReg_64_XEXEC:
7611     // Null is defined as a 32-bit register but
7612     // it should also be enabled with 64-bit operands.
7613     // The following code enables it for SReg_64 operands
7614     // used as source and destination. Remaining source
7615     // operands are handled in isInlinableImm.
7616     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
7617   default:
7618     return Match_InvalidOperand;
7619   }
7620 }
7621 
7622 //===----------------------------------------------------------------------===//
7623 // endpgm
7624 //===----------------------------------------------------------------------===//
7625 
7626 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
7627   SMLoc S = Parser.getTok().getLoc();
7628   int64_t Imm = 0;
7629 
7630   if (!parseExpr(Imm)) {
7631     // The operand is optional, if not present default to 0
7632     Imm = 0;
7633   }
7634 
7635   if (!isUInt<16>(Imm)) {
7636     Error(S, "expected a 16-bit value");
7637     return MatchOperand_ParseFail;
7638   }
7639 
7640   Operands.push_back(
7641       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
7642   return MatchOperand_Success;
7643 }
7644 
7645 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
7646