1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/Error.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
63 
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
67 
68 namespace {
69 
70 class AMDGPUAsmParser;
71 
72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
73 
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
77 
78 class AMDGPUOperand : public MCParsedAsmOperand {
79   enum KindTy {
80     Token,
81     Immediate,
82     Register,
83     Expression
84   } Kind;
85 
86   SMLoc StartLoc, EndLoc;
87   const AMDGPUAsmParser *AsmParser;
88 
89 public:
90   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
92 
93   using Ptr = std::unique_ptr<AMDGPUOperand>;
94 
95   struct Modifiers {
96     bool Abs = false;
97     bool Neg = false;
98     bool Sext = false;
99 
100     bool hasFPModifiers() const { return Abs || Neg; }
101     bool hasIntModifiers() const { return Sext; }
102     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103 
104     int64_t getFPModifiersOperand() const {
105       int64_t Operand = 0;
106       Operand |= Abs ? SISrcMods::ABS : 0u;
107       Operand |= Neg ? SISrcMods::NEG : 0u;
108       return Operand;
109     }
110 
111     int64_t getIntModifiersOperand() const {
112       int64_t Operand = 0;
113       Operand |= Sext ? SISrcMods::SEXT : 0u;
114       return Operand;
115     }
116 
117     int64_t getModifiersOperand() const {
118       assert(!(hasFPModifiers() && hasIntModifiers())
119            && "fp and int modifiers should not be used simultaneously");
120       if (hasFPModifiers()) {
121         return getFPModifiersOperand();
122       } else if (hasIntModifiers()) {
123         return getIntModifiersOperand();
124       } else {
125         return 0;
126       }
127     }
128 
129     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
130   };
131 
132   enum ImmTy {
133     ImmTyNone,
134     ImmTyGDS,
135     ImmTyLDS,
136     ImmTyOffen,
137     ImmTyIdxen,
138     ImmTyAddr64,
139     ImmTyOffset,
140     ImmTyInstOffset,
141     ImmTyOffset0,
142     ImmTyOffset1,
143     ImmTyDLC,
144     ImmTyGLC,
145     ImmTySLC,
146     ImmTySWZ,
147     ImmTyTFE,
148     ImmTyD16,
149     ImmTyClampSI,
150     ImmTyOModSI,
151     ImmTyDPP8,
152     ImmTyDppCtrl,
153     ImmTyDppRowMask,
154     ImmTyDppBankMask,
155     ImmTyDppBoundCtrl,
156     ImmTyDppFi,
157     ImmTySdwaDstSel,
158     ImmTySdwaSrc0Sel,
159     ImmTySdwaSrc1Sel,
160     ImmTySdwaDstUnused,
161     ImmTyDMask,
162     ImmTyDim,
163     ImmTyUNorm,
164     ImmTyDA,
165     ImmTyR128A16,
166     ImmTyA16,
167     ImmTyLWE,
168     ImmTyExpTgt,
169     ImmTyExpCompr,
170     ImmTyExpVM,
171     ImmTyFORMAT,
172     ImmTyHwreg,
173     ImmTyOff,
174     ImmTySendMsg,
175     ImmTyInterpSlot,
176     ImmTyInterpAttr,
177     ImmTyAttrChan,
178     ImmTyOpSel,
179     ImmTyOpSelHi,
180     ImmTyNegLo,
181     ImmTyNegHi,
182     ImmTySwizzle,
183     ImmTyGprIdxMode,
184     ImmTyHigh,
185     ImmTyBLGP,
186     ImmTyCBSZ,
187     ImmTyABID,
188     ImmTyEndpgm,
189   };
190 
191 private:
192   struct TokOp {
193     const char *Data;
194     unsigned Length;
195   };
196 
197   struct ImmOp {
198     int64_t Val;
199     ImmTy Type;
200     bool IsFPImm;
201     Modifiers Mods;
202   };
203 
204   struct RegOp {
205     unsigned RegNo;
206     Modifiers Mods;
207   };
208 
209   union {
210     TokOp Tok;
211     ImmOp Imm;
212     RegOp Reg;
213     const MCExpr *Expr;
214   };
215 
216 public:
217   bool isToken() const override {
218     if (Kind == Token)
219       return true;
220 
221     // When parsing operands, we can't always tell if something was meant to be
222     // a token, like 'gds', or an expression that references a global variable.
223     // In this case, we assume the string is an expression, and if we need to
224     // interpret is a token, then we treat the symbol name as the token.
225     return isSymbolRefExpr();
226   }
227 
228   bool isSymbolRefExpr() const {
229     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
230   }
231 
232   bool isImm() const override {
233     return Kind == Immediate;
234   }
235 
236   bool isInlinableImm(MVT type) const;
237   bool isLiteralImm(MVT type) const;
238 
239   bool isRegKind() const {
240     return Kind == Register;
241   }
242 
243   bool isReg() const override {
244     return isRegKind() && !hasModifiers();
245   }
246 
247   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
248     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
249   }
250 
251   bool isRegOrImmWithInt16InputMods() const {
252     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
253   }
254 
255   bool isRegOrImmWithInt32InputMods() const {
256     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
257   }
258 
259   bool isRegOrImmWithInt64InputMods() const {
260     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
261   }
262 
263   bool isRegOrImmWithFP16InputMods() const {
264     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
265   }
266 
267   bool isRegOrImmWithFP32InputMods() const {
268     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
269   }
270 
271   bool isRegOrImmWithFP64InputMods() const {
272     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
273   }
274 
275   bool isVReg() const {
276     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
277            isRegClass(AMDGPU::VReg_64RegClassID) ||
278            isRegClass(AMDGPU::VReg_96RegClassID) ||
279            isRegClass(AMDGPU::VReg_128RegClassID) ||
280            isRegClass(AMDGPU::VReg_160RegClassID) ||
281            isRegClass(AMDGPU::VReg_192RegClassID) ||
282            isRegClass(AMDGPU::VReg_256RegClassID) ||
283            isRegClass(AMDGPU::VReg_512RegClassID) ||
284            isRegClass(AMDGPU::VReg_1024RegClassID);
285   }
286 
287   bool isVReg32() const {
288     return isRegClass(AMDGPU::VGPR_32RegClassID);
289   }
290 
291   bool isVReg32OrOff() const {
292     return isOff() || isVReg32();
293   }
294 
295   bool isNull() const {
296     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
297   }
298 
299   bool isSDWAOperand(MVT type) const;
300   bool isSDWAFP16Operand() const;
301   bool isSDWAFP32Operand() const;
302   bool isSDWAInt16Operand() const;
303   bool isSDWAInt32Operand() const;
304 
305   bool isImmTy(ImmTy ImmT) const {
306     return isImm() && Imm.Type == ImmT;
307   }
308 
309   bool isImmModifier() const {
310     return isImm() && Imm.Type != ImmTyNone;
311   }
312 
313   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
314   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
315   bool isDMask() const { return isImmTy(ImmTyDMask); }
316   bool isDim() const { return isImmTy(ImmTyDim); }
317   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
318   bool isDA() const { return isImmTy(ImmTyDA); }
319   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
320   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
321   bool isLWE() const { return isImmTy(ImmTyLWE); }
322   bool isOff() const { return isImmTy(ImmTyOff); }
323   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
324   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
325   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
326   bool isOffen() const { return isImmTy(ImmTyOffen); }
327   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
328   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
329   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
330   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
331   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
332 
333   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
334   bool isGDS() const { return isImmTy(ImmTyGDS); }
335   bool isLDS() const { return isImmTy(ImmTyLDS); }
336   bool isDLC() const { return isImmTy(ImmTyDLC); }
337   bool isGLC() const { return isImmTy(ImmTyGLC); }
338   bool isSLC() const { return isImmTy(ImmTySLC); }
339   bool isSWZ() const { return isImmTy(ImmTySWZ); }
340   bool isTFE() const { return isImmTy(ImmTyTFE); }
341   bool isD16() const { return isImmTy(ImmTyD16); }
342   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
343   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
344   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
345   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
346   bool isFI() const { return isImmTy(ImmTyDppFi); }
347   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
348   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
349   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
350   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
351   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
352   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
353   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
354   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
355   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
356   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
357   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
358   bool isHigh() const { return isImmTy(ImmTyHigh); }
359 
360   bool isMod() const {
361     return isClampSI() || isOModSI();
362   }
363 
364   bool isRegOrImm() const {
365     return isReg() || isImm();
366   }
367 
368   bool isRegClass(unsigned RCID) const;
369 
370   bool isInlineValue() const;
371 
372   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
373     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
374   }
375 
376   bool isSCSrcB16() const {
377     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
378   }
379 
380   bool isSCSrcV2B16() const {
381     return isSCSrcB16();
382   }
383 
384   bool isSCSrcB32() const {
385     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
386   }
387 
388   bool isSCSrcB64() const {
389     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
390   }
391 
392   bool isBoolReg() const;
393 
394   bool isSCSrcF16() const {
395     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
396   }
397 
398   bool isSCSrcV2F16() const {
399     return isSCSrcF16();
400   }
401 
402   bool isSCSrcF32() const {
403     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
404   }
405 
406   bool isSCSrcF64() const {
407     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
408   }
409 
410   bool isSSrcB32() const {
411     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
412   }
413 
414   bool isSSrcB16() const {
415     return isSCSrcB16() || isLiteralImm(MVT::i16);
416   }
417 
418   bool isSSrcV2B16() const {
419     llvm_unreachable("cannot happen");
420     return isSSrcB16();
421   }
422 
423   bool isSSrcB64() const {
424     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
425     // See isVSrc64().
426     return isSCSrcB64() || isLiteralImm(MVT::i64);
427   }
428 
429   bool isSSrcF32() const {
430     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
431   }
432 
433   bool isSSrcF64() const {
434     return isSCSrcB64() || isLiteralImm(MVT::f64);
435   }
436 
437   bool isSSrcF16() const {
438     return isSCSrcB16() || isLiteralImm(MVT::f16);
439   }
440 
441   bool isSSrcV2F16() const {
442     llvm_unreachable("cannot happen");
443     return isSSrcF16();
444   }
445 
446   bool isSSrcOrLdsB32() const {
447     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
448            isLiteralImm(MVT::i32) || isExpr();
449   }
450 
451   bool isVCSrcB32() const {
452     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
453   }
454 
455   bool isVCSrcB64() const {
456     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
457   }
458 
459   bool isVCSrcB16() const {
460     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
461   }
462 
463   bool isVCSrcV2B16() const {
464     return isVCSrcB16();
465   }
466 
467   bool isVCSrcF32() const {
468     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
469   }
470 
471   bool isVCSrcF64() const {
472     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
473   }
474 
475   bool isVCSrcF16() const {
476     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
477   }
478 
479   bool isVCSrcV2F16() const {
480     return isVCSrcF16();
481   }
482 
483   bool isVSrcB32() const {
484     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
485   }
486 
487   bool isVSrcB64() const {
488     return isVCSrcF64() || isLiteralImm(MVT::i64);
489   }
490 
491   bool isVSrcB16() const {
492     return isVCSrcB16() || isLiteralImm(MVT::i16);
493   }
494 
495   bool isVSrcV2B16() const {
496     return isVSrcB16() || isLiteralImm(MVT::v2i16);
497   }
498 
499   bool isVSrcF32() const {
500     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
501   }
502 
503   bool isVSrcF64() const {
504     return isVCSrcF64() || isLiteralImm(MVT::f64);
505   }
506 
507   bool isVSrcF16() const {
508     return isVCSrcF16() || isLiteralImm(MVT::f16);
509   }
510 
511   bool isVSrcV2F16() const {
512     return isVSrcF16() || isLiteralImm(MVT::v2f16);
513   }
514 
515   bool isVISrcB32() const {
516     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
517   }
518 
519   bool isVISrcB16() const {
520     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
521   }
522 
523   bool isVISrcV2B16() const {
524     return isVISrcB16();
525   }
526 
527   bool isVISrcF32() const {
528     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
529   }
530 
531   bool isVISrcF16() const {
532     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
533   }
534 
535   bool isVISrcV2F16() const {
536     return isVISrcF16() || isVISrcB32();
537   }
538 
539   bool isAISrcB32() const {
540     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
541   }
542 
543   bool isAISrcB16() const {
544     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
545   }
546 
547   bool isAISrcV2B16() const {
548     return isAISrcB16();
549   }
550 
551   bool isAISrcF32() const {
552     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
553   }
554 
555   bool isAISrcF16() const {
556     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
557   }
558 
559   bool isAISrcV2F16() const {
560     return isAISrcF16() || isAISrcB32();
561   }
562 
563   bool isAISrc_128B32() const {
564     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
565   }
566 
567   bool isAISrc_128B16() const {
568     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
569   }
570 
571   bool isAISrc_128V2B16() const {
572     return isAISrc_128B16();
573   }
574 
575   bool isAISrc_128F32() const {
576     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
577   }
578 
579   bool isAISrc_128F16() const {
580     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
581   }
582 
583   bool isAISrc_128V2F16() const {
584     return isAISrc_128F16() || isAISrc_128B32();
585   }
586 
587   bool isAISrc_512B32() const {
588     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
589   }
590 
591   bool isAISrc_512B16() const {
592     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
593   }
594 
595   bool isAISrc_512V2B16() const {
596     return isAISrc_512B16();
597   }
598 
599   bool isAISrc_512F32() const {
600     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
601   }
602 
603   bool isAISrc_512F16() const {
604     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
605   }
606 
607   bool isAISrc_512V2F16() const {
608     return isAISrc_512F16() || isAISrc_512B32();
609   }
610 
611   bool isAISrc_1024B32() const {
612     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
613   }
614 
615   bool isAISrc_1024B16() const {
616     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
617   }
618 
619   bool isAISrc_1024V2B16() const {
620     return isAISrc_1024B16();
621   }
622 
623   bool isAISrc_1024F32() const {
624     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
625   }
626 
627   bool isAISrc_1024F16() const {
628     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
629   }
630 
631   bool isAISrc_1024V2F16() const {
632     return isAISrc_1024F16() || isAISrc_1024B32();
633   }
634 
635   bool isKImmFP32() const {
636     return isLiteralImm(MVT::f32);
637   }
638 
639   bool isKImmFP16() const {
640     return isLiteralImm(MVT::f16);
641   }
642 
643   bool isMem() const override {
644     return false;
645   }
646 
647   bool isExpr() const {
648     return Kind == Expression;
649   }
650 
651   bool isSoppBrTarget() const {
652     return isExpr() || isImm();
653   }
654 
655   bool isSWaitCnt() const;
656   bool isHwreg() const;
657   bool isSendMsg() const;
658   bool isSwizzle() const;
659   bool isSMRDOffset8() const;
660   bool isSMEMOffset() const;
661   bool isSMRDLiteralOffset() const;
662   bool isDPP8() const;
663   bool isDPPCtrl() const;
664   bool isBLGP() const;
665   bool isCBSZ() const;
666   bool isABID() const;
667   bool isGPRIdxMode() const;
668   bool isS16Imm() const;
669   bool isU16Imm() const;
670   bool isEndpgm() const;
671 
672   StringRef getExpressionAsToken() const {
673     assert(isExpr());
674     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
675     return S->getSymbol().getName();
676   }
677 
678   StringRef getToken() const {
679     assert(isToken());
680 
681     if (Kind == Expression)
682       return getExpressionAsToken();
683 
684     return StringRef(Tok.Data, Tok.Length);
685   }
686 
687   int64_t getImm() const {
688     assert(isImm());
689     return Imm.Val;
690   }
691 
692   void setImm(int64_t Val) {
693     assert(isImm());
694     Imm.Val = Val;
695   }
696 
697   ImmTy getImmTy() const {
698     assert(isImm());
699     return Imm.Type;
700   }
701 
702   unsigned getReg() const override {
703     assert(isRegKind());
704     return Reg.RegNo;
705   }
706 
707   SMLoc getStartLoc() const override {
708     return StartLoc;
709   }
710 
711   SMLoc getEndLoc() const override {
712     return EndLoc;
713   }
714 
715   SMRange getLocRange() const {
716     return SMRange(StartLoc, EndLoc);
717   }
718 
719   Modifiers getModifiers() const {
720     assert(isRegKind() || isImmTy(ImmTyNone));
721     return isRegKind() ? Reg.Mods : Imm.Mods;
722   }
723 
724   void setModifiers(Modifiers Mods) {
725     assert(isRegKind() || isImmTy(ImmTyNone));
726     if (isRegKind())
727       Reg.Mods = Mods;
728     else
729       Imm.Mods = Mods;
730   }
731 
732   bool hasModifiers() const {
733     return getModifiers().hasModifiers();
734   }
735 
736   bool hasFPModifiers() const {
737     return getModifiers().hasFPModifiers();
738   }
739 
740   bool hasIntModifiers() const {
741     return getModifiers().hasIntModifiers();
742   }
743 
744   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
745 
746   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
747 
748   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
749 
750   template <unsigned Bitwidth>
751   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
752 
753   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
754     addKImmFPOperands<16>(Inst, N);
755   }
756 
757   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
758     addKImmFPOperands<32>(Inst, N);
759   }
760 
761   void addRegOperands(MCInst &Inst, unsigned N) const;
762 
763   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
764     addRegOperands(Inst, N);
765   }
766 
767   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
768     if (isRegKind())
769       addRegOperands(Inst, N);
770     else if (isExpr())
771       Inst.addOperand(MCOperand::createExpr(Expr));
772     else
773       addImmOperands(Inst, N);
774   }
775 
776   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
777     Modifiers Mods = getModifiers();
778     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
779     if (isRegKind()) {
780       addRegOperands(Inst, N);
781     } else {
782       addImmOperands(Inst, N, false);
783     }
784   }
785 
786   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
787     assert(!hasIntModifiers());
788     addRegOrImmWithInputModsOperands(Inst, N);
789   }
790 
791   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
792     assert(!hasFPModifiers());
793     addRegOrImmWithInputModsOperands(Inst, N);
794   }
795 
796   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
797     Modifiers Mods = getModifiers();
798     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
799     assert(isRegKind());
800     addRegOperands(Inst, N);
801   }
802 
803   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
804     assert(!hasIntModifiers());
805     addRegWithInputModsOperands(Inst, N);
806   }
807 
808   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
809     assert(!hasFPModifiers());
810     addRegWithInputModsOperands(Inst, N);
811   }
812 
813   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
814     if (isImm())
815       addImmOperands(Inst, N);
816     else {
817       assert(isExpr());
818       Inst.addOperand(MCOperand::createExpr(Expr));
819     }
820   }
821 
822   static void printImmTy(raw_ostream& OS, ImmTy Type) {
823     switch (Type) {
824     case ImmTyNone: OS << "None"; break;
825     case ImmTyGDS: OS << "GDS"; break;
826     case ImmTyLDS: OS << "LDS"; break;
827     case ImmTyOffen: OS << "Offen"; break;
828     case ImmTyIdxen: OS << "Idxen"; break;
829     case ImmTyAddr64: OS << "Addr64"; break;
830     case ImmTyOffset: OS << "Offset"; break;
831     case ImmTyInstOffset: OS << "InstOffset"; break;
832     case ImmTyOffset0: OS << "Offset0"; break;
833     case ImmTyOffset1: OS << "Offset1"; break;
834     case ImmTyDLC: OS << "DLC"; break;
835     case ImmTyGLC: OS << "GLC"; break;
836     case ImmTySLC: OS << "SLC"; break;
837     case ImmTySWZ: OS << "SWZ"; break;
838     case ImmTyTFE: OS << "TFE"; break;
839     case ImmTyD16: OS << "D16"; break;
840     case ImmTyFORMAT: OS << "FORMAT"; break;
841     case ImmTyClampSI: OS << "ClampSI"; break;
842     case ImmTyOModSI: OS << "OModSI"; break;
843     case ImmTyDPP8: OS << "DPP8"; break;
844     case ImmTyDppCtrl: OS << "DppCtrl"; break;
845     case ImmTyDppRowMask: OS << "DppRowMask"; break;
846     case ImmTyDppBankMask: OS << "DppBankMask"; break;
847     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
848     case ImmTyDppFi: OS << "FI"; break;
849     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
850     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
851     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
852     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
853     case ImmTyDMask: OS << "DMask"; break;
854     case ImmTyDim: OS << "Dim"; break;
855     case ImmTyUNorm: OS << "UNorm"; break;
856     case ImmTyDA: OS << "DA"; break;
857     case ImmTyR128A16: OS << "R128A16"; break;
858     case ImmTyA16: OS << "A16"; break;
859     case ImmTyLWE: OS << "LWE"; break;
860     case ImmTyOff: OS << "Off"; break;
861     case ImmTyExpTgt: OS << "ExpTgt"; break;
862     case ImmTyExpCompr: OS << "ExpCompr"; break;
863     case ImmTyExpVM: OS << "ExpVM"; break;
864     case ImmTyHwreg: OS << "Hwreg"; break;
865     case ImmTySendMsg: OS << "SendMsg"; break;
866     case ImmTyInterpSlot: OS << "InterpSlot"; break;
867     case ImmTyInterpAttr: OS << "InterpAttr"; break;
868     case ImmTyAttrChan: OS << "AttrChan"; break;
869     case ImmTyOpSel: OS << "OpSel"; break;
870     case ImmTyOpSelHi: OS << "OpSelHi"; break;
871     case ImmTyNegLo: OS << "NegLo"; break;
872     case ImmTyNegHi: OS << "NegHi"; break;
873     case ImmTySwizzle: OS << "Swizzle"; break;
874     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
875     case ImmTyHigh: OS << "High"; break;
876     case ImmTyBLGP: OS << "BLGP"; break;
877     case ImmTyCBSZ: OS << "CBSZ"; break;
878     case ImmTyABID: OS << "ABID"; break;
879     case ImmTyEndpgm: OS << "Endpgm"; break;
880     }
881   }
882 
883   void print(raw_ostream &OS) const override {
884     switch (Kind) {
885     case Register:
886       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
887       break;
888     case Immediate:
889       OS << '<' << getImm();
890       if (getImmTy() != ImmTyNone) {
891         OS << " type: "; printImmTy(OS, getImmTy());
892       }
893       OS << " mods: " << Imm.Mods << '>';
894       break;
895     case Token:
896       OS << '\'' << getToken() << '\'';
897       break;
898     case Expression:
899       OS << "<expr " << *Expr << '>';
900       break;
901     }
902   }
903 
904   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
905                                       int64_t Val, SMLoc Loc,
906                                       ImmTy Type = ImmTyNone,
907                                       bool IsFPImm = false) {
908     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
909     Op->Imm.Val = Val;
910     Op->Imm.IsFPImm = IsFPImm;
911     Op->Imm.Type = Type;
912     Op->Imm.Mods = Modifiers();
913     Op->StartLoc = Loc;
914     Op->EndLoc = Loc;
915     return Op;
916   }
917 
918   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
919                                         StringRef Str, SMLoc Loc,
920                                         bool HasExplicitEncodingSize = true) {
921     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
922     Res->Tok.Data = Str.data();
923     Res->Tok.Length = Str.size();
924     Res->StartLoc = Loc;
925     Res->EndLoc = Loc;
926     return Res;
927   }
928 
929   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
930                                       unsigned RegNo, SMLoc S,
931                                       SMLoc E) {
932     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
933     Op->Reg.RegNo = RegNo;
934     Op->Reg.Mods = Modifiers();
935     Op->StartLoc = S;
936     Op->EndLoc = E;
937     return Op;
938   }
939 
940   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
941                                        const class MCExpr *Expr, SMLoc S) {
942     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
943     Op->Expr = Expr;
944     Op->StartLoc = S;
945     Op->EndLoc = S;
946     return Op;
947   }
948 };
949 
950 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
951   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
952   return OS;
953 }
954 
955 //===----------------------------------------------------------------------===//
956 // AsmParser
957 //===----------------------------------------------------------------------===//
958 
959 // Holds info related to the current kernel, e.g. count of SGPRs used.
960 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
961 // .amdgpu_hsa_kernel or at EOF.
962 class KernelScopeInfo {
963   int SgprIndexUnusedMin = -1;
964   int VgprIndexUnusedMin = -1;
965   MCContext *Ctx = nullptr;
966 
967   void usesSgprAt(int i) {
968     if (i >= SgprIndexUnusedMin) {
969       SgprIndexUnusedMin = ++i;
970       if (Ctx) {
971         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
972         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
973       }
974     }
975   }
976 
977   void usesVgprAt(int i) {
978     if (i >= VgprIndexUnusedMin) {
979       VgprIndexUnusedMin = ++i;
980       if (Ctx) {
981         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
982         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
983       }
984     }
985   }
986 
987 public:
988   KernelScopeInfo() = default;
989 
990   void initialize(MCContext &Context) {
991     Ctx = &Context;
992     usesSgprAt(SgprIndexUnusedMin = -1);
993     usesVgprAt(VgprIndexUnusedMin = -1);
994   }
995 
996   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
997     switch (RegKind) {
998       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
999       case IS_AGPR: // fall through
1000       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1001       default: break;
1002     }
1003   }
1004 };
1005 
1006 class AMDGPUAsmParser : public MCTargetAsmParser {
1007   MCAsmParser &Parser;
1008 
1009   // Number of extra operands parsed after the first optional operand.
1010   // This may be necessary to skip hardcoded mandatory operands.
1011   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1012 
1013   unsigned ForcedEncodingSize = 0;
1014   bool ForcedDPP = false;
1015   bool ForcedSDWA = false;
1016   KernelScopeInfo KernelScope;
1017 
1018   /// @name Auto-generated Match Functions
1019   /// {
1020 
1021 #define GET_ASSEMBLER_HEADER
1022 #include "AMDGPUGenAsmMatcher.inc"
1023 
1024   /// }
1025 
1026 private:
1027   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1028   bool OutOfRangeError(SMRange Range);
1029   /// Calculate VGPR/SGPR blocks required for given target, reserved
1030   /// registers, and user-specified NextFreeXGPR values.
1031   ///
1032   /// \param Features [in] Target features, used for bug corrections.
1033   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1034   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1035   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1036   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1037   /// descriptor field, if valid.
1038   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1039   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1040   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1041   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1042   /// \param VGPRBlocks [out] Result VGPR block count.
1043   /// \param SGPRBlocks [out] Result SGPR block count.
1044   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1045                           bool FlatScrUsed, bool XNACKUsed,
1046                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1047                           SMRange VGPRRange, unsigned NextFreeSGPR,
1048                           SMRange SGPRRange, unsigned &VGPRBlocks,
1049                           unsigned &SGPRBlocks);
1050   bool ParseDirectiveAMDGCNTarget();
1051   bool ParseDirectiveAMDHSAKernel();
1052   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1053   bool ParseDirectiveHSACodeObjectVersion();
1054   bool ParseDirectiveHSACodeObjectISA();
1055   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1056   bool ParseDirectiveAMDKernelCodeT();
1057   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1058   bool ParseDirectiveAMDGPUHsaKernel();
1059 
1060   bool ParseDirectiveISAVersion();
1061   bool ParseDirectiveHSAMetadata();
1062   bool ParseDirectivePALMetadataBegin();
1063   bool ParseDirectivePALMetadata();
1064   bool ParseDirectiveAMDGPULDS();
1065 
1066   /// Common code to parse out a block of text (typically YAML) between start and
1067   /// end directives.
1068   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1069                            const char *AssemblerDirectiveEnd,
1070                            std::string &CollectString);
1071 
1072   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1073                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1074   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1075                            unsigned &RegNum, unsigned &RegWidth,
1076                            bool RestoreOnFailure = false);
1077   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1078                            unsigned &RegNum, unsigned &RegWidth,
1079                            SmallVectorImpl<AsmToken> &Tokens);
1080   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1081                            unsigned &RegWidth,
1082                            SmallVectorImpl<AsmToken> &Tokens);
1083   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1084                            unsigned &RegWidth,
1085                            SmallVectorImpl<AsmToken> &Tokens);
1086   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1087                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1088   bool ParseRegRange(unsigned& Num, unsigned& Width);
1089   unsigned getRegularReg(RegisterKind RegKind,
1090                          unsigned RegNum,
1091                          unsigned RegWidth,
1092                          SMLoc Loc);
1093 
1094   bool isRegister();
1095   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1096   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1097   void initializeGprCountSymbol(RegisterKind RegKind);
1098   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1099                              unsigned RegWidth);
1100   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1101                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1102   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1103                  bool IsGdsHardcoded);
1104 
1105 public:
1106   enum AMDGPUMatchResultTy {
1107     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1108   };
1109   enum OperandMode {
1110     OperandMode_Default,
1111     OperandMode_NSA,
1112   };
1113 
1114   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1115 
1116   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1117                const MCInstrInfo &MII,
1118                const MCTargetOptions &Options)
1119       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1120     MCAsmParserExtension::Initialize(Parser);
1121 
1122     if (getFeatureBits().none()) {
1123       // Set default features.
1124       copySTI().ToggleFeature("southern-islands");
1125     }
1126 
1127     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1128 
1129     {
1130       // TODO: make those pre-defined variables read-only.
1131       // Currently there is none suitable machinery in the core llvm-mc for this.
1132       // MCSymbol::isRedefinable is intended for another purpose, and
1133       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
1134       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1135       MCContext &Ctx = getContext();
1136       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1137         MCSymbol *Sym =
1138             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1139         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1140         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1141         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1142         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1143         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1144       } else {
1145         MCSymbol *Sym =
1146             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1147         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1148         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1149         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1150         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1151         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1152       }
1153       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1154         initializeGprCountSymbol(IS_VGPR);
1155         initializeGprCountSymbol(IS_SGPR);
1156       } else
1157         KernelScope.initialize(getContext());
1158     }
1159   }
1160 
1161   bool hasXNACK() const {
1162     return AMDGPU::hasXNACK(getSTI());
1163   }
1164 
1165   bool hasMIMG_R128() const {
1166     return AMDGPU::hasMIMG_R128(getSTI());
1167   }
1168 
1169   bool hasPackedD16() const {
1170     return AMDGPU::hasPackedD16(getSTI());
1171   }
1172 
1173   bool hasGFX10A16() const {
1174     return AMDGPU::hasGFX10A16(getSTI());
1175   }
1176 
1177   bool isSI() const {
1178     return AMDGPU::isSI(getSTI());
1179   }
1180 
1181   bool isCI() const {
1182     return AMDGPU::isCI(getSTI());
1183   }
1184 
1185   bool isVI() const {
1186     return AMDGPU::isVI(getSTI());
1187   }
1188 
1189   bool isGFX9() const {
1190     return AMDGPU::isGFX9(getSTI());
1191   }
1192 
1193   bool isGFX10() const {
1194     return AMDGPU::isGFX10(getSTI());
1195   }
1196 
1197   bool isGFX10_BEncoding() const {
1198     return AMDGPU::isGFX10_BEncoding(getSTI());
1199   }
1200 
1201   bool hasInv2PiInlineImm() const {
1202     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1203   }
1204 
1205   bool hasFlatOffsets() const {
1206     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1207   }
1208 
1209   bool hasSGPR102_SGPR103() const {
1210     return !isVI() && !isGFX9();
1211   }
1212 
1213   bool hasSGPR104_SGPR105() const {
1214     return isGFX10();
1215   }
1216 
1217   bool hasIntClamp() const {
1218     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1219   }
1220 
1221   AMDGPUTargetStreamer &getTargetStreamer() {
1222     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1223     return static_cast<AMDGPUTargetStreamer &>(TS);
1224   }
1225 
1226   const MCRegisterInfo *getMRI() const {
1227     // We need this const_cast because for some reason getContext() is not const
1228     // in MCAsmParser.
1229     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1230   }
1231 
1232   const MCInstrInfo *getMII() const {
1233     return &MII;
1234   }
1235 
1236   const FeatureBitset &getFeatureBits() const {
1237     return getSTI().getFeatureBits();
1238   }
1239 
1240   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1241   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1242   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1243 
1244   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1245   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1246   bool isForcedDPP() const { return ForcedDPP; }
1247   bool isForcedSDWA() const { return ForcedSDWA; }
1248   ArrayRef<unsigned> getMatchedVariants() const;
1249 
1250   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1251   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1252                      bool RestoreOnFailure);
1253   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1254   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1255                                         SMLoc &EndLoc) override;
1256   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1257   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1258                                       unsigned Kind) override;
1259   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1260                                OperandVector &Operands, MCStreamer &Out,
1261                                uint64_t &ErrorInfo,
1262                                bool MatchingInlineAsm) override;
1263   bool ParseDirective(AsmToken DirectiveID) override;
1264   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1265                                     OperandMode Mode = OperandMode_Default);
1266   StringRef parseMnemonicSuffix(StringRef Name);
1267   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1268                         SMLoc NameLoc, OperandVector &Operands) override;
1269   //bool ProcessInstruction(MCInst &Inst);
1270 
1271   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1272 
1273   OperandMatchResultTy
1274   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1275                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1276                      bool (*ConvertResult)(int64_t &) = nullptr);
1277 
1278   OperandMatchResultTy
1279   parseOperandArrayWithPrefix(const char *Prefix,
1280                               OperandVector &Operands,
1281                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1282                               bool (*ConvertResult)(int64_t&) = nullptr);
1283 
1284   OperandMatchResultTy
1285   parseNamedBit(const char *Name, OperandVector &Operands,
1286                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1287   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1288                                              StringRef &Value);
1289 
1290   bool isModifier();
1291   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1292   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1293   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1294   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1295   bool parseSP3NegModifier();
1296   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1297   OperandMatchResultTy parseReg(OperandVector &Operands);
1298   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1299   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1300   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1301   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1302   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1303   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1304   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1305   OperandMatchResultTy parseUfmt(int64_t &Format);
1306   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1307   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1308   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1309   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1310   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1311   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1312   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1313 
1314   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1315   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1316   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1317   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1318 
1319   bool parseCnt(int64_t &IntVal);
1320   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1321   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1322 
1323 private:
1324   struct OperandInfoTy {
1325     int64_t Id;
1326     bool IsSymbolic = false;
1327     bool IsDefined = false;
1328 
1329     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1330   };
1331 
1332   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1333   bool validateSendMsg(const OperandInfoTy &Msg,
1334                        const OperandInfoTy &Op,
1335                        const OperandInfoTy &Stream,
1336                        const SMLoc Loc);
1337 
1338   bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1339   bool validateHwreg(const OperandInfoTy &HwReg,
1340                      const int64_t Offset,
1341                      const int64_t Width,
1342                      const SMLoc Loc);
1343 
1344   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1345   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1346   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1347 
1348   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1349   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1350   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1351   bool validateSOPLiteral(const MCInst &Inst) const;
1352   bool validateConstantBusLimitations(const MCInst &Inst);
1353   bool validateEarlyClobberLimitations(const MCInst &Inst);
1354   bool validateIntClampSupported(const MCInst &Inst);
1355   bool validateMIMGAtomicDMask(const MCInst &Inst);
1356   bool validateMIMGGatherDMask(const MCInst &Inst);
1357   bool validateMovrels(const MCInst &Inst);
1358   bool validateMIMGDataSize(const MCInst &Inst);
1359   bool validateMIMGAddrSize(const MCInst &Inst);
1360   bool validateMIMGD16(const MCInst &Inst);
1361   bool validateMIMGDim(const MCInst &Inst);
1362   bool validateLdsDirect(const MCInst &Inst);
1363   bool validateOpSel(const MCInst &Inst);
1364   bool validateVccOperand(unsigned Reg) const;
1365   bool validateVOP3Literal(const MCInst &Inst) const;
1366   bool validateMAIAccWrite(const MCInst &Inst);
1367   unsigned getConstantBusLimit(unsigned Opcode) const;
1368   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1369   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1370   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1371 
1372   bool isId(const StringRef Id) const;
1373   bool isId(const AsmToken &Token, const StringRef Id) const;
1374   bool isToken(const AsmToken::TokenKind Kind) const;
1375   bool trySkipId(const StringRef Id);
1376   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1377   bool trySkipToken(const AsmToken::TokenKind Kind);
1378   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1379   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1380   bool parseId(StringRef &Val, const StringRef ErrMsg);
1381 
1382   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1383   AsmToken::TokenKind getTokenKind() const;
1384   bool parseExpr(int64_t &Imm);
1385   bool parseExpr(OperandVector &Operands);
1386   StringRef getTokenStr() const;
1387   AsmToken peekToken();
1388   AsmToken getToken() const;
1389   SMLoc getLoc() const;
1390   void lex();
1391 
1392 public:
1393   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1394   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1395 
1396   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1397   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1398   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1399   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1400   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1401   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1402 
1403   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1404                             const unsigned MinVal,
1405                             const unsigned MaxVal,
1406                             const StringRef ErrMsg);
1407   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1408   bool parseSwizzleOffset(int64_t &Imm);
1409   bool parseSwizzleMacro(int64_t &Imm);
1410   bool parseSwizzleQuadPerm(int64_t &Imm);
1411   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1412   bool parseSwizzleBroadcast(int64_t &Imm);
1413   bool parseSwizzleSwap(int64_t &Imm);
1414   bool parseSwizzleReverse(int64_t &Imm);
1415 
1416   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1417   int64_t parseGPRIdxMacro();
1418 
1419   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1420   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1421   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1422   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1423   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1424 
1425   AMDGPUOperand::Ptr defaultDLC() const;
1426   AMDGPUOperand::Ptr defaultGLC() const;
1427   AMDGPUOperand::Ptr defaultSLC() const;
1428 
1429   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1430   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1431   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1432   AMDGPUOperand::Ptr defaultFlatOffset() const;
1433 
1434   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1435 
1436   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1437                OptionalImmIndexMap &OptionalIdx);
1438   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1439   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1440   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1441 
1442   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1443 
1444   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1445                bool IsAtomic = false);
1446   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1447 
1448   OperandMatchResultTy parseDim(OperandVector &Operands);
1449   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1450   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1451   AMDGPUOperand::Ptr defaultRowMask() const;
1452   AMDGPUOperand::Ptr defaultBankMask() const;
1453   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1454   AMDGPUOperand::Ptr defaultFI() const;
1455   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1456   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1457 
1458   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1459                                     AMDGPUOperand::ImmTy Type);
1460   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1461   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1462   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1463   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1464   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1465   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1466   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1467                uint64_t BasicInstType,
1468                bool SkipDstVcc = false,
1469                bool SkipSrcVcc = false);
1470 
1471   AMDGPUOperand::Ptr defaultBLGP() const;
1472   AMDGPUOperand::Ptr defaultCBSZ() const;
1473   AMDGPUOperand::Ptr defaultABID() const;
1474 
1475   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1476   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1477 };
1478 
1479 struct OptionalOperand {
1480   const char *Name;
1481   AMDGPUOperand::ImmTy Type;
1482   bool IsBit;
1483   bool (*ConvertResult)(int64_t&);
1484 };
1485 
1486 } // end anonymous namespace
1487 
1488 // May be called with integer type with equivalent bitwidth.
1489 static const fltSemantics *getFltSemantics(unsigned Size) {
1490   switch (Size) {
1491   case 4:
1492     return &APFloat::IEEEsingle();
1493   case 8:
1494     return &APFloat::IEEEdouble();
1495   case 2:
1496     return &APFloat::IEEEhalf();
1497   default:
1498     llvm_unreachable("unsupported fp type");
1499   }
1500 }
1501 
1502 static const fltSemantics *getFltSemantics(MVT VT) {
1503   return getFltSemantics(VT.getSizeInBits() / 8);
1504 }
1505 
1506 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1507   switch (OperandType) {
1508   case AMDGPU::OPERAND_REG_IMM_INT32:
1509   case AMDGPU::OPERAND_REG_IMM_FP32:
1510   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1511   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1512   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1513   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1514     return &APFloat::IEEEsingle();
1515   case AMDGPU::OPERAND_REG_IMM_INT64:
1516   case AMDGPU::OPERAND_REG_IMM_FP64:
1517   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1518   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1519     return &APFloat::IEEEdouble();
1520   case AMDGPU::OPERAND_REG_IMM_INT16:
1521   case AMDGPU::OPERAND_REG_IMM_FP16:
1522   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1523   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1524   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1525   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1526   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1527   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1528   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1529   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1530   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1531   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1532     return &APFloat::IEEEhalf();
1533   default:
1534     llvm_unreachable("unsupported fp type");
1535   }
1536 }
1537 
1538 //===----------------------------------------------------------------------===//
1539 // Operand
1540 //===----------------------------------------------------------------------===//
1541 
1542 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1543   bool Lost;
1544 
1545   // Convert literal to single precision
1546   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1547                                                APFloat::rmNearestTiesToEven,
1548                                                &Lost);
1549   // We allow precision lost but not overflow or underflow
1550   if (Status != APFloat::opOK &&
1551       Lost &&
1552       ((Status & APFloat::opOverflow)  != 0 ||
1553        (Status & APFloat::opUnderflow) != 0)) {
1554     return false;
1555   }
1556 
1557   return true;
1558 }
1559 
1560 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1561   return isUIntN(Size, Val) || isIntN(Size, Val);
1562 }
1563 
1564 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1565   if (VT.getScalarType() == MVT::i16) {
1566     // FP immediate values are broken.
1567     return isInlinableIntLiteral(Val);
1568   }
1569 
1570   // f16/v2f16 operands work correctly for all values.
1571   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1572 }
1573 
1574 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1575 
1576   // This is a hack to enable named inline values like
1577   // shared_base with both 32-bit and 64-bit operands.
1578   // Note that these values are defined as
1579   // 32-bit operands only.
1580   if (isInlineValue()) {
1581     return true;
1582   }
1583 
1584   if (!isImmTy(ImmTyNone)) {
1585     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1586     return false;
1587   }
1588   // TODO: We should avoid using host float here. It would be better to
1589   // check the float bit values which is what a few other places do.
1590   // We've had bot failures before due to weird NaN support on mips hosts.
1591 
1592   APInt Literal(64, Imm.Val);
1593 
1594   if (Imm.IsFPImm) { // We got fp literal token
1595     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1596       return AMDGPU::isInlinableLiteral64(Imm.Val,
1597                                           AsmParser->hasInv2PiInlineImm());
1598     }
1599 
1600     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1601     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1602       return false;
1603 
1604     if (type.getScalarSizeInBits() == 16) {
1605       return isInlineableLiteralOp16(
1606         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1607         type, AsmParser->hasInv2PiInlineImm());
1608     }
1609 
1610     // Check if single precision literal is inlinable
1611     return AMDGPU::isInlinableLiteral32(
1612       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1613       AsmParser->hasInv2PiInlineImm());
1614   }
1615 
1616   // We got int literal token.
1617   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1618     return AMDGPU::isInlinableLiteral64(Imm.Val,
1619                                         AsmParser->hasInv2PiInlineImm());
1620   }
1621 
1622   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1623     return false;
1624   }
1625 
1626   if (type.getScalarSizeInBits() == 16) {
1627     return isInlineableLiteralOp16(
1628       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1629       type, AsmParser->hasInv2PiInlineImm());
1630   }
1631 
1632   return AMDGPU::isInlinableLiteral32(
1633     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1634     AsmParser->hasInv2PiInlineImm());
1635 }
1636 
1637 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1638   // Check that this immediate can be added as literal
1639   if (!isImmTy(ImmTyNone)) {
1640     return false;
1641   }
1642 
1643   if (!Imm.IsFPImm) {
1644     // We got int literal token.
1645 
1646     if (type == MVT::f64 && hasFPModifiers()) {
1647       // Cannot apply fp modifiers to int literals preserving the same semantics
1648       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1649       // disable these cases.
1650       return false;
1651     }
1652 
1653     unsigned Size = type.getSizeInBits();
1654     if (Size == 64)
1655       Size = 32;
1656 
1657     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1658     // types.
1659     return isSafeTruncation(Imm.Val, Size);
1660   }
1661 
1662   // We got fp literal token
1663   if (type == MVT::f64) { // Expected 64-bit fp operand
1664     // We would set low 64-bits of literal to zeroes but we accept this literals
1665     return true;
1666   }
1667 
1668   if (type == MVT::i64) { // Expected 64-bit int operand
1669     // We don't allow fp literals in 64-bit integer instructions. It is
1670     // unclear how we should encode them.
1671     return false;
1672   }
1673 
1674   // We allow fp literals with f16x2 operands assuming that the specified
1675   // literal goes into the lower half and the upper half is zero. We also
1676   // require that the literal may be losslesly converted to f16.
1677   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1678                      (type == MVT::v2i16)? MVT::i16 : type;
1679 
1680   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1681   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1682 }
1683 
1684 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1685   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1686 }
1687 
1688 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1689   if (AsmParser->isVI())
1690     return isVReg32();
1691   else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1692     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1693   else
1694     return false;
1695 }
1696 
1697 bool AMDGPUOperand::isSDWAFP16Operand() const {
1698   return isSDWAOperand(MVT::f16);
1699 }
1700 
1701 bool AMDGPUOperand::isSDWAFP32Operand() const {
1702   return isSDWAOperand(MVT::f32);
1703 }
1704 
1705 bool AMDGPUOperand::isSDWAInt16Operand() const {
1706   return isSDWAOperand(MVT::i16);
1707 }
1708 
1709 bool AMDGPUOperand::isSDWAInt32Operand() const {
1710   return isSDWAOperand(MVT::i32);
1711 }
1712 
1713 bool AMDGPUOperand::isBoolReg() const {
1714   return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1715          (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1716 }
1717 
1718 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1719 {
1720   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1721   assert(Size == 2 || Size == 4 || Size == 8);
1722 
1723   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1724 
1725   if (Imm.Mods.Abs) {
1726     Val &= ~FpSignMask;
1727   }
1728   if (Imm.Mods.Neg) {
1729     Val ^= FpSignMask;
1730   }
1731 
1732   return Val;
1733 }
1734 
1735 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1736   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1737                              Inst.getNumOperands())) {
1738     addLiteralImmOperand(Inst, Imm.Val,
1739                          ApplyModifiers &
1740                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1741   } else {
1742     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1743     Inst.addOperand(MCOperand::createImm(Imm.Val));
1744   }
1745 }
1746 
1747 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1748   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1749   auto OpNum = Inst.getNumOperands();
1750   // Check that this operand accepts literals
1751   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1752 
1753   if (ApplyModifiers) {
1754     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1755     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1756     Val = applyInputFPModifiers(Val, Size);
1757   }
1758 
1759   APInt Literal(64, Val);
1760   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1761 
1762   if (Imm.IsFPImm) { // We got fp literal token
1763     switch (OpTy) {
1764     case AMDGPU::OPERAND_REG_IMM_INT64:
1765     case AMDGPU::OPERAND_REG_IMM_FP64:
1766     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1767     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1768       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1769                                        AsmParser->hasInv2PiInlineImm())) {
1770         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1771         return;
1772       }
1773 
1774       // Non-inlineable
1775       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1776         // For fp operands we check if low 32 bits are zeros
1777         if (Literal.getLoBits(32) != 0) {
1778           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1779           "Can't encode literal as exact 64-bit floating-point operand. "
1780           "Low 32-bits will be set to zero");
1781         }
1782 
1783         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1784         return;
1785       }
1786 
1787       // We don't allow fp literals in 64-bit integer instructions. It is
1788       // unclear how we should encode them. This case should be checked earlier
1789       // in predicate methods (isLiteralImm())
1790       llvm_unreachable("fp literal in 64-bit integer instruction.");
1791 
1792     case AMDGPU::OPERAND_REG_IMM_INT32:
1793     case AMDGPU::OPERAND_REG_IMM_FP32:
1794     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1795     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1796     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1797     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1798     case AMDGPU::OPERAND_REG_IMM_INT16:
1799     case AMDGPU::OPERAND_REG_IMM_FP16:
1800     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1801     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1802     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1803     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1804     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1805     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1806     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1807     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1808     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1809     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1810       bool lost;
1811       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1812       // Convert literal to single precision
1813       FPLiteral.convert(*getOpFltSemantics(OpTy),
1814                         APFloat::rmNearestTiesToEven, &lost);
1815       // We allow precision lost but not overflow or underflow. This should be
1816       // checked earlier in isLiteralImm()
1817 
1818       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1819       Inst.addOperand(MCOperand::createImm(ImmVal));
1820       return;
1821     }
1822     default:
1823       llvm_unreachable("invalid operand size");
1824     }
1825 
1826     return;
1827   }
1828 
1829   // We got int literal token.
1830   // Only sign extend inline immediates.
1831   switch (OpTy) {
1832   case AMDGPU::OPERAND_REG_IMM_INT32:
1833   case AMDGPU::OPERAND_REG_IMM_FP32:
1834   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1835   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1836   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1837   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1838   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1839   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1840     if (isSafeTruncation(Val, 32) &&
1841         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1842                                      AsmParser->hasInv2PiInlineImm())) {
1843       Inst.addOperand(MCOperand::createImm(Val));
1844       return;
1845     }
1846 
1847     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1848     return;
1849 
1850   case AMDGPU::OPERAND_REG_IMM_INT64:
1851   case AMDGPU::OPERAND_REG_IMM_FP64:
1852   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1853   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1854     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1855       Inst.addOperand(MCOperand::createImm(Val));
1856       return;
1857     }
1858 
1859     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1860     return;
1861 
1862   case AMDGPU::OPERAND_REG_IMM_INT16:
1863   case AMDGPU::OPERAND_REG_IMM_FP16:
1864   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1865   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1866   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1867   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1868     if (isSafeTruncation(Val, 16) &&
1869         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1870                                      AsmParser->hasInv2PiInlineImm())) {
1871       Inst.addOperand(MCOperand::createImm(Val));
1872       return;
1873     }
1874 
1875     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1876     return;
1877 
1878   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1879   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1880   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1881   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
1882     assert(isSafeTruncation(Val, 16));
1883     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1884                                         AsmParser->hasInv2PiInlineImm()));
1885 
1886     Inst.addOperand(MCOperand::createImm(Val));
1887     return;
1888   }
1889   default:
1890     llvm_unreachable("invalid operand size");
1891   }
1892 }
1893 
1894 template <unsigned Bitwidth>
1895 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1896   APInt Literal(64, Imm.Val);
1897 
1898   if (!Imm.IsFPImm) {
1899     // We got int literal token.
1900     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1901     return;
1902   }
1903 
1904   bool Lost;
1905   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1906   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1907                     APFloat::rmNearestTiesToEven, &Lost);
1908   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1909 }
1910 
1911 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1912   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1913 }
1914 
1915 static bool isInlineValue(unsigned Reg) {
1916   switch (Reg) {
1917   case AMDGPU::SRC_SHARED_BASE:
1918   case AMDGPU::SRC_SHARED_LIMIT:
1919   case AMDGPU::SRC_PRIVATE_BASE:
1920   case AMDGPU::SRC_PRIVATE_LIMIT:
1921   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1922     return true;
1923   case AMDGPU::SRC_VCCZ:
1924   case AMDGPU::SRC_EXECZ:
1925   case AMDGPU::SRC_SCC:
1926     return true;
1927   case AMDGPU::SGPR_NULL:
1928     return true;
1929   default:
1930     return false;
1931   }
1932 }
1933 
1934 bool AMDGPUOperand::isInlineValue() const {
1935   return isRegKind() && ::isInlineValue(getReg());
1936 }
1937 
1938 //===----------------------------------------------------------------------===//
1939 // AsmParser
1940 //===----------------------------------------------------------------------===//
1941 
1942 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1943   if (Is == IS_VGPR) {
1944     switch (RegWidth) {
1945       default: return -1;
1946       case 1: return AMDGPU::VGPR_32RegClassID;
1947       case 2: return AMDGPU::VReg_64RegClassID;
1948       case 3: return AMDGPU::VReg_96RegClassID;
1949       case 4: return AMDGPU::VReg_128RegClassID;
1950       case 5: return AMDGPU::VReg_160RegClassID;
1951       case 6: return AMDGPU::VReg_192RegClassID;
1952       case 8: return AMDGPU::VReg_256RegClassID;
1953       case 16: return AMDGPU::VReg_512RegClassID;
1954       case 32: return AMDGPU::VReg_1024RegClassID;
1955     }
1956   } else if (Is == IS_TTMP) {
1957     switch (RegWidth) {
1958       default: return -1;
1959       case 1: return AMDGPU::TTMP_32RegClassID;
1960       case 2: return AMDGPU::TTMP_64RegClassID;
1961       case 4: return AMDGPU::TTMP_128RegClassID;
1962       case 8: return AMDGPU::TTMP_256RegClassID;
1963       case 16: return AMDGPU::TTMP_512RegClassID;
1964     }
1965   } else if (Is == IS_SGPR) {
1966     switch (RegWidth) {
1967       default: return -1;
1968       case 1: return AMDGPU::SGPR_32RegClassID;
1969       case 2: return AMDGPU::SGPR_64RegClassID;
1970       case 3: return AMDGPU::SGPR_96RegClassID;
1971       case 4: return AMDGPU::SGPR_128RegClassID;
1972       case 5: return AMDGPU::SGPR_160RegClassID;
1973       case 6: return AMDGPU::SGPR_192RegClassID;
1974       case 8: return AMDGPU::SGPR_256RegClassID;
1975       case 16: return AMDGPU::SGPR_512RegClassID;
1976     }
1977   } else if (Is == IS_AGPR) {
1978     switch (RegWidth) {
1979       default: return -1;
1980       case 1: return AMDGPU::AGPR_32RegClassID;
1981       case 2: return AMDGPU::AReg_64RegClassID;
1982       case 3: return AMDGPU::AReg_96RegClassID;
1983       case 4: return AMDGPU::AReg_128RegClassID;
1984       case 5: return AMDGPU::AReg_160RegClassID;
1985       case 6: return AMDGPU::AReg_192RegClassID;
1986       case 8: return AMDGPU::AReg_256RegClassID;
1987       case 16: return AMDGPU::AReg_512RegClassID;
1988       case 32: return AMDGPU::AReg_1024RegClassID;
1989     }
1990   }
1991   return -1;
1992 }
1993 
1994 static unsigned getSpecialRegForName(StringRef RegName) {
1995   return StringSwitch<unsigned>(RegName)
1996     .Case("exec", AMDGPU::EXEC)
1997     .Case("vcc", AMDGPU::VCC)
1998     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1999     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2000     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2001     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2002     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2003     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2004     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2005     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2006     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2007     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2008     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2009     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2010     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2011     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2012     .Case("m0", AMDGPU::M0)
2013     .Case("vccz", AMDGPU::SRC_VCCZ)
2014     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2015     .Case("execz", AMDGPU::SRC_EXECZ)
2016     .Case("src_execz", AMDGPU::SRC_EXECZ)
2017     .Case("scc", AMDGPU::SRC_SCC)
2018     .Case("src_scc", AMDGPU::SRC_SCC)
2019     .Case("tba", AMDGPU::TBA)
2020     .Case("tma", AMDGPU::TMA)
2021     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2022     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2023     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2024     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2025     .Case("vcc_lo", AMDGPU::VCC_LO)
2026     .Case("vcc_hi", AMDGPU::VCC_HI)
2027     .Case("exec_lo", AMDGPU::EXEC_LO)
2028     .Case("exec_hi", AMDGPU::EXEC_HI)
2029     .Case("tma_lo", AMDGPU::TMA_LO)
2030     .Case("tma_hi", AMDGPU::TMA_HI)
2031     .Case("tba_lo", AMDGPU::TBA_LO)
2032     .Case("tba_hi", AMDGPU::TBA_HI)
2033     .Case("pc", AMDGPU::PC_REG)
2034     .Case("null", AMDGPU::SGPR_NULL)
2035     .Default(AMDGPU::NoRegister);
2036 }
2037 
2038 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2039                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2040   auto R = parseRegister();
2041   if (!R) return true;
2042   assert(R->isReg());
2043   RegNo = R->getReg();
2044   StartLoc = R->getStartLoc();
2045   EndLoc = R->getEndLoc();
2046   return false;
2047 }
2048 
2049 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2050                                     SMLoc &EndLoc) {
2051   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2052 }
2053 
2054 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2055                                                        SMLoc &StartLoc,
2056                                                        SMLoc &EndLoc) {
2057   bool Result =
2058       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2059   bool PendingErrors = getParser().hasPendingError();
2060   getParser().clearPendingErrors();
2061   if (PendingErrors)
2062     return MatchOperand_ParseFail;
2063   if (Result)
2064     return MatchOperand_NoMatch;
2065   return MatchOperand_Success;
2066 }
2067 
2068 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2069                                             RegisterKind RegKind, unsigned Reg1,
2070                                             SMLoc Loc) {
2071   switch (RegKind) {
2072   case IS_SPECIAL:
2073     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2074       Reg = AMDGPU::EXEC;
2075       RegWidth = 2;
2076       return true;
2077     }
2078     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2079       Reg = AMDGPU::FLAT_SCR;
2080       RegWidth = 2;
2081       return true;
2082     }
2083     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2084       Reg = AMDGPU::XNACK_MASK;
2085       RegWidth = 2;
2086       return true;
2087     }
2088     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2089       Reg = AMDGPU::VCC;
2090       RegWidth = 2;
2091       return true;
2092     }
2093     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2094       Reg = AMDGPU::TBA;
2095       RegWidth = 2;
2096       return true;
2097     }
2098     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2099       Reg = AMDGPU::TMA;
2100       RegWidth = 2;
2101       return true;
2102     }
2103     Error(Loc, "register does not fit in the list");
2104     return false;
2105   case IS_VGPR:
2106   case IS_SGPR:
2107   case IS_AGPR:
2108   case IS_TTMP:
2109     if (Reg1 != Reg + RegWidth) {
2110       Error(Loc, "registers in a list must have consecutive indices");
2111       return false;
2112     }
2113     RegWidth++;
2114     return true;
2115   default:
2116     llvm_unreachable("unexpected register kind");
2117   }
2118 }
2119 
2120 struct RegInfo {
2121   StringLiteral Name;
2122   RegisterKind Kind;
2123 };
2124 
2125 static constexpr RegInfo RegularRegisters[] = {
2126   {{"v"},    IS_VGPR},
2127   {{"s"},    IS_SGPR},
2128   {{"ttmp"}, IS_TTMP},
2129   {{"acc"},  IS_AGPR},
2130   {{"a"},    IS_AGPR},
2131 };
2132 
2133 static bool isRegularReg(RegisterKind Kind) {
2134   return Kind == IS_VGPR ||
2135          Kind == IS_SGPR ||
2136          Kind == IS_TTMP ||
2137          Kind == IS_AGPR;
2138 }
2139 
2140 static const RegInfo* getRegularRegInfo(StringRef Str) {
2141   for (const RegInfo &Reg : RegularRegisters)
2142     if (Str.startswith(Reg.Name))
2143       return &Reg;
2144   return nullptr;
2145 }
2146 
2147 static bool getRegNum(StringRef Str, unsigned& Num) {
2148   return !Str.getAsInteger(10, Num);
2149 }
2150 
2151 bool
2152 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2153                             const AsmToken &NextToken) const {
2154 
2155   // A list of consecutive registers: [s0,s1,s2,s3]
2156   if (Token.is(AsmToken::LBrac))
2157     return true;
2158 
2159   if (!Token.is(AsmToken::Identifier))
2160     return false;
2161 
2162   // A single register like s0 or a range of registers like s[0:1]
2163 
2164   StringRef Str = Token.getString();
2165   const RegInfo *Reg = getRegularRegInfo(Str);
2166   if (Reg) {
2167     StringRef RegName = Reg->Name;
2168     StringRef RegSuffix = Str.substr(RegName.size());
2169     if (!RegSuffix.empty()) {
2170       unsigned Num;
2171       // A single register with an index: rXX
2172       if (getRegNum(RegSuffix, Num))
2173         return true;
2174     } else {
2175       // A range of registers: r[XX:YY].
2176       if (NextToken.is(AsmToken::LBrac))
2177         return true;
2178     }
2179   }
2180 
2181   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2182 }
2183 
2184 bool
2185 AMDGPUAsmParser::isRegister()
2186 {
2187   return isRegister(getToken(), peekToken());
2188 }
2189 
2190 unsigned
2191 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2192                                unsigned RegNum,
2193                                unsigned RegWidth,
2194                                SMLoc Loc) {
2195 
2196   assert(isRegularReg(RegKind));
2197 
2198   unsigned AlignSize = 1;
2199   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2200     // SGPR and TTMP registers must be aligned.
2201     // Max required alignment is 4 dwords.
2202     AlignSize = std::min(RegWidth, 4u);
2203   }
2204 
2205   if (RegNum % AlignSize != 0) {
2206     Error(Loc, "invalid register alignment");
2207     return AMDGPU::NoRegister;
2208   }
2209 
2210   unsigned RegIdx = RegNum / AlignSize;
2211   int RCID = getRegClass(RegKind, RegWidth);
2212   if (RCID == -1) {
2213     Error(Loc, "invalid or unsupported register size");
2214     return AMDGPU::NoRegister;
2215   }
2216 
2217   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2218   const MCRegisterClass RC = TRI->getRegClass(RCID);
2219   if (RegIdx >= RC.getNumRegs()) {
2220     Error(Loc, "register index is out of range");
2221     return AMDGPU::NoRegister;
2222   }
2223 
2224   return RC.getRegister(RegIdx);
2225 }
2226 
2227 bool
2228 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2229   int64_t RegLo, RegHi;
2230   if (!skipToken(AsmToken::LBrac, "missing register index"))
2231     return false;
2232 
2233   SMLoc FirstIdxLoc = getLoc();
2234   SMLoc SecondIdxLoc;
2235 
2236   if (!parseExpr(RegLo))
2237     return false;
2238 
2239   if (trySkipToken(AsmToken::Colon)) {
2240     SecondIdxLoc = getLoc();
2241     if (!parseExpr(RegHi))
2242       return false;
2243   } else {
2244     RegHi = RegLo;
2245   }
2246 
2247   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2248     return false;
2249 
2250   if (!isUInt<32>(RegLo)) {
2251     Error(FirstIdxLoc, "invalid register index");
2252     return false;
2253   }
2254 
2255   if (!isUInt<32>(RegHi)) {
2256     Error(SecondIdxLoc, "invalid register index");
2257     return false;
2258   }
2259 
2260   if (RegLo > RegHi) {
2261     Error(FirstIdxLoc, "first register index should not exceed second index");
2262     return false;
2263   }
2264 
2265   Num = static_cast<unsigned>(RegLo);
2266   Width = (RegHi - RegLo) + 1;
2267   return true;
2268 }
2269 
2270 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2271                                           unsigned &RegNum, unsigned &RegWidth,
2272                                           SmallVectorImpl<AsmToken> &Tokens) {
2273   assert(isToken(AsmToken::Identifier));
2274   unsigned Reg = getSpecialRegForName(getTokenStr());
2275   if (Reg) {
2276     RegNum = 0;
2277     RegWidth = 1;
2278     RegKind = IS_SPECIAL;
2279     Tokens.push_back(getToken());
2280     lex(); // skip register name
2281   }
2282   return Reg;
2283 }
2284 
2285 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2286                                           unsigned &RegNum, unsigned &RegWidth,
2287                                           SmallVectorImpl<AsmToken> &Tokens) {
2288   assert(isToken(AsmToken::Identifier));
2289   StringRef RegName = getTokenStr();
2290   auto Loc = getLoc();
2291 
2292   const RegInfo *RI = getRegularRegInfo(RegName);
2293   if (!RI) {
2294     Error(Loc, "invalid register name");
2295     return AMDGPU::NoRegister;
2296   }
2297 
2298   Tokens.push_back(getToken());
2299   lex(); // skip register name
2300 
2301   RegKind = RI->Kind;
2302   StringRef RegSuffix = RegName.substr(RI->Name.size());
2303   if (!RegSuffix.empty()) {
2304     // Single 32-bit register: vXX.
2305     if (!getRegNum(RegSuffix, RegNum)) {
2306       Error(Loc, "invalid register index");
2307       return AMDGPU::NoRegister;
2308     }
2309     RegWidth = 1;
2310   } else {
2311     // Range of registers: v[XX:YY]. ":YY" is optional.
2312     if (!ParseRegRange(RegNum, RegWidth))
2313       return AMDGPU::NoRegister;
2314   }
2315 
2316   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2317 }
2318 
2319 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2320                                        unsigned &RegWidth,
2321                                        SmallVectorImpl<AsmToken> &Tokens) {
2322   unsigned Reg = AMDGPU::NoRegister;
2323   auto ListLoc = getLoc();
2324 
2325   if (!skipToken(AsmToken::LBrac,
2326                  "expected a register or a list of registers")) {
2327     return AMDGPU::NoRegister;
2328   }
2329 
2330   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2331 
2332   auto Loc = getLoc();
2333   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2334     return AMDGPU::NoRegister;
2335   if (RegWidth != 1) {
2336     Error(Loc, "expected a single 32-bit register");
2337     return AMDGPU::NoRegister;
2338   }
2339 
2340   for (; trySkipToken(AsmToken::Comma); ) {
2341     RegisterKind NextRegKind;
2342     unsigned NextReg, NextRegNum, NextRegWidth;
2343     Loc = getLoc();
2344 
2345     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2346                              NextRegNum, NextRegWidth,
2347                              Tokens)) {
2348       return AMDGPU::NoRegister;
2349     }
2350     if (NextRegWidth != 1) {
2351       Error(Loc, "expected a single 32-bit register");
2352       return AMDGPU::NoRegister;
2353     }
2354     if (NextRegKind != RegKind) {
2355       Error(Loc, "registers in a list must be of the same kind");
2356       return AMDGPU::NoRegister;
2357     }
2358     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2359       return AMDGPU::NoRegister;
2360   }
2361 
2362   if (!skipToken(AsmToken::RBrac,
2363                  "expected a comma or a closing square bracket")) {
2364     return AMDGPU::NoRegister;
2365   }
2366 
2367   if (isRegularReg(RegKind))
2368     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2369 
2370   return Reg;
2371 }
2372 
2373 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2374                                           unsigned &RegNum, unsigned &RegWidth,
2375                                           SmallVectorImpl<AsmToken> &Tokens) {
2376   auto Loc = getLoc();
2377   Reg = AMDGPU::NoRegister;
2378 
2379   if (isToken(AsmToken::Identifier)) {
2380     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2381     if (Reg == AMDGPU::NoRegister)
2382       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2383   } else {
2384     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2385   }
2386 
2387   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2388   if (Reg == AMDGPU::NoRegister) {
2389     assert(Parser.hasPendingError());
2390     return false;
2391   }
2392 
2393   if (!subtargetHasRegister(*TRI, Reg)) {
2394     if (Reg == AMDGPU::SGPR_NULL) {
2395       Error(Loc, "'null' operand is not supported on this GPU");
2396     } else {
2397       Error(Loc, "register not available on this GPU");
2398     }
2399     return false;
2400   }
2401 
2402   return true;
2403 }
2404 
2405 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2406                                           unsigned &RegNum, unsigned &RegWidth,
2407                                           bool RestoreOnFailure /*=false*/) {
2408   Reg = AMDGPU::NoRegister;
2409 
2410   SmallVector<AsmToken, 1> Tokens;
2411   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2412     if (RestoreOnFailure) {
2413       while (!Tokens.empty()) {
2414         getLexer().UnLex(Tokens.pop_back_val());
2415       }
2416     }
2417     return true;
2418   }
2419   return false;
2420 }
2421 
2422 Optional<StringRef>
2423 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2424   switch (RegKind) {
2425   case IS_VGPR:
2426     return StringRef(".amdgcn.next_free_vgpr");
2427   case IS_SGPR:
2428     return StringRef(".amdgcn.next_free_sgpr");
2429   default:
2430     return None;
2431   }
2432 }
2433 
2434 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2435   auto SymbolName = getGprCountSymbolName(RegKind);
2436   assert(SymbolName && "initializing invalid register kind");
2437   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2438   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2439 }
2440 
2441 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2442                                             unsigned DwordRegIndex,
2443                                             unsigned RegWidth) {
2444   // Symbols are only defined for GCN targets
2445   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2446     return true;
2447 
2448   auto SymbolName = getGprCountSymbolName(RegKind);
2449   if (!SymbolName)
2450     return true;
2451   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2452 
2453   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2454   int64_t OldCount;
2455 
2456   if (!Sym->isVariable())
2457     return !Error(getParser().getTok().getLoc(),
2458                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2459   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2460     return !Error(
2461         getParser().getTok().getLoc(),
2462         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2463 
2464   if (OldCount <= NewMax)
2465     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2466 
2467   return true;
2468 }
2469 
2470 std::unique_ptr<AMDGPUOperand>
2471 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2472   const auto &Tok = Parser.getTok();
2473   SMLoc StartLoc = Tok.getLoc();
2474   SMLoc EndLoc = Tok.getEndLoc();
2475   RegisterKind RegKind;
2476   unsigned Reg, RegNum, RegWidth;
2477 
2478   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2479     return nullptr;
2480   }
2481   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2482     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2483       return nullptr;
2484   } else
2485     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2486   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2487 }
2488 
2489 OperandMatchResultTy
2490 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2491   // TODO: add syntactic sugar for 1/(2*PI)
2492 
2493   assert(!isRegister());
2494   assert(!isModifier());
2495 
2496   const auto& Tok = getToken();
2497   const auto& NextTok = peekToken();
2498   bool IsReal = Tok.is(AsmToken::Real);
2499   SMLoc S = getLoc();
2500   bool Negate = false;
2501 
2502   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2503     lex();
2504     IsReal = true;
2505     Negate = true;
2506   }
2507 
2508   if (IsReal) {
2509     // Floating-point expressions are not supported.
2510     // Can only allow floating-point literals with an
2511     // optional sign.
2512 
2513     StringRef Num = getTokenStr();
2514     lex();
2515 
2516     APFloat RealVal(APFloat::IEEEdouble());
2517     auto roundMode = APFloat::rmNearestTiesToEven;
2518     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2519       return MatchOperand_ParseFail;
2520     }
2521     if (Negate)
2522       RealVal.changeSign();
2523 
2524     Operands.push_back(
2525       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2526                                AMDGPUOperand::ImmTyNone, true));
2527 
2528     return MatchOperand_Success;
2529 
2530   } else {
2531     int64_t IntVal;
2532     const MCExpr *Expr;
2533     SMLoc S = getLoc();
2534 
2535     if (HasSP3AbsModifier) {
2536       // This is a workaround for handling expressions
2537       // as arguments of SP3 'abs' modifier, for example:
2538       //     |1.0|
2539       //     |-1|
2540       //     |1+x|
2541       // This syntax is not compatible with syntax of standard
2542       // MC expressions (due to the trailing '|').
2543       SMLoc EndLoc;
2544       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2545         return MatchOperand_ParseFail;
2546     } else {
2547       if (Parser.parseExpression(Expr))
2548         return MatchOperand_ParseFail;
2549     }
2550 
2551     if (Expr->evaluateAsAbsolute(IntVal)) {
2552       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2553     } else {
2554       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2555     }
2556 
2557     return MatchOperand_Success;
2558   }
2559 
2560   return MatchOperand_NoMatch;
2561 }
2562 
2563 OperandMatchResultTy
2564 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2565   if (!isRegister())
2566     return MatchOperand_NoMatch;
2567 
2568   if (auto R = parseRegister()) {
2569     assert(R->isReg());
2570     Operands.push_back(std::move(R));
2571     return MatchOperand_Success;
2572   }
2573   return MatchOperand_ParseFail;
2574 }
2575 
2576 OperandMatchResultTy
2577 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2578   auto res = parseReg(Operands);
2579   if (res != MatchOperand_NoMatch) {
2580     return res;
2581   } else if (isModifier()) {
2582     return MatchOperand_NoMatch;
2583   } else {
2584     return parseImm(Operands, HasSP3AbsMod);
2585   }
2586 }
2587 
2588 bool
2589 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2590   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2591     const auto &str = Token.getString();
2592     return str == "abs" || str == "neg" || str == "sext";
2593   }
2594   return false;
2595 }
2596 
2597 bool
2598 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2599   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2600 }
2601 
2602 bool
2603 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2604   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2605 }
2606 
2607 bool
2608 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2609   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2610 }
2611 
2612 // Check if this is an operand modifier or an opcode modifier
2613 // which may look like an expression but it is not. We should
2614 // avoid parsing these modifiers as expressions. Currently
2615 // recognized sequences are:
2616 //   |...|
2617 //   abs(...)
2618 //   neg(...)
2619 //   sext(...)
2620 //   -reg
2621 //   -|...|
2622 //   -abs(...)
2623 //   name:...
2624 // Note that simple opcode modifiers like 'gds' may be parsed as
2625 // expressions; this is a special case. See getExpressionAsToken.
2626 //
2627 bool
2628 AMDGPUAsmParser::isModifier() {
2629 
2630   AsmToken Tok = getToken();
2631   AsmToken NextToken[2];
2632   peekTokens(NextToken);
2633 
2634   return isOperandModifier(Tok, NextToken[0]) ||
2635          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2636          isOpcodeModifierWithVal(Tok, NextToken[0]);
2637 }
2638 
2639 // Check if the current token is an SP3 'neg' modifier.
2640 // Currently this modifier is allowed in the following context:
2641 //
2642 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2643 // 2. Before an 'abs' modifier: -abs(...)
2644 // 3. Before an SP3 'abs' modifier: -|...|
2645 //
2646 // In all other cases "-" is handled as a part
2647 // of an expression that follows the sign.
2648 //
2649 // Note: When "-" is followed by an integer literal,
2650 // this is interpreted as integer negation rather
2651 // than a floating-point NEG modifier applied to N.
2652 // Beside being contr-intuitive, such use of floating-point
2653 // NEG modifier would have resulted in different meaning
2654 // of integer literals used with VOP1/2/C and VOP3,
2655 // for example:
2656 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2657 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2658 // Negative fp literals with preceding "-" are
2659 // handled likewise for unifomtity
2660 //
2661 bool
2662 AMDGPUAsmParser::parseSP3NegModifier() {
2663 
2664   AsmToken NextToken[2];
2665   peekTokens(NextToken);
2666 
2667   if (isToken(AsmToken::Minus) &&
2668       (isRegister(NextToken[0], NextToken[1]) ||
2669        NextToken[0].is(AsmToken::Pipe) ||
2670        isId(NextToken[0], "abs"))) {
2671     lex();
2672     return true;
2673   }
2674 
2675   return false;
2676 }
2677 
2678 OperandMatchResultTy
2679 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2680                                               bool AllowImm) {
2681   bool Neg, SP3Neg;
2682   bool Abs, SP3Abs;
2683   SMLoc Loc;
2684 
2685   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2686   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2687     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2688     return MatchOperand_ParseFail;
2689   }
2690 
2691   SP3Neg = parseSP3NegModifier();
2692 
2693   Loc = getLoc();
2694   Neg = trySkipId("neg");
2695   if (Neg && SP3Neg) {
2696     Error(Loc, "expected register or immediate");
2697     return MatchOperand_ParseFail;
2698   }
2699   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2700     return MatchOperand_ParseFail;
2701 
2702   Abs = trySkipId("abs");
2703   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2704     return MatchOperand_ParseFail;
2705 
2706   Loc = getLoc();
2707   SP3Abs = trySkipToken(AsmToken::Pipe);
2708   if (Abs && SP3Abs) {
2709     Error(Loc, "expected register or immediate");
2710     return MatchOperand_ParseFail;
2711   }
2712 
2713   OperandMatchResultTy Res;
2714   if (AllowImm) {
2715     Res = parseRegOrImm(Operands, SP3Abs);
2716   } else {
2717     Res = parseReg(Operands);
2718   }
2719   if (Res != MatchOperand_Success) {
2720     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2721   }
2722 
2723   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2724     return MatchOperand_ParseFail;
2725   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2726     return MatchOperand_ParseFail;
2727   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2728     return MatchOperand_ParseFail;
2729 
2730   AMDGPUOperand::Modifiers Mods;
2731   Mods.Abs = Abs || SP3Abs;
2732   Mods.Neg = Neg || SP3Neg;
2733 
2734   if (Mods.hasFPModifiers()) {
2735     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2736     if (Op.isExpr()) {
2737       Error(Op.getStartLoc(), "expected an absolute expression");
2738       return MatchOperand_ParseFail;
2739     }
2740     Op.setModifiers(Mods);
2741   }
2742   return MatchOperand_Success;
2743 }
2744 
2745 OperandMatchResultTy
2746 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2747                                                bool AllowImm) {
2748   bool Sext = trySkipId("sext");
2749   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2750     return MatchOperand_ParseFail;
2751 
2752   OperandMatchResultTy Res;
2753   if (AllowImm) {
2754     Res = parseRegOrImm(Operands);
2755   } else {
2756     Res = parseReg(Operands);
2757   }
2758   if (Res != MatchOperand_Success) {
2759     return Sext? MatchOperand_ParseFail : Res;
2760   }
2761 
2762   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2763     return MatchOperand_ParseFail;
2764 
2765   AMDGPUOperand::Modifiers Mods;
2766   Mods.Sext = Sext;
2767 
2768   if (Mods.hasIntModifiers()) {
2769     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2770     if (Op.isExpr()) {
2771       Error(Op.getStartLoc(), "expected an absolute expression");
2772       return MatchOperand_ParseFail;
2773     }
2774     Op.setModifiers(Mods);
2775   }
2776 
2777   return MatchOperand_Success;
2778 }
2779 
2780 OperandMatchResultTy
2781 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2782   return parseRegOrImmWithFPInputMods(Operands, false);
2783 }
2784 
2785 OperandMatchResultTy
2786 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2787   return parseRegOrImmWithIntInputMods(Operands, false);
2788 }
2789 
2790 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2791   auto Loc = getLoc();
2792   if (trySkipId("off")) {
2793     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2794                                                 AMDGPUOperand::ImmTyOff, false));
2795     return MatchOperand_Success;
2796   }
2797 
2798   if (!isRegister())
2799     return MatchOperand_NoMatch;
2800 
2801   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2802   if (Reg) {
2803     Operands.push_back(std::move(Reg));
2804     return MatchOperand_Success;
2805   }
2806 
2807   return MatchOperand_ParseFail;
2808 
2809 }
2810 
2811 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2812   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2813 
2814   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2815       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2816       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2817       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2818     return Match_InvalidOperand;
2819 
2820   if ((TSFlags & SIInstrFlags::VOP3) &&
2821       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2822       getForcedEncodingSize() != 64)
2823     return Match_PreferE32;
2824 
2825   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2826       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2827     // v_mac_f32/16 allow only dst_sel == DWORD;
2828     auto OpNum =
2829         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2830     const auto &Op = Inst.getOperand(OpNum);
2831     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2832       return Match_InvalidOperand;
2833     }
2834   }
2835 
2836   return Match_Success;
2837 }
2838 
2839 // What asm variants we should check
2840 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2841   if (getForcedEncodingSize() == 32) {
2842     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2843     return makeArrayRef(Variants);
2844   }
2845 
2846   if (isForcedVOP3()) {
2847     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2848     return makeArrayRef(Variants);
2849   }
2850 
2851   if (isForcedSDWA()) {
2852     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2853                                         AMDGPUAsmVariants::SDWA9};
2854     return makeArrayRef(Variants);
2855   }
2856 
2857   if (isForcedDPP()) {
2858     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2859     return makeArrayRef(Variants);
2860   }
2861 
2862   static const unsigned Variants[] = {
2863     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2864     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2865   };
2866 
2867   return makeArrayRef(Variants);
2868 }
2869 
2870 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2871   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2872   const unsigned Num = Desc.getNumImplicitUses();
2873   for (unsigned i = 0; i < Num; ++i) {
2874     unsigned Reg = Desc.ImplicitUses[i];
2875     switch (Reg) {
2876     case AMDGPU::FLAT_SCR:
2877     case AMDGPU::VCC:
2878     case AMDGPU::VCC_LO:
2879     case AMDGPU::VCC_HI:
2880     case AMDGPU::M0:
2881       return Reg;
2882     default:
2883       break;
2884     }
2885   }
2886   return AMDGPU::NoRegister;
2887 }
2888 
2889 // NB: This code is correct only when used to check constant
2890 // bus limitations because GFX7 support no f16 inline constants.
2891 // Note that there are no cases when a GFX7 opcode violates
2892 // constant bus limitations due to the use of an f16 constant.
2893 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2894                                        unsigned OpIdx) const {
2895   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2896 
2897   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2898     return false;
2899   }
2900 
2901   const MCOperand &MO = Inst.getOperand(OpIdx);
2902 
2903   int64_t Val = MO.getImm();
2904   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2905 
2906   switch (OpSize) { // expected operand size
2907   case 8:
2908     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2909   case 4:
2910     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2911   case 2: {
2912     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2913     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
2914         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
2915         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
2916       return AMDGPU::isInlinableIntLiteral(Val);
2917 
2918     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2919         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2920         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
2921       return AMDGPU::isInlinableIntLiteralV216(Val);
2922 
2923     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2924         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2925         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
2926       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2927 
2928     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2929   }
2930   default:
2931     llvm_unreachable("invalid operand size");
2932   }
2933 }
2934 
2935 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
2936   if (!isGFX10())
2937     return 1;
2938 
2939   switch (Opcode) {
2940   // 64-bit shift instructions can use only one scalar value input
2941   case AMDGPU::V_LSHLREV_B64:
2942   case AMDGPU::V_LSHLREV_B64_gfx10:
2943   case AMDGPU::V_LSHL_B64:
2944   case AMDGPU::V_LSHRREV_B64:
2945   case AMDGPU::V_LSHRREV_B64_gfx10:
2946   case AMDGPU::V_LSHR_B64:
2947   case AMDGPU::V_ASHRREV_I64:
2948   case AMDGPU::V_ASHRREV_I64_gfx10:
2949   case AMDGPU::V_ASHR_I64:
2950     return 1;
2951   default:
2952     return 2;
2953   }
2954 }
2955 
2956 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2957   const MCOperand &MO = Inst.getOperand(OpIdx);
2958   if (MO.isImm()) {
2959     return !isInlineConstant(Inst, OpIdx);
2960   } else if (MO.isReg()) {
2961     auto Reg = MO.getReg();
2962     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2963     return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL;
2964   } else {
2965     return true;
2966   }
2967 }
2968 
2969 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2970   const unsigned Opcode = Inst.getOpcode();
2971   const MCInstrDesc &Desc = MII.get(Opcode);
2972   unsigned ConstantBusUseCount = 0;
2973   unsigned NumLiterals = 0;
2974   unsigned LiteralSize;
2975 
2976   if (Desc.TSFlags &
2977       (SIInstrFlags::VOPC |
2978        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2979        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2980        SIInstrFlags::SDWA)) {
2981     // Check special imm operands (used by madmk, etc)
2982     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2983       ++ConstantBusUseCount;
2984     }
2985 
2986     SmallDenseSet<unsigned> SGPRsUsed;
2987     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2988     if (SGPRUsed != AMDGPU::NoRegister) {
2989       SGPRsUsed.insert(SGPRUsed);
2990       ++ConstantBusUseCount;
2991     }
2992 
2993     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2994     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2995     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2996 
2997     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2998 
2999     for (int OpIdx : OpIndices) {
3000       if (OpIdx == -1) break;
3001 
3002       const MCOperand &MO = Inst.getOperand(OpIdx);
3003       if (usesConstantBus(Inst, OpIdx)) {
3004         if (MO.isReg()) {
3005           const unsigned Reg = mc2PseudoReg(MO.getReg());
3006           // Pairs of registers with a partial intersections like these
3007           //   s0, s[0:1]
3008           //   flat_scratch_lo, flat_scratch
3009           //   flat_scratch_lo, flat_scratch_hi
3010           // are theoretically valid but they are disabled anyway.
3011           // Note that this code mimics SIInstrInfo::verifyInstruction
3012           if (!SGPRsUsed.count(Reg)) {
3013             SGPRsUsed.insert(Reg);
3014             ++ConstantBusUseCount;
3015           }
3016         } else { // Expression or a literal
3017 
3018           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3019             continue; // special operand like VINTERP attr_chan
3020 
3021           // An instruction may use only one literal.
3022           // This has been validated on the previous step.
3023           // See validateVOP3Literal.
3024           // This literal may be used as more than one operand.
3025           // If all these operands are of the same size,
3026           // this literal counts as one scalar value.
3027           // Otherwise it counts as 2 scalar values.
3028           // See "GFX10 Shader Programming", section 3.6.2.3.
3029 
3030           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3031           if (Size < 4) Size = 4;
3032 
3033           if (NumLiterals == 0) {
3034             NumLiterals = 1;
3035             LiteralSize = Size;
3036           } else if (LiteralSize != Size) {
3037             NumLiterals = 2;
3038           }
3039         }
3040       }
3041     }
3042   }
3043   ConstantBusUseCount += NumLiterals;
3044 
3045   return ConstantBusUseCount <= getConstantBusLimit(Opcode);
3046 }
3047 
3048 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
3049   const unsigned Opcode = Inst.getOpcode();
3050   const MCInstrDesc &Desc = MII.get(Opcode);
3051 
3052   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3053   if (DstIdx == -1 ||
3054       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3055     return true;
3056   }
3057 
3058   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3059 
3060   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3061   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3062   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3063 
3064   assert(DstIdx != -1);
3065   const MCOperand &Dst = Inst.getOperand(DstIdx);
3066   assert(Dst.isReg());
3067   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3068 
3069   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3070 
3071   for (int SrcIdx : SrcIndices) {
3072     if (SrcIdx == -1) break;
3073     const MCOperand &Src = Inst.getOperand(SrcIdx);
3074     if (Src.isReg()) {
3075       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3076       if (isRegIntersect(DstReg, SrcReg, TRI)) {
3077         return false;
3078       }
3079     }
3080   }
3081 
3082   return true;
3083 }
3084 
3085 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3086 
3087   const unsigned Opc = Inst.getOpcode();
3088   const MCInstrDesc &Desc = MII.get(Opc);
3089 
3090   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3091     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3092     assert(ClampIdx != -1);
3093     return Inst.getOperand(ClampIdx).getImm() == 0;
3094   }
3095 
3096   return true;
3097 }
3098 
3099 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3100 
3101   const unsigned Opc = Inst.getOpcode();
3102   const MCInstrDesc &Desc = MII.get(Opc);
3103 
3104   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3105     return true;
3106 
3107   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3108   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3109   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3110 
3111   assert(VDataIdx != -1);
3112   assert(DMaskIdx != -1);
3113   assert(TFEIdx != -1);
3114 
3115   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3116   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
3117   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3118   if (DMask == 0)
3119     DMask = 1;
3120 
3121   unsigned DataSize =
3122     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3123   if (hasPackedD16()) {
3124     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3125     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3126       DataSize = (DataSize + 1) / 2;
3127   }
3128 
3129   return (VDataSize / 4) == DataSize + TFESize;
3130 }
3131 
3132 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3133   const unsigned Opc = Inst.getOpcode();
3134   const MCInstrDesc &Desc = MII.get(Opc);
3135 
3136   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
3137     return true;
3138 
3139   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3140   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3141       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3142   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3143   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3144   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3145 
3146   assert(VAddr0Idx != -1);
3147   assert(SrsrcIdx != -1);
3148   assert(DimIdx != -1);
3149   assert(SrsrcIdx > VAddr0Idx);
3150 
3151   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3152   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3153   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3154   unsigned VAddrSize =
3155       IsNSA ? SrsrcIdx - VAddr0Idx
3156             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3157 
3158   unsigned AddrSize = BaseOpcode->NumExtraArgs +
3159                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
3160                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
3161                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
3162   if (!IsNSA) {
3163     if (AddrSize > 8)
3164       AddrSize = 16;
3165     else if (AddrSize > 4)
3166       AddrSize = 8;
3167   }
3168 
3169   return VAddrSize == AddrSize;
3170 }
3171 
3172 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3173 
3174   const unsigned Opc = Inst.getOpcode();
3175   const MCInstrDesc &Desc = MII.get(Opc);
3176 
3177   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3178     return true;
3179   if (!Desc.mayLoad() || !Desc.mayStore())
3180     return true; // Not atomic
3181 
3182   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3183   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3184 
3185   // This is an incomplete check because image_atomic_cmpswap
3186   // may only use 0x3 and 0xf while other atomic operations
3187   // may use 0x1 and 0x3. However these limitations are
3188   // verified when we check that dmask matches dst size.
3189   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3190 }
3191 
3192 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3193 
3194   const unsigned Opc = Inst.getOpcode();
3195   const MCInstrDesc &Desc = MII.get(Opc);
3196 
3197   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3198     return true;
3199 
3200   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3201   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3202 
3203   // GATHER4 instructions use dmask in a different fashion compared to
3204   // other MIMG instructions. The only useful DMASK values are
3205   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3206   // (red,red,red,red) etc.) The ISA document doesn't mention
3207   // this.
3208   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3209 }
3210 
3211 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3212 {
3213   switch (Opcode) {
3214   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3215   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3216   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3217     return true;
3218   default:
3219     return false;
3220   }
3221 }
3222 
3223 // movrels* opcodes should only allow VGPRS as src0.
3224 // This is specified in .td description for vop1/vop3,
3225 // but sdwa is handled differently. See isSDWAOperand.
3226 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) {
3227 
3228   const unsigned Opc = Inst.getOpcode();
3229   const MCInstrDesc &Desc = MII.get(Opc);
3230 
3231   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3232     return true;
3233 
3234   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3235   assert(Src0Idx != -1);
3236 
3237   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3238   if (!Src0.isReg())
3239     return false;
3240 
3241   auto Reg = Src0.getReg();
3242   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3243   return !isSGPR(mc2PseudoReg(Reg), TRI);
3244 }
3245 
3246 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst) {
3247 
3248   const unsigned Opc = Inst.getOpcode();
3249 
3250   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3251     return true;
3252 
3253   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3254   assert(Src0Idx != -1);
3255 
3256   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3257   if (!Src0.isReg())
3258     return true;
3259 
3260   auto Reg = Src0.getReg();
3261   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3262   if (isSGPR(mc2PseudoReg(Reg), TRI)) {
3263     Error(getLoc(), "source operand must be either a VGPR or an inline constant");
3264     return false;
3265   }
3266 
3267   return true;
3268 }
3269 
3270 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3271 
3272   const unsigned Opc = Inst.getOpcode();
3273   const MCInstrDesc &Desc = MII.get(Opc);
3274 
3275   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3276     return true;
3277 
3278   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3279   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3280     if (isCI() || isSI())
3281       return false;
3282   }
3283 
3284   return true;
3285 }
3286 
3287 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3288   const unsigned Opc = Inst.getOpcode();
3289   const MCInstrDesc &Desc = MII.get(Opc);
3290 
3291   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3292     return true;
3293 
3294   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3295   if (DimIdx < 0)
3296     return true;
3297 
3298   long Imm = Inst.getOperand(DimIdx).getImm();
3299   if (Imm < 0 || Imm >= 8)
3300     return false;
3301 
3302   return true;
3303 }
3304 
3305 static bool IsRevOpcode(const unsigned Opcode)
3306 {
3307   switch (Opcode) {
3308   case AMDGPU::V_SUBREV_F32_e32:
3309   case AMDGPU::V_SUBREV_F32_e64:
3310   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3311   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3312   case AMDGPU::V_SUBREV_F32_e32_vi:
3313   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3314   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3315   case AMDGPU::V_SUBREV_F32_e64_vi:
3316 
3317   case AMDGPU::V_SUBREV_CO_U32_e32:
3318   case AMDGPU::V_SUBREV_CO_U32_e64:
3319   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3320   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3321 
3322   case AMDGPU::V_SUBBREV_U32_e32:
3323   case AMDGPU::V_SUBBREV_U32_e64:
3324   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3325   case AMDGPU::V_SUBBREV_U32_e32_vi:
3326   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3327   case AMDGPU::V_SUBBREV_U32_e64_vi:
3328 
3329   case AMDGPU::V_SUBREV_U32_e32:
3330   case AMDGPU::V_SUBREV_U32_e64:
3331   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3332   case AMDGPU::V_SUBREV_U32_e32_vi:
3333   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3334   case AMDGPU::V_SUBREV_U32_e64_vi:
3335 
3336   case AMDGPU::V_SUBREV_F16_e32:
3337   case AMDGPU::V_SUBREV_F16_e64:
3338   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3339   case AMDGPU::V_SUBREV_F16_e32_vi:
3340   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3341   case AMDGPU::V_SUBREV_F16_e64_vi:
3342 
3343   case AMDGPU::V_SUBREV_U16_e32:
3344   case AMDGPU::V_SUBREV_U16_e64:
3345   case AMDGPU::V_SUBREV_U16_e32_vi:
3346   case AMDGPU::V_SUBREV_U16_e64_vi:
3347 
3348   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3349   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3350   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3351 
3352   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3353   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3354 
3355   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3356   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3357 
3358   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3359   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3360 
3361   case AMDGPU::V_LSHRREV_B32_e32:
3362   case AMDGPU::V_LSHRREV_B32_e64:
3363   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3364   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3365   case AMDGPU::V_LSHRREV_B32_e32_vi:
3366   case AMDGPU::V_LSHRREV_B32_e64_vi:
3367   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3368   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3369 
3370   case AMDGPU::V_ASHRREV_I32_e32:
3371   case AMDGPU::V_ASHRREV_I32_e64:
3372   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3373   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3374   case AMDGPU::V_ASHRREV_I32_e32_vi:
3375   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3376   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3377   case AMDGPU::V_ASHRREV_I32_e64_vi:
3378 
3379   case AMDGPU::V_LSHLREV_B32_e32:
3380   case AMDGPU::V_LSHLREV_B32_e64:
3381   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3382   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3383   case AMDGPU::V_LSHLREV_B32_e32_vi:
3384   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3385   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3386   case AMDGPU::V_LSHLREV_B32_e64_vi:
3387 
3388   case AMDGPU::V_LSHLREV_B16_e32:
3389   case AMDGPU::V_LSHLREV_B16_e64:
3390   case AMDGPU::V_LSHLREV_B16_e32_vi:
3391   case AMDGPU::V_LSHLREV_B16_e64_vi:
3392   case AMDGPU::V_LSHLREV_B16_gfx10:
3393 
3394   case AMDGPU::V_LSHRREV_B16_e32:
3395   case AMDGPU::V_LSHRREV_B16_e64:
3396   case AMDGPU::V_LSHRREV_B16_e32_vi:
3397   case AMDGPU::V_LSHRREV_B16_e64_vi:
3398   case AMDGPU::V_LSHRREV_B16_gfx10:
3399 
3400   case AMDGPU::V_ASHRREV_I16_e32:
3401   case AMDGPU::V_ASHRREV_I16_e64:
3402   case AMDGPU::V_ASHRREV_I16_e32_vi:
3403   case AMDGPU::V_ASHRREV_I16_e64_vi:
3404   case AMDGPU::V_ASHRREV_I16_gfx10:
3405 
3406   case AMDGPU::V_LSHLREV_B64:
3407   case AMDGPU::V_LSHLREV_B64_gfx10:
3408   case AMDGPU::V_LSHLREV_B64_vi:
3409 
3410   case AMDGPU::V_LSHRREV_B64:
3411   case AMDGPU::V_LSHRREV_B64_gfx10:
3412   case AMDGPU::V_LSHRREV_B64_vi:
3413 
3414   case AMDGPU::V_ASHRREV_I64:
3415   case AMDGPU::V_ASHRREV_I64_gfx10:
3416   case AMDGPU::V_ASHRREV_I64_vi:
3417 
3418   case AMDGPU::V_PK_LSHLREV_B16:
3419   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3420   case AMDGPU::V_PK_LSHLREV_B16_vi:
3421 
3422   case AMDGPU::V_PK_LSHRREV_B16:
3423   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3424   case AMDGPU::V_PK_LSHRREV_B16_vi:
3425   case AMDGPU::V_PK_ASHRREV_I16:
3426   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3427   case AMDGPU::V_PK_ASHRREV_I16_vi:
3428     return true;
3429   default:
3430     return false;
3431   }
3432 }
3433 
3434 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3435 
3436   using namespace SIInstrFlags;
3437   const unsigned Opcode = Inst.getOpcode();
3438   const MCInstrDesc &Desc = MII.get(Opcode);
3439 
3440   // lds_direct register is defined so that it can be used
3441   // with 9-bit operands only. Ignore encodings which do not accept these.
3442   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3443     return true;
3444 
3445   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3446   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3447   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3448 
3449   const int SrcIndices[] = { Src1Idx, Src2Idx };
3450 
3451   // lds_direct cannot be specified as either src1 or src2.
3452   for (int SrcIdx : SrcIndices) {
3453     if (SrcIdx == -1) break;
3454     const MCOperand &Src = Inst.getOperand(SrcIdx);
3455     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3456       return false;
3457     }
3458   }
3459 
3460   if (Src0Idx == -1)
3461     return true;
3462 
3463   const MCOperand &Src = Inst.getOperand(Src0Idx);
3464   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3465     return true;
3466 
3467   // lds_direct is specified as src0. Check additional limitations.
3468   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3469 }
3470 
3471 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3472   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3473     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3474     if (Op.isFlatOffset())
3475       return Op.getStartLoc();
3476   }
3477   return getLoc();
3478 }
3479 
3480 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3481                                          const OperandVector &Operands) {
3482   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3483   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3484     return true;
3485 
3486   auto Opcode = Inst.getOpcode();
3487   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3488   assert(OpNum != -1);
3489 
3490   const auto &Op = Inst.getOperand(OpNum);
3491   if (!hasFlatOffsets() && Op.getImm() != 0) {
3492     Error(getFlatOffsetLoc(Operands),
3493           "flat offset modifier is not supported on this GPU");
3494     return false;
3495   }
3496 
3497   // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
3498   // For FLAT segment the offset must be positive;
3499   // MSB is ignored and forced to zero.
3500   unsigned OffsetSize = isGFX9() ? 13 : 12;
3501   if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
3502     if (!isIntN(OffsetSize, Op.getImm())) {
3503       Error(getFlatOffsetLoc(Operands),
3504             isGFX9() ? "expected a 13-bit signed offset" :
3505                        "expected a 12-bit signed offset");
3506       return false;
3507     }
3508   } else {
3509     if (!isUIntN(OffsetSize - 1, Op.getImm())) {
3510       Error(getFlatOffsetLoc(Operands),
3511             isGFX9() ? "expected a 12-bit unsigned offset" :
3512                        "expected an 11-bit unsigned offset");
3513       return false;
3514     }
3515   }
3516 
3517   return true;
3518 }
3519 
3520 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3521   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3522     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3523     if (Op.isSMEMOffset())
3524       return Op.getStartLoc();
3525   }
3526   return getLoc();
3527 }
3528 
3529 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3530                                          const OperandVector &Operands) {
3531   if (isCI() || isSI())
3532     return true;
3533 
3534   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3535   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3536     return true;
3537 
3538   auto Opcode = Inst.getOpcode();
3539   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3540   if (OpNum == -1)
3541     return true;
3542 
3543   const auto &Op = Inst.getOperand(OpNum);
3544   if (!Op.isImm())
3545     return true;
3546 
3547   uint64_t Offset = Op.getImm();
3548   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3549   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3550       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3551     return true;
3552 
3553   Error(getSMEMOffsetLoc(Operands),
3554         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3555                                "expected a 21-bit signed offset");
3556 
3557   return false;
3558 }
3559 
3560 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3561   unsigned Opcode = Inst.getOpcode();
3562   const MCInstrDesc &Desc = MII.get(Opcode);
3563   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3564     return true;
3565 
3566   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3567   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3568 
3569   const int OpIndices[] = { Src0Idx, Src1Idx };
3570 
3571   unsigned NumExprs = 0;
3572   unsigned NumLiterals = 0;
3573   uint32_t LiteralValue;
3574 
3575   for (int OpIdx : OpIndices) {
3576     if (OpIdx == -1) break;
3577 
3578     const MCOperand &MO = Inst.getOperand(OpIdx);
3579     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3580     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3581       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3582         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3583         if (NumLiterals == 0 || LiteralValue != Value) {
3584           LiteralValue = Value;
3585           ++NumLiterals;
3586         }
3587       } else if (MO.isExpr()) {
3588         ++NumExprs;
3589       }
3590     }
3591   }
3592 
3593   return NumLiterals + NumExprs <= 1;
3594 }
3595 
3596 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3597   const unsigned Opc = Inst.getOpcode();
3598   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3599       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3600     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3601     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3602 
3603     if (OpSel & ~3)
3604       return false;
3605   }
3606   return true;
3607 }
3608 
3609 // Check if VCC register matches wavefront size
3610 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3611   auto FB = getFeatureBits();
3612   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3613     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3614 }
3615 
3616 // VOP3 literal is only allowed in GFX10+ and only one can be used
3617 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3618   unsigned Opcode = Inst.getOpcode();
3619   const MCInstrDesc &Desc = MII.get(Opcode);
3620   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3621     return true;
3622 
3623   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3624   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3625   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3626 
3627   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3628 
3629   unsigned NumExprs = 0;
3630   unsigned NumLiterals = 0;
3631   uint32_t LiteralValue;
3632 
3633   for (int OpIdx : OpIndices) {
3634     if (OpIdx == -1) break;
3635 
3636     const MCOperand &MO = Inst.getOperand(OpIdx);
3637     if (!MO.isImm() && !MO.isExpr())
3638       continue;
3639     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3640       continue;
3641 
3642     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3643         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug])
3644       return false;
3645 
3646     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3647       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3648       if (NumLiterals == 0 || LiteralValue != Value) {
3649         LiteralValue = Value;
3650         ++NumLiterals;
3651       }
3652     } else if (MO.isExpr()) {
3653       ++NumExprs;
3654     }
3655   }
3656   NumLiterals += NumExprs;
3657 
3658   return !NumLiterals ||
3659          (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3660 }
3661 
3662 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3663                                           const SMLoc &IDLoc,
3664                                           const OperandVector &Operands) {
3665   if (!validateLdsDirect(Inst)) {
3666     Error(IDLoc,
3667       "invalid use of lds_direct");
3668     return false;
3669   }
3670   if (!validateSOPLiteral(Inst)) {
3671     Error(IDLoc,
3672       "only one literal operand is allowed");
3673     return false;
3674   }
3675   if (!validateVOP3Literal(Inst)) {
3676     Error(IDLoc,
3677       "invalid literal operand");
3678     return false;
3679   }
3680   if (!validateConstantBusLimitations(Inst)) {
3681     Error(IDLoc,
3682       "invalid operand (violates constant bus restrictions)");
3683     return false;
3684   }
3685   if (!validateEarlyClobberLimitations(Inst)) {
3686     Error(IDLoc,
3687       "destination must be different than all sources");
3688     return false;
3689   }
3690   if (!validateIntClampSupported(Inst)) {
3691     Error(IDLoc,
3692       "integer clamping is not supported on this GPU");
3693     return false;
3694   }
3695   if (!validateOpSel(Inst)) {
3696     Error(IDLoc,
3697       "invalid op_sel operand");
3698     return false;
3699   }
3700   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
3701   if (!validateMIMGD16(Inst)) {
3702     Error(IDLoc,
3703       "d16 modifier is not supported on this GPU");
3704     return false;
3705   }
3706   if (!validateMIMGDim(Inst)) {
3707     Error(IDLoc, "dim modifier is required on this GPU");
3708     return false;
3709   }
3710   if (!validateMIMGDataSize(Inst)) {
3711     Error(IDLoc,
3712       "image data size does not match dmask and tfe");
3713     return false;
3714   }
3715   if (!validateMIMGAddrSize(Inst)) {
3716     Error(IDLoc,
3717       "image address size does not match dim and a16");
3718     return false;
3719   }
3720   if (!validateMIMGAtomicDMask(Inst)) {
3721     Error(IDLoc,
3722       "invalid atomic image dmask");
3723     return false;
3724   }
3725   if (!validateMIMGGatherDMask(Inst)) {
3726     Error(IDLoc,
3727       "invalid image_gather dmask: only one bit must be set");
3728     return false;
3729   }
3730   if (!validateMovrels(Inst)) {
3731     Error(IDLoc, "source operand must be a VGPR");
3732     return false;
3733   }
3734   if (!validateFlatOffset(Inst, Operands)) {
3735     return false;
3736   }
3737   if (!validateSMEMOffset(Inst, Operands)) {
3738     return false;
3739   }
3740   if (!validateMAIAccWrite(Inst)) {
3741     return false;
3742   }
3743 
3744   return true;
3745 }
3746 
3747 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3748                                             const FeatureBitset &FBS,
3749                                             unsigned VariantID = 0);
3750 
3751 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3752                                               OperandVector &Operands,
3753                                               MCStreamer &Out,
3754                                               uint64_t &ErrorInfo,
3755                                               bool MatchingInlineAsm) {
3756   MCInst Inst;
3757   unsigned Result = Match_Success;
3758   for (auto Variant : getMatchedVariants()) {
3759     uint64_t EI;
3760     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3761                                   Variant);
3762     // We order match statuses from least to most specific. We use most specific
3763     // status as resulting
3764     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3765     if ((R == Match_Success) ||
3766         (R == Match_PreferE32) ||
3767         (R == Match_MissingFeature && Result != Match_PreferE32) ||
3768         (R == Match_InvalidOperand && Result != Match_MissingFeature
3769                                    && Result != Match_PreferE32) ||
3770         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
3771                                    && Result != Match_MissingFeature
3772                                    && Result != Match_PreferE32)) {
3773       Result = R;
3774       ErrorInfo = EI;
3775     }
3776     if (R == Match_Success)
3777       break;
3778   }
3779 
3780   switch (Result) {
3781   default: break;
3782   case Match_Success:
3783     if (!validateInstruction(Inst, IDLoc, Operands)) {
3784       return true;
3785     }
3786     Inst.setLoc(IDLoc);
3787     Out.emitInstruction(Inst, getSTI());
3788     return false;
3789 
3790   case Match_MissingFeature:
3791     return Error(IDLoc, "instruction not supported on this GPU");
3792 
3793   case Match_MnemonicFail: {
3794     FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3795     std::string Suggestion = AMDGPUMnemonicSpellCheck(
3796         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
3797     return Error(IDLoc, "invalid instruction" + Suggestion,
3798                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
3799   }
3800 
3801   case Match_InvalidOperand: {
3802     SMLoc ErrorLoc = IDLoc;
3803     if (ErrorInfo != ~0ULL) {
3804       if (ErrorInfo >= Operands.size()) {
3805         return Error(IDLoc, "too few operands for instruction");
3806       }
3807       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3808       if (ErrorLoc == SMLoc())
3809         ErrorLoc = IDLoc;
3810     }
3811     return Error(ErrorLoc, "invalid operand for instruction");
3812   }
3813 
3814   case Match_PreferE32:
3815     return Error(IDLoc, "internal error: instruction without _e64 suffix "
3816                         "should be encoded as e32");
3817   }
3818   llvm_unreachable("Implement any new match types added!");
3819 }
3820 
3821 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3822   int64_t Tmp = -1;
3823   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3824     return true;
3825   }
3826   if (getParser().parseAbsoluteExpression(Tmp)) {
3827     return true;
3828   }
3829   Ret = static_cast<uint32_t>(Tmp);
3830   return false;
3831 }
3832 
3833 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3834                                                uint32_t &Minor) {
3835   if (ParseAsAbsoluteExpression(Major))
3836     return TokError("invalid major version");
3837 
3838   if (getLexer().isNot(AsmToken::Comma))
3839     return TokError("minor version number required, comma expected");
3840   Lex();
3841 
3842   if (ParseAsAbsoluteExpression(Minor))
3843     return TokError("invalid minor version");
3844 
3845   return false;
3846 }
3847 
3848 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3849   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3850     return TokError("directive only supported for amdgcn architecture");
3851 
3852   std::string Target;
3853 
3854   SMLoc TargetStart = getTok().getLoc();
3855   if (getParser().parseEscapedString(Target))
3856     return true;
3857   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3858 
3859   std::string ExpectedTarget;
3860   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3861   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3862 
3863   if (Target != ExpectedTargetOS.str())
3864     return getParser().Error(TargetRange.Start, "target must match options",
3865                              TargetRange);
3866 
3867   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3868   return false;
3869 }
3870 
3871 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3872   return getParser().Error(Range.Start, "value out of range", Range);
3873 }
3874 
3875 bool AMDGPUAsmParser::calculateGPRBlocks(
3876     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3877     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
3878     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
3879     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
3880   // TODO(scott.linder): These calculations are duplicated from
3881   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3882   IsaVersion Version = getIsaVersion(getSTI().getCPU());
3883 
3884   unsigned NumVGPRs = NextFreeVGPR;
3885   unsigned NumSGPRs = NextFreeSGPR;
3886 
3887   if (Version.Major >= 10)
3888     NumSGPRs = 0;
3889   else {
3890     unsigned MaxAddressableNumSGPRs =
3891         IsaInfo::getAddressableNumSGPRs(&getSTI());
3892 
3893     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3894         NumSGPRs > MaxAddressableNumSGPRs)
3895       return OutOfRangeError(SGPRRange);
3896 
3897     NumSGPRs +=
3898         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3899 
3900     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3901         NumSGPRs > MaxAddressableNumSGPRs)
3902       return OutOfRangeError(SGPRRange);
3903 
3904     if (Features.test(FeatureSGPRInitBug))
3905       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3906   }
3907 
3908   VGPRBlocks =
3909       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
3910   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3911 
3912   return false;
3913 }
3914 
3915 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3916   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3917     return TokError("directive only supported for amdgcn architecture");
3918 
3919   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3920     return TokError("directive only supported for amdhsa OS");
3921 
3922   StringRef KernelName;
3923   if (getParser().parseIdentifier(KernelName))
3924     return true;
3925 
3926   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3927 
3928   StringSet<> Seen;
3929 
3930   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3931 
3932   SMRange VGPRRange;
3933   uint64_t NextFreeVGPR = 0;
3934   SMRange SGPRRange;
3935   uint64_t NextFreeSGPR = 0;
3936   unsigned UserSGPRCount = 0;
3937   bool ReserveVCC = true;
3938   bool ReserveFlatScr = true;
3939   bool ReserveXNACK = hasXNACK();
3940   Optional<bool> EnableWavefrontSize32;
3941 
3942   while (true) {
3943     while (getLexer().is(AsmToken::EndOfStatement))
3944       Lex();
3945 
3946     if (getLexer().isNot(AsmToken::Identifier))
3947       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3948 
3949     StringRef ID = getTok().getIdentifier();
3950     SMRange IDRange = getTok().getLocRange();
3951     Lex();
3952 
3953     if (ID == ".end_amdhsa_kernel")
3954       break;
3955 
3956     if (Seen.find(ID) != Seen.end())
3957       return TokError(".amdhsa_ directives cannot be repeated");
3958     Seen.insert(ID);
3959 
3960     SMLoc ValStart = getTok().getLoc();
3961     int64_t IVal;
3962     if (getParser().parseAbsoluteExpression(IVal))
3963       return true;
3964     SMLoc ValEnd = getTok().getLoc();
3965     SMRange ValRange = SMRange(ValStart, ValEnd);
3966 
3967     if (IVal < 0)
3968       return OutOfRangeError(ValRange);
3969 
3970     uint64_t Val = IVal;
3971 
3972 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
3973   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
3974     return OutOfRangeError(RANGE);                                             \
3975   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3976 
3977     if (ID == ".amdhsa_group_segment_fixed_size") {
3978       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3979         return OutOfRangeError(ValRange);
3980       KD.group_segment_fixed_size = Val;
3981     } else if (ID == ".amdhsa_private_segment_fixed_size") {
3982       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3983         return OutOfRangeError(ValRange);
3984       KD.private_segment_fixed_size = Val;
3985     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3986       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3987                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3988                        Val, ValRange);
3989       if (Val)
3990         UserSGPRCount += 4;
3991     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3992       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3993                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3994                        ValRange);
3995       if (Val)
3996         UserSGPRCount += 2;
3997     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3998       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3999                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4000                        ValRange);
4001       if (Val)
4002         UserSGPRCount += 2;
4003     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4004       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4005                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4006                        Val, ValRange);
4007       if (Val)
4008         UserSGPRCount += 2;
4009     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4010       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4011                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4012                        ValRange);
4013       if (Val)
4014         UserSGPRCount += 2;
4015     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4016       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4017                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4018                        ValRange);
4019       if (Val)
4020         UserSGPRCount += 2;
4021     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4022       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4023                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4024                        Val, ValRange);
4025       if (Val)
4026         UserSGPRCount += 1;
4027     } else if (ID == ".amdhsa_wavefront_size32") {
4028       if (IVersion.Major < 10)
4029         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4030                                  IDRange);
4031       EnableWavefrontSize32 = Val;
4032       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4033                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4034                        Val, ValRange);
4035     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4036       PARSE_BITS_ENTRY(
4037           KD.compute_pgm_rsrc2,
4038           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
4039           ValRange);
4040     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4041       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4042                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4043                        ValRange);
4044     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4045       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4046                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4047                        ValRange);
4048     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4049       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4050                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4051                        ValRange);
4052     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4053       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4054                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4055                        ValRange);
4056     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4057       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4058                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4059                        ValRange);
4060     } else if (ID == ".amdhsa_next_free_vgpr") {
4061       VGPRRange = ValRange;
4062       NextFreeVGPR = Val;
4063     } else if (ID == ".amdhsa_next_free_sgpr") {
4064       SGPRRange = ValRange;
4065       NextFreeSGPR = Val;
4066     } else if (ID == ".amdhsa_reserve_vcc") {
4067       if (!isUInt<1>(Val))
4068         return OutOfRangeError(ValRange);
4069       ReserveVCC = Val;
4070     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4071       if (IVersion.Major < 7)
4072         return getParser().Error(IDRange.Start, "directive requires gfx7+",
4073                                  IDRange);
4074       if (!isUInt<1>(Val))
4075         return OutOfRangeError(ValRange);
4076       ReserveFlatScr = Val;
4077     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4078       if (IVersion.Major < 8)
4079         return getParser().Error(IDRange.Start, "directive requires gfx8+",
4080                                  IDRange);
4081       if (!isUInt<1>(Val))
4082         return OutOfRangeError(ValRange);
4083       ReserveXNACK = Val;
4084     } else if (ID == ".amdhsa_float_round_mode_32") {
4085       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4086                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4087     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4088       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4089                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4090     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4091       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4092                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4093     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4094       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4095                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4096                        ValRange);
4097     } else if (ID == ".amdhsa_dx10_clamp") {
4098       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4099                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4100     } else if (ID == ".amdhsa_ieee_mode") {
4101       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4102                        Val, ValRange);
4103     } else if (ID == ".amdhsa_fp16_overflow") {
4104       if (IVersion.Major < 9)
4105         return getParser().Error(IDRange.Start, "directive requires gfx9+",
4106                                  IDRange);
4107       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4108                        ValRange);
4109     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4110       if (IVersion.Major < 10)
4111         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4112                                  IDRange);
4113       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4114                        ValRange);
4115     } else if (ID == ".amdhsa_memory_ordered") {
4116       if (IVersion.Major < 10)
4117         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4118                                  IDRange);
4119       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4120                        ValRange);
4121     } else if (ID == ".amdhsa_forward_progress") {
4122       if (IVersion.Major < 10)
4123         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4124                                  IDRange);
4125       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4126                        ValRange);
4127     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4128       PARSE_BITS_ENTRY(
4129           KD.compute_pgm_rsrc2,
4130           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4131           ValRange);
4132     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4133       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4134                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4135                        Val, ValRange);
4136     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4137       PARSE_BITS_ENTRY(
4138           KD.compute_pgm_rsrc2,
4139           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4140           ValRange);
4141     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4142       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4143                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4144                        Val, ValRange);
4145     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4146       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4147                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4148                        Val, ValRange);
4149     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4150       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4151                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4152                        Val, ValRange);
4153     } else if (ID == ".amdhsa_exception_int_div_zero") {
4154       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4155                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4156                        Val, ValRange);
4157     } else {
4158       return getParser().Error(IDRange.Start,
4159                                "unknown .amdhsa_kernel directive", IDRange);
4160     }
4161 
4162 #undef PARSE_BITS_ENTRY
4163   }
4164 
4165   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4166     return TokError(".amdhsa_next_free_vgpr directive is required");
4167 
4168   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4169     return TokError(".amdhsa_next_free_sgpr directive is required");
4170 
4171   unsigned VGPRBlocks;
4172   unsigned SGPRBlocks;
4173   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4174                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
4175                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4176                          SGPRBlocks))
4177     return true;
4178 
4179   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4180           VGPRBlocks))
4181     return OutOfRangeError(VGPRRange);
4182   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4183                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4184 
4185   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4186           SGPRBlocks))
4187     return OutOfRangeError(SGPRRange);
4188   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4189                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4190                   SGPRBlocks);
4191 
4192   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4193     return TokError("too many user SGPRs enabled");
4194   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4195                   UserSGPRCount);
4196 
4197   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4198       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4199       ReserveFlatScr, ReserveXNACK);
4200   return false;
4201 }
4202 
4203 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4204   uint32_t Major;
4205   uint32_t Minor;
4206 
4207   if (ParseDirectiveMajorMinor(Major, Minor))
4208     return true;
4209 
4210   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4211   return false;
4212 }
4213 
4214 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4215   uint32_t Major;
4216   uint32_t Minor;
4217   uint32_t Stepping;
4218   StringRef VendorName;
4219   StringRef ArchName;
4220 
4221   // If this directive has no arguments, then use the ISA version for the
4222   // targeted GPU.
4223   if (getLexer().is(AsmToken::EndOfStatement)) {
4224     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4225     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
4226                                                       ISA.Stepping,
4227                                                       "AMD", "AMDGPU");
4228     return false;
4229   }
4230 
4231   if (ParseDirectiveMajorMinor(Major, Minor))
4232     return true;
4233 
4234   if (getLexer().isNot(AsmToken::Comma))
4235     return TokError("stepping version number required, comma expected");
4236   Lex();
4237 
4238   if (ParseAsAbsoluteExpression(Stepping))
4239     return TokError("invalid stepping version");
4240 
4241   if (getLexer().isNot(AsmToken::Comma))
4242     return TokError("vendor name required, comma expected");
4243   Lex();
4244 
4245   if (getLexer().isNot(AsmToken::String))
4246     return TokError("invalid vendor name");
4247 
4248   VendorName = getLexer().getTok().getStringContents();
4249   Lex();
4250 
4251   if (getLexer().isNot(AsmToken::Comma))
4252     return TokError("arch name required, comma expected");
4253   Lex();
4254 
4255   if (getLexer().isNot(AsmToken::String))
4256     return TokError("invalid arch name");
4257 
4258   ArchName = getLexer().getTok().getStringContents();
4259   Lex();
4260 
4261   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
4262                                                     VendorName, ArchName);
4263   return false;
4264 }
4265 
4266 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4267                                                amd_kernel_code_t &Header) {
4268   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4269   // assembly for backwards compatibility.
4270   if (ID == "max_scratch_backing_memory_byte_size") {
4271     Parser.eatToEndOfStatement();
4272     return false;
4273   }
4274 
4275   SmallString<40> ErrStr;
4276   raw_svector_ostream Err(ErrStr);
4277   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4278     return TokError(Err.str());
4279   }
4280   Lex();
4281 
4282   if (ID == "enable_wavefront_size32") {
4283     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4284       if (!isGFX10())
4285         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4286       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4287         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4288     } else {
4289       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4290         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4291     }
4292   }
4293 
4294   if (ID == "wavefront_size") {
4295     if (Header.wavefront_size == 5) {
4296       if (!isGFX10())
4297         return TokError("wavefront_size=5 is only allowed on GFX10+");
4298       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4299         return TokError("wavefront_size=5 requires +WavefrontSize32");
4300     } else if (Header.wavefront_size == 6) {
4301       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4302         return TokError("wavefront_size=6 requires +WavefrontSize64");
4303     }
4304   }
4305 
4306   if (ID == "enable_wgp_mode") {
4307     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
4308       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4309   }
4310 
4311   if (ID == "enable_mem_ordered") {
4312     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
4313       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4314   }
4315 
4316   if (ID == "enable_fwd_progress") {
4317     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
4318       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4319   }
4320 
4321   return false;
4322 }
4323 
4324 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4325   amd_kernel_code_t Header;
4326   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4327 
4328   while (true) {
4329     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4330     // will set the current token to EndOfStatement.
4331     while(getLexer().is(AsmToken::EndOfStatement))
4332       Lex();
4333 
4334     if (getLexer().isNot(AsmToken::Identifier))
4335       return TokError("expected value identifier or .end_amd_kernel_code_t");
4336 
4337     StringRef ID = getLexer().getTok().getIdentifier();
4338     Lex();
4339 
4340     if (ID == ".end_amd_kernel_code_t")
4341       break;
4342 
4343     if (ParseAMDKernelCodeTValue(ID, Header))
4344       return true;
4345   }
4346 
4347   getTargetStreamer().EmitAMDKernelCodeT(Header);
4348 
4349   return false;
4350 }
4351 
4352 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4353   if (getLexer().isNot(AsmToken::Identifier))
4354     return TokError("expected symbol name");
4355 
4356   StringRef KernelName = Parser.getTok().getString();
4357 
4358   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4359                                            ELF::STT_AMDGPU_HSA_KERNEL);
4360   Lex();
4361   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
4362     KernelScope.initialize(getContext());
4363   return false;
4364 }
4365 
4366 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4367   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4368     return Error(getParser().getTok().getLoc(),
4369                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4370                  "architectures");
4371   }
4372 
4373   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
4374 
4375   std::string ISAVersionStringFromSTI;
4376   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4377   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4378 
4379   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4380     return Error(getParser().getTok().getLoc(),
4381                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4382                  "arguments specified through the command line");
4383   }
4384 
4385   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4386   Lex();
4387 
4388   return false;
4389 }
4390 
4391 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4392   const char *AssemblerDirectiveBegin;
4393   const char *AssemblerDirectiveEnd;
4394   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4395       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
4396           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4397                             HSAMD::V3::AssemblerDirectiveEnd)
4398           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4399                             HSAMD::AssemblerDirectiveEnd);
4400 
4401   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4402     return Error(getParser().getTok().getLoc(),
4403                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4404                  "not available on non-amdhsa OSes")).str());
4405   }
4406 
4407   std::string HSAMetadataString;
4408   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4409                           HSAMetadataString))
4410     return true;
4411 
4412   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
4413     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4414       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4415   } else {
4416     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4417       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4418   }
4419 
4420   return false;
4421 }
4422 
4423 /// Common code to parse out a block of text (typically YAML) between start and
4424 /// end directives.
4425 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4426                                           const char *AssemblerDirectiveEnd,
4427                                           std::string &CollectString) {
4428 
4429   raw_string_ostream CollectStream(CollectString);
4430 
4431   getLexer().setSkipSpace(false);
4432 
4433   bool FoundEnd = false;
4434   while (!getLexer().is(AsmToken::Eof)) {
4435     while (getLexer().is(AsmToken::Space)) {
4436       CollectStream << getLexer().getTok().getString();
4437       Lex();
4438     }
4439 
4440     if (getLexer().is(AsmToken::Identifier)) {
4441       StringRef ID = getLexer().getTok().getIdentifier();
4442       if (ID == AssemblerDirectiveEnd) {
4443         Lex();
4444         FoundEnd = true;
4445         break;
4446       }
4447     }
4448 
4449     CollectStream << Parser.parseStringToEndOfStatement()
4450                   << getContext().getAsmInfo()->getSeparatorString();
4451 
4452     Parser.eatToEndOfStatement();
4453   }
4454 
4455   getLexer().setSkipSpace(true);
4456 
4457   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
4458     return TokError(Twine("expected directive ") +
4459                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4460   }
4461 
4462   CollectStream.flush();
4463   return false;
4464 }
4465 
4466 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4467 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4468   std::string String;
4469   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4470                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4471     return true;
4472 
4473   auto PALMetadata = getTargetStreamer().getPALMetadata();
4474   if (!PALMetadata->setFromString(String))
4475     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4476   return false;
4477 }
4478 
4479 /// Parse the assembler directive for old linear-format PAL metadata.
4480 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4481   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4482     return Error(getParser().getTok().getLoc(),
4483                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4484                  "not available on non-amdpal OSes")).str());
4485   }
4486 
4487   auto PALMetadata = getTargetStreamer().getPALMetadata();
4488   PALMetadata->setLegacy();
4489   for (;;) {
4490     uint32_t Key, Value;
4491     if (ParseAsAbsoluteExpression(Key)) {
4492       return TokError(Twine("invalid value in ") +
4493                       Twine(PALMD::AssemblerDirective));
4494     }
4495     if (getLexer().isNot(AsmToken::Comma)) {
4496       return TokError(Twine("expected an even number of values in ") +
4497                       Twine(PALMD::AssemblerDirective));
4498     }
4499     Lex();
4500     if (ParseAsAbsoluteExpression(Value)) {
4501       return TokError(Twine("invalid value in ") +
4502                       Twine(PALMD::AssemblerDirective));
4503     }
4504     PALMetadata->setRegister(Key, Value);
4505     if (getLexer().isNot(AsmToken::Comma))
4506       break;
4507     Lex();
4508   }
4509   return false;
4510 }
4511 
4512 /// ParseDirectiveAMDGPULDS
4513 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
4514 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4515   if (getParser().checkForValidSection())
4516     return true;
4517 
4518   StringRef Name;
4519   SMLoc NameLoc = getLexer().getLoc();
4520   if (getParser().parseIdentifier(Name))
4521     return TokError("expected identifier in directive");
4522 
4523   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4524   if (parseToken(AsmToken::Comma, "expected ','"))
4525     return true;
4526 
4527   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4528 
4529   int64_t Size;
4530   SMLoc SizeLoc = getLexer().getLoc();
4531   if (getParser().parseAbsoluteExpression(Size))
4532     return true;
4533   if (Size < 0)
4534     return Error(SizeLoc, "size must be non-negative");
4535   if (Size > LocalMemorySize)
4536     return Error(SizeLoc, "size is too large");
4537 
4538   int64_t Alignment = 4;
4539   if (getLexer().is(AsmToken::Comma)) {
4540     Lex();
4541     SMLoc AlignLoc = getLexer().getLoc();
4542     if (getParser().parseAbsoluteExpression(Alignment))
4543       return true;
4544     if (Alignment < 0 || !isPowerOf2_64(Alignment))
4545       return Error(AlignLoc, "alignment must be a power of two");
4546 
4547     // Alignment larger than the size of LDS is possible in theory, as long
4548     // as the linker manages to place to symbol at address 0, but we do want
4549     // to make sure the alignment fits nicely into a 32-bit integer.
4550     if (Alignment >= 1u << 31)
4551       return Error(AlignLoc, "alignment is too large");
4552   }
4553 
4554   if (parseToken(AsmToken::EndOfStatement,
4555                  "unexpected token in '.amdgpu_lds' directive"))
4556     return true;
4557 
4558   Symbol->redefineIfPossible();
4559   if (!Symbol->isUndefined())
4560     return Error(NameLoc, "invalid symbol redefinition");
4561 
4562   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
4563   return false;
4564 }
4565 
4566 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4567   StringRef IDVal = DirectiveID.getString();
4568 
4569   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
4570     if (IDVal == ".amdgcn_target")
4571       return ParseDirectiveAMDGCNTarget();
4572 
4573     if (IDVal == ".amdhsa_kernel")
4574       return ParseDirectiveAMDHSAKernel();
4575 
4576     // TODO: Restructure/combine with PAL metadata directive.
4577     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4578       return ParseDirectiveHSAMetadata();
4579   } else {
4580     if (IDVal == ".hsa_code_object_version")
4581       return ParseDirectiveHSACodeObjectVersion();
4582 
4583     if (IDVal == ".hsa_code_object_isa")
4584       return ParseDirectiveHSACodeObjectISA();
4585 
4586     if (IDVal == ".amd_kernel_code_t")
4587       return ParseDirectiveAMDKernelCodeT();
4588 
4589     if (IDVal == ".amdgpu_hsa_kernel")
4590       return ParseDirectiveAMDGPUHsaKernel();
4591 
4592     if (IDVal == ".amd_amdgpu_isa")
4593       return ParseDirectiveISAVersion();
4594 
4595     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4596       return ParseDirectiveHSAMetadata();
4597   }
4598 
4599   if (IDVal == ".amdgpu_lds")
4600     return ParseDirectiveAMDGPULDS();
4601 
4602   if (IDVal == PALMD::AssemblerDirectiveBegin)
4603     return ParseDirectivePALMetadataBegin();
4604 
4605   if (IDVal == PALMD::AssemblerDirective)
4606     return ParseDirectivePALMetadata();
4607 
4608   return true;
4609 }
4610 
4611 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4612                                            unsigned RegNo) const {
4613 
4614   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4615        R.isValid(); ++R) {
4616     if (*R == RegNo)
4617       return isGFX9() || isGFX10();
4618   }
4619 
4620   // GFX10 has 2 more SGPRs 104 and 105.
4621   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4622        R.isValid(); ++R) {
4623     if (*R == RegNo)
4624       return hasSGPR104_SGPR105();
4625   }
4626 
4627   switch (RegNo) {
4628   case AMDGPU::SRC_SHARED_BASE:
4629   case AMDGPU::SRC_SHARED_LIMIT:
4630   case AMDGPU::SRC_PRIVATE_BASE:
4631   case AMDGPU::SRC_PRIVATE_LIMIT:
4632   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4633     return !isCI() && !isSI() && !isVI();
4634   case AMDGPU::TBA:
4635   case AMDGPU::TBA_LO:
4636   case AMDGPU::TBA_HI:
4637   case AMDGPU::TMA:
4638   case AMDGPU::TMA_LO:
4639   case AMDGPU::TMA_HI:
4640     return !isGFX9() && !isGFX10();
4641   case AMDGPU::XNACK_MASK:
4642   case AMDGPU::XNACK_MASK_LO:
4643   case AMDGPU::XNACK_MASK_HI:
4644     return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4645   case AMDGPU::SGPR_NULL:
4646     return isGFX10();
4647   default:
4648     break;
4649   }
4650 
4651   if (isCI())
4652     return true;
4653 
4654   if (isSI() || isGFX10()) {
4655     // No flat_scr on SI.
4656     // On GFX10 flat scratch is not a valid register operand and can only be
4657     // accessed with s_setreg/s_getreg.
4658     switch (RegNo) {
4659     case AMDGPU::FLAT_SCR:
4660     case AMDGPU::FLAT_SCR_LO:
4661     case AMDGPU::FLAT_SCR_HI:
4662       return false;
4663     default:
4664       return true;
4665     }
4666   }
4667 
4668   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4669   // SI/CI have.
4670   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4671        R.isValid(); ++R) {
4672     if (*R == RegNo)
4673       return hasSGPR102_SGPR103();
4674   }
4675 
4676   return true;
4677 }
4678 
4679 OperandMatchResultTy
4680 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4681                               OperandMode Mode) {
4682   // Try to parse with a custom parser
4683   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4684 
4685   // If we successfully parsed the operand or if there as an error parsing,
4686   // we are done.
4687   //
4688   // If we are parsing after we reach EndOfStatement then this means we
4689   // are appending default values to the Operands list.  This is only done
4690   // by custom parser, so we shouldn't continue on to the generic parsing.
4691   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4692       getLexer().is(AsmToken::EndOfStatement))
4693     return ResTy;
4694 
4695   if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4696     unsigned Prefix = Operands.size();
4697     SMLoc LBraceLoc = getTok().getLoc();
4698     Parser.Lex(); // eat the '['
4699 
4700     for (;;) {
4701       ResTy = parseReg(Operands);
4702       if (ResTy != MatchOperand_Success)
4703         return ResTy;
4704 
4705       if (getLexer().is(AsmToken::RBrac))
4706         break;
4707 
4708       if (getLexer().isNot(AsmToken::Comma))
4709         return MatchOperand_ParseFail;
4710       Parser.Lex();
4711     }
4712 
4713     if (Operands.size() - Prefix > 1) {
4714       Operands.insert(Operands.begin() + Prefix,
4715                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4716       Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4717                                                     getTok().getLoc()));
4718     }
4719 
4720     Parser.Lex(); // eat the ']'
4721     return MatchOperand_Success;
4722   }
4723 
4724   return parseRegOrImm(Operands);
4725 }
4726 
4727 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4728   // Clear any forced encodings from the previous instruction.
4729   setForcedEncodingSize(0);
4730   setForcedDPP(false);
4731   setForcedSDWA(false);
4732 
4733   if (Name.endswith("_e64")) {
4734     setForcedEncodingSize(64);
4735     return Name.substr(0, Name.size() - 4);
4736   } else if (Name.endswith("_e32")) {
4737     setForcedEncodingSize(32);
4738     return Name.substr(0, Name.size() - 4);
4739   } else if (Name.endswith("_dpp")) {
4740     setForcedDPP(true);
4741     return Name.substr(0, Name.size() - 4);
4742   } else if (Name.endswith("_sdwa")) {
4743     setForcedSDWA(true);
4744     return Name.substr(0, Name.size() - 5);
4745   }
4746   return Name;
4747 }
4748 
4749 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4750                                        StringRef Name,
4751                                        SMLoc NameLoc, OperandVector &Operands) {
4752   // Add the instruction mnemonic
4753   Name = parseMnemonicSuffix(Name);
4754   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4755 
4756   bool IsMIMG = Name.startswith("image_");
4757 
4758   while (!getLexer().is(AsmToken::EndOfStatement)) {
4759     OperandMode Mode = OperandMode_Default;
4760     if (IsMIMG && isGFX10() && Operands.size() == 2)
4761       Mode = OperandMode_NSA;
4762     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4763 
4764     // Eat the comma or space if there is one.
4765     if (getLexer().is(AsmToken::Comma))
4766       Parser.Lex();
4767 
4768     if (Res != MatchOperand_Success) {
4769       if (!Parser.hasPendingError()) {
4770         // FIXME: use real operand location rather than the current location.
4771         StringRef Msg =
4772           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
4773                                             "not a valid operand.";
4774         Error(getLexer().getLoc(), Msg);
4775       }
4776       while (!getLexer().is(AsmToken::EndOfStatement)) {
4777         Parser.Lex();
4778       }
4779       return true;
4780     }
4781   }
4782 
4783   return false;
4784 }
4785 
4786 //===----------------------------------------------------------------------===//
4787 // Utility functions
4788 //===----------------------------------------------------------------------===//
4789 
4790 OperandMatchResultTy
4791 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4792 
4793   if (!trySkipId(Prefix, AsmToken::Colon))
4794     return MatchOperand_NoMatch;
4795 
4796   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4797 }
4798 
4799 OperandMatchResultTy
4800 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4801                                     AMDGPUOperand::ImmTy ImmTy,
4802                                     bool (*ConvertResult)(int64_t&)) {
4803   SMLoc S = getLoc();
4804   int64_t Value = 0;
4805 
4806   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4807   if (Res != MatchOperand_Success)
4808     return Res;
4809 
4810   if (ConvertResult && !ConvertResult(Value)) {
4811     Error(S, "invalid " + StringRef(Prefix) + " value.");
4812   }
4813 
4814   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4815   return MatchOperand_Success;
4816 }
4817 
4818 OperandMatchResultTy
4819 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4820                                              OperandVector &Operands,
4821                                              AMDGPUOperand::ImmTy ImmTy,
4822                                              bool (*ConvertResult)(int64_t&)) {
4823   SMLoc S = getLoc();
4824   if (!trySkipId(Prefix, AsmToken::Colon))
4825     return MatchOperand_NoMatch;
4826 
4827   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4828     return MatchOperand_ParseFail;
4829 
4830   unsigned Val = 0;
4831   const unsigned MaxSize = 4;
4832 
4833   // FIXME: How to verify the number of elements matches the number of src
4834   // operands?
4835   for (int I = 0; ; ++I) {
4836     int64_t Op;
4837     SMLoc Loc = getLoc();
4838     if (!parseExpr(Op))
4839       return MatchOperand_ParseFail;
4840 
4841     if (Op != 0 && Op != 1) {
4842       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4843       return MatchOperand_ParseFail;
4844     }
4845 
4846     Val |= (Op << I);
4847 
4848     if (trySkipToken(AsmToken::RBrac))
4849       break;
4850 
4851     if (I + 1 == MaxSize) {
4852       Error(getLoc(), "expected a closing square bracket");
4853       return MatchOperand_ParseFail;
4854     }
4855 
4856     if (!skipToken(AsmToken::Comma, "expected a comma"))
4857       return MatchOperand_ParseFail;
4858   }
4859 
4860   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4861   return MatchOperand_Success;
4862 }
4863 
4864 OperandMatchResultTy
4865 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4866                                AMDGPUOperand::ImmTy ImmTy) {
4867   int64_t Bit = 0;
4868   SMLoc S = Parser.getTok().getLoc();
4869 
4870   // We are at the end of the statement, and this is a default argument, so
4871   // use a default value.
4872   if (getLexer().isNot(AsmToken::EndOfStatement)) {
4873     switch(getLexer().getKind()) {
4874       case AsmToken::Identifier: {
4875         StringRef Tok = Parser.getTok().getString();
4876         if (Tok == Name) {
4877           if (Tok == "r128" && !hasMIMG_R128())
4878             Error(S, "r128 modifier is not supported on this GPU");
4879           if (Tok == "a16" && !isGFX9() && !hasGFX10A16())
4880             Error(S, "a16 modifier is not supported on this GPU");
4881           Bit = 1;
4882           Parser.Lex();
4883         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4884           Bit = 0;
4885           Parser.Lex();
4886         } else {
4887           return MatchOperand_NoMatch;
4888         }
4889         break;
4890       }
4891       default:
4892         return MatchOperand_NoMatch;
4893     }
4894   }
4895 
4896   if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4897     return MatchOperand_ParseFail;
4898 
4899   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
4900     ImmTy = AMDGPUOperand::ImmTyR128A16;
4901 
4902   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4903   return MatchOperand_Success;
4904 }
4905 
4906 static void addOptionalImmOperand(
4907   MCInst& Inst, const OperandVector& Operands,
4908   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4909   AMDGPUOperand::ImmTy ImmT,
4910   int64_t Default = 0) {
4911   auto i = OptionalIdx.find(ImmT);
4912   if (i != OptionalIdx.end()) {
4913     unsigned Idx = i->second;
4914     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4915   } else {
4916     Inst.addOperand(MCOperand::createImm(Default));
4917   }
4918 }
4919 
4920 OperandMatchResultTy
4921 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4922   if (getLexer().isNot(AsmToken::Identifier)) {
4923     return MatchOperand_NoMatch;
4924   }
4925   StringRef Tok = Parser.getTok().getString();
4926   if (Tok != Prefix) {
4927     return MatchOperand_NoMatch;
4928   }
4929 
4930   Parser.Lex();
4931   if (getLexer().isNot(AsmToken::Colon)) {
4932     return MatchOperand_ParseFail;
4933   }
4934 
4935   Parser.Lex();
4936   if (getLexer().isNot(AsmToken::Identifier)) {
4937     return MatchOperand_ParseFail;
4938   }
4939 
4940   Value = Parser.getTok().getString();
4941   return MatchOperand_Success;
4942 }
4943 
4944 //===----------------------------------------------------------------------===//
4945 // MTBUF format
4946 //===----------------------------------------------------------------------===//
4947 
4948 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
4949                                   int64_t MaxVal,
4950                                   int64_t &Fmt) {
4951   int64_t Val;
4952   SMLoc Loc = getLoc();
4953 
4954   auto Res = parseIntWithPrefix(Pref, Val);
4955   if (Res == MatchOperand_ParseFail)
4956     return false;
4957   if (Res == MatchOperand_NoMatch)
4958     return true;
4959 
4960   if (Val < 0 || Val > MaxVal) {
4961     Error(Loc, Twine("out of range ", StringRef(Pref)));
4962     return false;
4963   }
4964 
4965   Fmt = Val;
4966   return true;
4967 }
4968 
4969 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
4970 // values to live in a joint format operand in the MCInst encoding.
4971 OperandMatchResultTy
4972 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
4973   using namespace llvm::AMDGPU::MTBUFFormat;
4974 
4975   int64_t Dfmt = DFMT_UNDEF;
4976   int64_t Nfmt = NFMT_UNDEF;
4977 
4978   // dfmt and nfmt can appear in either order, and each is optional.
4979   for (int I = 0; I < 2; ++I) {
4980     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
4981       return MatchOperand_ParseFail;
4982 
4983     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
4984       return MatchOperand_ParseFail;
4985     }
4986     // Skip optional comma between dfmt/nfmt
4987     // but guard against 2 commas following each other.
4988     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
4989         !peekToken().is(AsmToken::Comma)) {
4990       trySkipToken(AsmToken::Comma);
4991     }
4992   }
4993 
4994   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
4995     return MatchOperand_NoMatch;
4996 
4997   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
4998   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
4999 
5000   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5001   return MatchOperand_Success;
5002 }
5003 
5004 OperandMatchResultTy
5005 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5006   using namespace llvm::AMDGPU::MTBUFFormat;
5007 
5008   int64_t Fmt = UFMT_UNDEF;
5009 
5010   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5011     return MatchOperand_ParseFail;
5012 
5013   if (Fmt == UFMT_UNDEF)
5014     return MatchOperand_NoMatch;
5015 
5016   Format = Fmt;
5017   return MatchOperand_Success;
5018 }
5019 
5020 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5021                                     int64_t &Nfmt,
5022                                     StringRef FormatStr,
5023                                     SMLoc Loc) {
5024   using namespace llvm::AMDGPU::MTBUFFormat;
5025   int64_t Format;
5026 
5027   Format = getDfmt(FormatStr);
5028   if (Format != DFMT_UNDEF) {
5029     Dfmt = Format;
5030     return true;
5031   }
5032 
5033   Format = getNfmt(FormatStr, getSTI());
5034   if (Format != NFMT_UNDEF) {
5035     Nfmt = Format;
5036     return true;
5037   }
5038 
5039   Error(Loc, "unsupported format");
5040   return false;
5041 }
5042 
5043 OperandMatchResultTy
5044 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5045                                           SMLoc FormatLoc,
5046                                           int64_t &Format) {
5047   using namespace llvm::AMDGPU::MTBUFFormat;
5048 
5049   int64_t Dfmt = DFMT_UNDEF;
5050   int64_t Nfmt = NFMT_UNDEF;
5051   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5052     return MatchOperand_ParseFail;
5053 
5054   if (trySkipToken(AsmToken::Comma)) {
5055     StringRef Str;
5056     SMLoc Loc = getLoc();
5057     if (!parseId(Str, "expected a format string") ||
5058         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5059       return MatchOperand_ParseFail;
5060     }
5061     if (Dfmt == DFMT_UNDEF) {
5062       Error(Loc, "duplicate numeric format");
5063       return MatchOperand_ParseFail;
5064     } else if (Nfmt == NFMT_UNDEF) {
5065       Error(Loc, "duplicate data format");
5066       return MatchOperand_ParseFail;
5067     }
5068   }
5069 
5070   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5071   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5072 
5073   if (isGFX10()) {
5074     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5075     if (Ufmt == UFMT_UNDEF) {
5076       Error(FormatLoc, "unsupported format");
5077       return MatchOperand_ParseFail;
5078     }
5079     Format = Ufmt;
5080   } else {
5081     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5082   }
5083 
5084   return MatchOperand_Success;
5085 }
5086 
5087 OperandMatchResultTy
5088 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5089                                             SMLoc Loc,
5090                                             int64_t &Format) {
5091   using namespace llvm::AMDGPU::MTBUFFormat;
5092 
5093   auto Id = getUnifiedFormat(FormatStr);
5094   if (Id == UFMT_UNDEF)
5095     return MatchOperand_NoMatch;
5096 
5097   if (!isGFX10()) {
5098     Error(Loc, "unified format is not supported on this GPU");
5099     return MatchOperand_ParseFail;
5100   }
5101 
5102   Format = Id;
5103   return MatchOperand_Success;
5104 }
5105 
5106 OperandMatchResultTy
5107 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5108   using namespace llvm::AMDGPU::MTBUFFormat;
5109   SMLoc Loc = getLoc();
5110 
5111   if (!parseExpr(Format))
5112     return MatchOperand_ParseFail;
5113   if (!isValidFormatEncoding(Format, getSTI())) {
5114     Error(Loc, "out of range format");
5115     return MatchOperand_ParseFail;
5116   }
5117 
5118   return MatchOperand_Success;
5119 }
5120 
5121 OperandMatchResultTy
5122 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5123   using namespace llvm::AMDGPU::MTBUFFormat;
5124 
5125   if (!trySkipId("format", AsmToken::Colon))
5126     return MatchOperand_NoMatch;
5127 
5128   if (trySkipToken(AsmToken::LBrac)) {
5129     StringRef FormatStr;
5130     SMLoc Loc = getLoc();
5131     if (!parseId(FormatStr, "expected a format string"))
5132       return MatchOperand_ParseFail;
5133 
5134     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5135     if (Res == MatchOperand_NoMatch)
5136       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5137     if (Res != MatchOperand_Success)
5138       return Res;
5139 
5140     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5141       return MatchOperand_ParseFail;
5142 
5143     return MatchOperand_Success;
5144   }
5145 
5146   return parseNumericFormat(Format);
5147 }
5148 
5149 OperandMatchResultTy
5150 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5151   using namespace llvm::AMDGPU::MTBUFFormat;
5152 
5153   int64_t Format = getDefaultFormatEncoding(getSTI());
5154   OperandMatchResultTy Res;
5155   SMLoc Loc = getLoc();
5156 
5157   // Parse legacy format syntax.
5158   Res = isGFX10() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5159   if (Res == MatchOperand_ParseFail)
5160     return Res;
5161 
5162   bool FormatFound = (Res == MatchOperand_Success);
5163 
5164   Operands.push_back(
5165     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5166 
5167   if (FormatFound)
5168     trySkipToken(AsmToken::Comma);
5169 
5170   if (isToken(AsmToken::EndOfStatement)) {
5171     // We are expecting an soffset operand,
5172     // but let matcher handle the error.
5173     return MatchOperand_Success;
5174   }
5175 
5176   // Parse soffset.
5177   Res = parseRegOrImm(Operands);
5178   if (Res != MatchOperand_Success)
5179     return Res;
5180 
5181   trySkipToken(AsmToken::Comma);
5182 
5183   if (!FormatFound) {
5184     Res = parseSymbolicOrNumericFormat(Format);
5185     if (Res == MatchOperand_ParseFail)
5186       return Res;
5187     if (Res == MatchOperand_Success) {
5188       auto Size = Operands.size();
5189       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5190       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5191       Op.setImm(Format);
5192     }
5193     return MatchOperand_Success;
5194   }
5195 
5196   if (isId("format") && peekToken().is(AsmToken::Colon)) {
5197     Error(getLoc(), "duplicate format");
5198     return MatchOperand_ParseFail;
5199   }
5200   return MatchOperand_Success;
5201 }
5202 
5203 //===----------------------------------------------------------------------===//
5204 // ds
5205 //===----------------------------------------------------------------------===//
5206 
5207 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5208                                     const OperandVector &Operands) {
5209   OptionalImmIndexMap OptionalIdx;
5210 
5211   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5212     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5213 
5214     // Add the register arguments
5215     if (Op.isReg()) {
5216       Op.addRegOperands(Inst, 1);
5217       continue;
5218     }
5219 
5220     // Handle optional arguments
5221     OptionalIdx[Op.getImmTy()] = i;
5222   }
5223 
5224   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5225   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5226   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5227 
5228   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5229 }
5230 
5231 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5232                                 bool IsGdsHardcoded) {
5233   OptionalImmIndexMap OptionalIdx;
5234 
5235   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5236     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5237 
5238     // Add the register arguments
5239     if (Op.isReg()) {
5240       Op.addRegOperands(Inst, 1);
5241       continue;
5242     }
5243 
5244     if (Op.isToken() && Op.getToken() == "gds") {
5245       IsGdsHardcoded = true;
5246       continue;
5247     }
5248 
5249     // Handle optional arguments
5250     OptionalIdx[Op.getImmTy()] = i;
5251   }
5252 
5253   AMDGPUOperand::ImmTy OffsetType =
5254     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5255      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5256      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5257                                                       AMDGPUOperand::ImmTyOffset;
5258 
5259   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5260 
5261   if (!IsGdsHardcoded) {
5262     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5263   }
5264   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5265 }
5266 
5267 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
5268   OptionalImmIndexMap OptionalIdx;
5269 
5270   unsigned OperandIdx[4];
5271   unsigned EnMask = 0;
5272   int SrcIdx = 0;
5273 
5274   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5275     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5276 
5277     // Add the register arguments
5278     if (Op.isReg()) {
5279       assert(SrcIdx < 4);
5280       OperandIdx[SrcIdx] = Inst.size();
5281       Op.addRegOperands(Inst, 1);
5282       ++SrcIdx;
5283       continue;
5284     }
5285 
5286     if (Op.isOff()) {
5287       assert(SrcIdx < 4);
5288       OperandIdx[SrcIdx] = Inst.size();
5289       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
5290       ++SrcIdx;
5291       continue;
5292     }
5293 
5294     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
5295       Op.addImmOperands(Inst, 1);
5296       continue;
5297     }
5298 
5299     if (Op.isToken() && Op.getToken() == "done")
5300       continue;
5301 
5302     // Handle optional arguments
5303     OptionalIdx[Op.getImmTy()] = i;
5304   }
5305 
5306   assert(SrcIdx == 4);
5307 
5308   bool Compr = false;
5309   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
5310     Compr = true;
5311     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
5312     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
5313     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
5314   }
5315 
5316   for (auto i = 0; i < SrcIdx; ++i) {
5317     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
5318       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
5319     }
5320   }
5321 
5322   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
5323   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
5324 
5325   Inst.addOperand(MCOperand::createImm(EnMask));
5326 }
5327 
5328 //===----------------------------------------------------------------------===//
5329 // s_waitcnt
5330 //===----------------------------------------------------------------------===//
5331 
5332 static bool
5333 encodeCnt(
5334   const AMDGPU::IsaVersion ISA,
5335   int64_t &IntVal,
5336   int64_t CntVal,
5337   bool Saturate,
5338   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
5339   unsigned (*decode)(const IsaVersion &Version, unsigned))
5340 {
5341   bool Failed = false;
5342 
5343   IntVal = encode(ISA, IntVal, CntVal);
5344   if (CntVal != decode(ISA, IntVal)) {
5345     if (Saturate) {
5346       IntVal = encode(ISA, IntVal, -1);
5347     } else {
5348       Failed = true;
5349     }
5350   }
5351   return Failed;
5352 }
5353 
5354 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
5355 
5356   SMLoc CntLoc = getLoc();
5357   StringRef CntName = getTokenStr();
5358 
5359   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
5360       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
5361     return false;
5362 
5363   int64_t CntVal;
5364   SMLoc ValLoc = getLoc();
5365   if (!parseExpr(CntVal))
5366     return false;
5367 
5368   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5369 
5370   bool Failed = true;
5371   bool Sat = CntName.endswith("_sat");
5372 
5373   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
5374     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
5375   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
5376     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
5377   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
5378     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
5379   } else {
5380     Error(CntLoc, "invalid counter name " + CntName);
5381     return false;
5382   }
5383 
5384   if (Failed) {
5385     Error(ValLoc, "too large value for " + CntName);
5386     return false;
5387   }
5388 
5389   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
5390     return false;
5391 
5392   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
5393     if (isToken(AsmToken::EndOfStatement)) {
5394       Error(getLoc(), "expected a counter name");
5395       return false;
5396     }
5397   }
5398 
5399   return true;
5400 }
5401 
5402 OperandMatchResultTy
5403 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
5404   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5405   int64_t Waitcnt = getWaitcntBitMask(ISA);
5406   SMLoc S = getLoc();
5407 
5408   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
5409     while (!isToken(AsmToken::EndOfStatement)) {
5410       if (!parseCnt(Waitcnt))
5411         return MatchOperand_ParseFail;
5412     }
5413   } else {
5414     if (!parseExpr(Waitcnt))
5415       return MatchOperand_ParseFail;
5416   }
5417 
5418   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
5419   return MatchOperand_Success;
5420 }
5421 
5422 bool
5423 AMDGPUOperand::isSWaitCnt() const {
5424   return isImm();
5425 }
5426 
5427 //===----------------------------------------------------------------------===//
5428 // hwreg
5429 //===----------------------------------------------------------------------===//
5430 
5431 bool
5432 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
5433                                 int64_t &Offset,
5434                                 int64_t &Width) {
5435   using namespace llvm::AMDGPU::Hwreg;
5436 
5437   // The register may be specified by name or using a numeric code
5438   if (isToken(AsmToken::Identifier) &&
5439       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
5440     HwReg.IsSymbolic = true;
5441     lex(); // skip message name
5442   } else if (!parseExpr(HwReg.Id)) {
5443     return false;
5444   }
5445 
5446   if (trySkipToken(AsmToken::RParen))
5447     return true;
5448 
5449   // parse optional params
5450   return
5451     skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
5452     parseExpr(Offset) &&
5453     skipToken(AsmToken::Comma, "expected a comma") &&
5454     parseExpr(Width) &&
5455     skipToken(AsmToken::RParen, "expected a closing parenthesis");
5456 }
5457 
5458 bool
5459 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
5460                                const int64_t Offset,
5461                                const int64_t Width,
5462                                const SMLoc Loc) {
5463 
5464   using namespace llvm::AMDGPU::Hwreg;
5465 
5466   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
5467     Error(Loc, "specified hardware register is not supported on this GPU");
5468     return false;
5469   } else if (!isValidHwreg(HwReg.Id)) {
5470     Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
5471     return false;
5472   } else if (!isValidHwregOffset(Offset)) {
5473     Error(Loc, "invalid bit offset: only 5-bit values are legal");
5474     return false;
5475   } else if (!isValidHwregWidth(Width)) {
5476     Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
5477     return false;
5478   }
5479   return true;
5480 }
5481 
5482 OperandMatchResultTy
5483 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
5484   using namespace llvm::AMDGPU::Hwreg;
5485 
5486   int64_t ImmVal = 0;
5487   SMLoc Loc = getLoc();
5488 
5489   if (trySkipId("hwreg", AsmToken::LParen)) {
5490     OperandInfoTy HwReg(ID_UNKNOWN_);
5491     int64_t Offset = OFFSET_DEFAULT_;
5492     int64_t Width = WIDTH_DEFAULT_;
5493     if (parseHwregBody(HwReg, Offset, Width) &&
5494         validateHwreg(HwReg, Offset, Width, Loc)) {
5495       ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
5496     } else {
5497       return MatchOperand_ParseFail;
5498     }
5499   } else if (parseExpr(ImmVal)) {
5500     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
5501       Error(Loc, "invalid immediate: only 16-bit values are legal");
5502       return MatchOperand_ParseFail;
5503     }
5504   } else {
5505     return MatchOperand_ParseFail;
5506   }
5507 
5508   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
5509   return MatchOperand_Success;
5510 }
5511 
5512 bool AMDGPUOperand::isHwreg() const {
5513   return isImmTy(ImmTyHwreg);
5514 }
5515 
5516 //===----------------------------------------------------------------------===//
5517 // sendmsg
5518 //===----------------------------------------------------------------------===//
5519 
5520 bool
5521 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
5522                                   OperandInfoTy &Op,
5523                                   OperandInfoTy &Stream) {
5524   using namespace llvm::AMDGPU::SendMsg;
5525 
5526   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
5527     Msg.IsSymbolic = true;
5528     lex(); // skip message name
5529   } else if (!parseExpr(Msg.Id)) {
5530     return false;
5531   }
5532 
5533   if (trySkipToken(AsmToken::Comma)) {
5534     Op.IsDefined = true;
5535     if (isToken(AsmToken::Identifier) &&
5536         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
5537       lex(); // skip operation name
5538     } else if (!parseExpr(Op.Id)) {
5539       return false;
5540     }
5541 
5542     if (trySkipToken(AsmToken::Comma)) {
5543       Stream.IsDefined = true;
5544       if (!parseExpr(Stream.Id))
5545         return false;
5546     }
5547   }
5548 
5549   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
5550 }
5551 
5552 bool
5553 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
5554                                  const OperandInfoTy &Op,
5555                                  const OperandInfoTy &Stream,
5556                                  const SMLoc S) {
5557   using namespace llvm::AMDGPU::SendMsg;
5558 
5559   // Validation strictness depends on whether message is specified
5560   // in a symbolc or in a numeric form. In the latter case
5561   // only encoding possibility is checked.
5562   bool Strict = Msg.IsSymbolic;
5563 
5564   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
5565     Error(S, "invalid message id");
5566     return false;
5567   } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
5568     Error(S, Op.IsDefined ?
5569              "message does not support operations" :
5570              "missing message operation");
5571     return false;
5572   } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
5573     Error(S, "invalid operation id");
5574     return false;
5575   } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
5576     Error(S, "message operation does not support streams");
5577     return false;
5578   } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
5579     Error(S, "invalid message stream id");
5580     return false;
5581   }
5582   return true;
5583 }
5584 
5585 OperandMatchResultTy
5586 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
5587   using namespace llvm::AMDGPU::SendMsg;
5588 
5589   int64_t ImmVal = 0;
5590   SMLoc Loc = getLoc();
5591 
5592   if (trySkipId("sendmsg", AsmToken::LParen)) {
5593     OperandInfoTy Msg(ID_UNKNOWN_);
5594     OperandInfoTy Op(OP_NONE_);
5595     OperandInfoTy Stream(STREAM_ID_NONE_);
5596     if (parseSendMsgBody(Msg, Op, Stream) &&
5597         validateSendMsg(Msg, Op, Stream, Loc)) {
5598       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5599     } else {
5600       return MatchOperand_ParseFail;
5601     }
5602   } else if (parseExpr(ImmVal)) {
5603     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
5604       Error(Loc, "invalid immediate: only 16-bit values are legal");
5605       return MatchOperand_ParseFail;
5606     }
5607   } else {
5608     return MatchOperand_ParseFail;
5609   }
5610 
5611   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5612   return MatchOperand_Success;
5613 }
5614 
5615 bool AMDGPUOperand::isSendMsg() const {
5616   return isImmTy(ImmTySendMsg);
5617 }
5618 
5619 //===----------------------------------------------------------------------===//
5620 // v_interp
5621 //===----------------------------------------------------------------------===//
5622 
5623 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5624   if (getLexer().getKind() != AsmToken::Identifier)
5625     return MatchOperand_NoMatch;
5626 
5627   StringRef Str = Parser.getTok().getString();
5628   int Slot = StringSwitch<int>(Str)
5629     .Case("p10", 0)
5630     .Case("p20", 1)
5631     .Case("p0", 2)
5632     .Default(-1);
5633 
5634   SMLoc S = Parser.getTok().getLoc();
5635   if (Slot == -1)
5636     return MatchOperand_ParseFail;
5637 
5638   Parser.Lex();
5639   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5640                                               AMDGPUOperand::ImmTyInterpSlot));
5641   return MatchOperand_Success;
5642 }
5643 
5644 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5645   if (getLexer().getKind() != AsmToken::Identifier)
5646     return MatchOperand_NoMatch;
5647 
5648   StringRef Str = Parser.getTok().getString();
5649   if (!Str.startswith("attr"))
5650     return MatchOperand_NoMatch;
5651 
5652   StringRef Chan = Str.take_back(2);
5653   int AttrChan = StringSwitch<int>(Chan)
5654     .Case(".x", 0)
5655     .Case(".y", 1)
5656     .Case(".z", 2)
5657     .Case(".w", 3)
5658     .Default(-1);
5659   if (AttrChan == -1)
5660     return MatchOperand_ParseFail;
5661 
5662   Str = Str.drop_back(2).drop_front(4);
5663 
5664   uint8_t Attr;
5665   if (Str.getAsInteger(10, Attr))
5666     return MatchOperand_ParseFail;
5667 
5668   SMLoc S = Parser.getTok().getLoc();
5669   Parser.Lex();
5670   if (Attr > 63) {
5671     Error(S, "out of bounds attr");
5672     return MatchOperand_ParseFail;
5673   }
5674 
5675   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5676 
5677   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5678                                               AMDGPUOperand::ImmTyInterpAttr));
5679   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5680                                               AMDGPUOperand::ImmTyAttrChan));
5681   return MatchOperand_Success;
5682 }
5683 
5684 //===----------------------------------------------------------------------===//
5685 // exp
5686 //===----------------------------------------------------------------------===//
5687 
5688 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5689                                                       uint8_t &Val) {
5690   if (Str == "null") {
5691     Val = 9;
5692     return MatchOperand_Success;
5693   }
5694 
5695   if (Str.startswith("mrt")) {
5696     Str = Str.drop_front(3);
5697     if (Str == "z") { // == mrtz
5698       Val = 8;
5699       return MatchOperand_Success;
5700     }
5701 
5702     if (Str.getAsInteger(10, Val))
5703       return MatchOperand_ParseFail;
5704 
5705     if (Val > 7) {
5706       Error(getLoc(), "invalid exp target");
5707       return MatchOperand_ParseFail;
5708     }
5709 
5710     return MatchOperand_Success;
5711   }
5712 
5713   if (Str.startswith("pos")) {
5714     Str = Str.drop_front(3);
5715     if (Str.getAsInteger(10, Val))
5716       return MatchOperand_ParseFail;
5717 
5718     if (Val > 4 || (Val == 4 && !isGFX10())) {
5719       Error(getLoc(), "invalid exp target");
5720       return MatchOperand_ParseFail;
5721     }
5722 
5723     Val += 12;
5724     return MatchOperand_Success;
5725   }
5726 
5727   if (isGFX10() && Str == "prim") {
5728     Val = 20;
5729     return MatchOperand_Success;
5730   }
5731 
5732   if (Str.startswith("param")) {
5733     Str = Str.drop_front(5);
5734     if (Str.getAsInteger(10, Val))
5735       return MatchOperand_ParseFail;
5736 
5737     if (Val >= 32) {
5738       Error(getLoc(), "invalid exp target");
5739       return MatchOperand_ParseFail;
5740     }
5741 
5742     Val += 32;
5743     return MatchOperand_Success;
5744   }
5745 
5746   if (Str.startswith("invalid_target_")) {
5747     Str = Str.drop_front(15);
5748     if (Str.getAsInteger(10, Val))
5749       return MatchOperand_ParseFail;
5750 
5751     Error(getLoc(), "invalid exp target");
5752     return MatchOperand_ParseFail;
5753   }
5754 
5755   return MatchOperand_NoMatch;
5756 }
5757 
5758 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5759   uint8_t Val;
5760   StringRef Str = Parser.getTok().getString();
5761 
5762   auto Res = parseExpTgtImpl(Str, Val);
5763   if (Res != MatchOperand_Success)
5764     return Res;
5765 
5766   SMLoc S = Parser.getTok().getLoc();
5767   Parser.Lex();
5768 
5769   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5770                                               AMDGPUOperand::ImmTyExpTgt));
5771   return MatchOperand_Success;
5772 }
5773 
5774 //===----------------------------------------------------------------------===//
5775 // parser helpers
5776 //===----------------------------------------------------------------------===//
5777 
5778 bool
5779 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5780   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5781 }
5782 
5783 bool
5784 AMDGPUAsmParser::isId(const StringRef Id) const {
5785   return isId(getToken(), Id);
5786 }
5787 
5788 bool
5789 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5790   return getTokenKind() == Kind;
5791 }
5792 
5793 bool
5794 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5795   if (isId(Id)) {
5796     lex();
5797     return true;
5798   }
5799   return false;
5800 }
5801 
5802 bool
5803 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5804   if (isId(Id) && peekToken().is(Kind)) {
5805     lex();
5806     lex();
5807     return true;
5808   }
5809   return false;
5810 }
5811 
5812 bool
5813 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5814   if (isToken(Kind)) {
5815     lex();
5816     return true;
5817   }
5818   return false;
5819 }
5820 
5821 bool
5822 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5823                            const StringRef ErrMsg) {
5824   if (!trySkipToken(Kind)) {
5825     Error(getLoc(), ErrMsg);
5826     return false;
5827   }
5828   return true;
5829 }
5830 
5831 bool
5832 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5833   return !getParser().parseAbsoluteExpression(Imm);
5834 }
5835 
5836 bool
5837 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
5838   SMLoc S = getLoc();
5839 
5840   const MCExpr *Expr;
5841   if (Parser.parseExpression(Expr))
5842     return false;
5843 
5844   int64_t IntVal;
5845   if (Expr->evaluateAsAbsolute(IntVal)) {
5846     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
5847   } else {
5848     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
5849   }
5850   return true;
5851 }
5852 
5853 bool
5854 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5855   if (isToken(AsmToken::String)) {
5856     Val = getToken().getStringContents();
5857     lex();
5858     return true;
5859   } else {
5860     Error(getLoc(), ErrMsg);
5861     return false;
5862   }
5863 }
5864 
5865 bool
5866 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
5867   if (isToken(AsmToken::Identifier)) {
5868     Val = getTokenStr();
5869     lex();
5870     return true;
5871   } else {
5872     Error(getLoc(), ErrMsg);
5873     return false;
5874   }
5875 }
5876 
5877 AsmToken
5878 AMDGPUAsmParser::getToken() const {
5879   return Parser.getTok();
5880 }
5881 
5882 AsmToken
5883 AMDGPUAsmParser::peekToken() {
5884   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
5885 }
5886 
5887 void
5888 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
5889   auto TokCount = getLexer().peekTokens(Tokens);
5890 
5891   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
5892     Tokens[Idx] = AsmToken(AsmToken::Error, "");
5893 }
5894 
5895 AsmToken::TokenKind
5896 AMDGPUAsmParser::getTokenKind() const {
5897   return getLexer().getKind();
5898 }
5899 
5900 SMLoc
5901 AMDGPUAsmParser::getLoc() const {
5902   return getToken().getLoc();
5903 }
5904 
5905 StringRef
5906 AMDGPUAsmParser::getTokenStr() const {
5907   return getToken().getString();
5908 }
5909 
5910 void
5911 AMDGPUAsmParser::lex() {
5912   Parser.Lex();
5913 }
5914 
5915 //===----------------------------------------------------------------------===//
5916 // swizzle
5917 //===----------------------------------------------------------------------===//
5918 
5919 LLVM_READNONE
5920 static unsigned
5921 encodeBitmaskPerm(const unsigned AndMask,
5922                   const unsigned OrMask,
5923                   const unsigned XorMask) {
5924   using namespace llvm::AMDGPU::Swizzle;
5925 
5926   return BITMASK_PERM_ENC |
5927          (AndMask << BITMASK_AND_SHIFT) |
5928          (OrMask  << BITMASK_OR_SHIFT)  |
5929          (XorMask << BITMASK_XOR_SHIFT);
5930 }
5931 
5932 bool
5933 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5934                                       const unsigned MinVal,
5935                                       const unsigned MaxVal,
5936                                       const StringRef ErrMsg) {
5937   for (unsigned i = 0; i < OpNum; ++i) {
5938     if (!skipToken(AsmToken::Comma, "expected a comma")){
5939       return false;
5940     }
5941     SMLoc ExprLoc = Parser.getTok().getLoc();
5942     if (!parseExpr(Op[i])) {
5943       return false;
5944     }
5945     if (Op[i] < MinVal || Op[i] > MaxVal) {
5946       Error(ExprLoc, ErrMsg);
5947       return false;
5948     }
5949   }
5950 
5951   return true;
5952 }
5953 
5954 bool
5955 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5956   using namespace llvm::AMDGPU::Swizzle;
5957 
5958   int64_t Lane[LANE_NUM];
5959   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5960                            "expected a 2-bit lane id")) {
5961     Imm = QUAD_PERM_ENC;
5962     for (unsigned I = 0; I < LANE_NUM; ++I) {
5963       Imm |= Lane[I] << (LANE_SHIFT * I);
5964     }
5965     return true;
5966   }
5967   return false;
5968 }
5969 
5970 bool
5971 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5972   using namespace llvm::AMDGPU::Swizzle;
5973 
5974   SMLoc S = Parser.getTok().getLoc();
5975   int64_t GroupSize;
5976   int64_t LaneIdx;
5977 
5978   if (!parseSwizzleOperands(1, &GroupSize,
5979                             2, 32,
5980                             "group size must be in the interval [2,32]")) {
5981     return false;
5982   }
5983   if (!isPowerOf2_64(GroupSize)) {
5984     Error(S, "group size must be a power of two");
5985     return false;
5986   }
5987   if (parseSwizzleOperands(1, &LaneIdx,
5988                            0, GroupSize - 1,
5989                            "lane id must be in the interval [0,group size - 1]")) {
5990     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5991     return true;
5992   }
5993   return false;
5994 }
5995 
5996 bool
5997 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
5998   using namespace llvm::AMDGPU::Swizzle;
5999 
6000   SMLoc S = Parser.getTok().getLoc();
6001   int64_t GroupSize;
6002 
6003   if (!parseSwizzleOperands(1, &GroupSize,
6004       2, 32, "group size must be in the interval [2,32]")) {
6005     return false;
6006   }
6007   if (!isPowerOf2_64(GroupSize)) {
6008     Error(S, "group size must be a power of two");
6009     return false;
6010   }
6011 
6012   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6013   return true;
6014 }
6015 
6016 bool
6017 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6018   using namespace llvm::AMDGPU::Swizzle;
6019 
6020   SMLoc S = Parser.getTok().getLoc();
6021   int64_t GroupSize;
6022 
6023   if (!parseSwizzleOperands(1, &GroupSize,
6024       1, 16, "group size must be in the interval [1,16]")) {
6025     return false;
6026   }
6027   if (!isPowerOf2_64(GroupSize)) {
6028     Error(S, "group size must be a power of two");
6029     return false;
6030   }
6031 
6032   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6033   return true;
6034 }
6035 
6036 bool
6037 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6038   using namespace llvm::AMDGPU::Swizzle;
6039 
6040   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6041     return false;
6042   }
6043 
6044   StringRef Ctl;
6045   SMLoc StrLoc = Parser.getTok().getLoc();
6046   if (!parseString(Ctl)) {
6047     return false;
6048   }
6049   if (Ctl.size() != BITMASK_WIDTH) {
6050     Error(StrLoc, "expected a 5-character mask");
6051     return false;
6052   }
6053 
6054   unsigned AndMask = 0;
6055   unsigned OrMask = 0;
6056   unsigned XorMask = 0;
6057 
6058   for (size_t i = 0; i < Ctl.size(); ++i) {
6059     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6060     switch(Ctl[i]) {
6061     default:
6062       Error(StrLoc, "invalid mask");
6063       return false;
6064     case '0':
6065       break;
6066     case '1':
6067       OrMask |= Mask;
6068       break;
6069     case 'p':
6070       AndMask |= Mask;
6071       break;
6072     case 'i':
6073       AndMask |= Mask;
6074       XorMask |= Mask;
6075       break;
6076     }
6077   }
6078 
6079   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6080   return true;
6081 }
6082 
6083 bool
6084 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6085 
6086   SMLoc OffsetLoc = Parser.getTok().getLoc();
6087 
6088   if (!parseExpr(Imm)) {
6089     return false;
6090   }
6091   if (!isUInt<16>(Imm)) {
6092     Error(OffsetLoc, "expected a 16-bit offset");
6093     return false;
6094   }
6095   return true;
6096 }
6097 
6098 bool
6099 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6100   using namespace llvm::AMDGPU::Swizzle;
6101 
6102   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
6103 
6104     SMLoc ModeLoc = Parser.getTok().getLoc();
6105     bool Ok = false;
6106 
6107     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6108       Ok = parseSwizzleQuadPerm(Imm);
6109     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6110       Ok = parseSwizzleBitmaskPerm(Imm);
6111     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6112       Ok = parseSwizzleBroadcast(Imm);
6113     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6114       Ok = parseSwizzleSwap(Imm);
6115     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6116       Ok = parseSwizzleReverse(Imm);
6117     } else {
6118       Error(ModeLoc, "expected a swizzle mode");
6119     }
6120 
6121     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
6122   }
6123 
6124   return false;
6125 }
6126 
6127 OperandMatchResultTy
6128 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6129   SMLoc S = Parser.getTok().getLoc();
6130   int64_t Imm = 0;
6131 
6132   if (trySkipId("offset")) {
6133 
6134     bool Ok = false;
6135     if (skipToken(AsmToken::Colon, "expected a colon")) {
6136       if (trySkipId("swizzle")) {
6137         Ok = parseSwizzleMacro(Imm);
6138       } else {
6139         Ok = parseSwizzleOffset(Imm);
6140       }
6141     }
6142 
6143     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6144 
6145     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
6146   } else {
6147     // Swizzle "offset" operand is optional.
6148     // If it is omitted, try parsing other optional operands.
6149     return parseOptionalOpr(Operands);
6150   }
6151 }
6152 
6153 bool
6154 AMDGPUOperand::isSwizzle() const {
6155   return isImmTy(ImmTySwizzle);
6156 }
6157 
6158 //===----------------------------------------------------------------------===//
6159 // VGPR Index Mode
6160 //===----------------------------------------------------------------------===//
6161 
6162 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
6163 
6164   using namespace llvm::AMDGPU::VGPRIndexMode;
6165 
6166   if (trySkipToken(AsmToken::RParen)) {
6167     return OFF;
6168   }
6169 
6170   int64_t Imm = 0;
6171 
6172   while (true) {
6173     unsigned Mode = 0;
6174     SMLoc S = Parser.getTok().getLoc();
6175 
6176     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
6177       if (trySkipId(IdSymbolic[ModeId])) {
6178         Mode = 1 << ModeId;
6179         break;
6180       }
6181     }
6182 
6183     if (Mode == 0) {
6184       Error(S, (Imm == 0)?
6185                "expected a VGPR index mode or a closing parenthesis" :
6186                "expected a VGPR index mode");
6187       return UNDEF;
6188     }
6189 
6190     if (Imm & Mode) {
6191       Error(S, "duplicate VGPR index mode");
6192       return UNDEF;
6193     }
6194     Imm |= Mode;
6195 
6196     if (trySkipToken(AsmToken::RParen))
6197       break;
6198     if (!skipToken(AsmToken::Comma,
6199                    "expected a comma or a closing parenthesis"))
6200       return UNDEF;
6201   }
6202 
6203   return Imm;
6204 }
6205 
6206 OperandMatchResultTy
6207 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
6208 
6209   using namespace llvm::AMDGPU::VGPRIndexMode;
6210 
6211   int64_t Imm = 0;
6212   SMLoc S = Parser.getTok().getLoc();
6213 
6214   if (getLexer().getKind() == AsmToken::Identifier &&
6215       Parser.getTok().getString() == "gpr_idx" &&
6216       getLexer().peekTok().is(AsmToken::LParen)) {
6217 
6218     Parser.Lex();
6219     Parser.Lex();
6220 
6221     Imm = parseGPRIdxMacro();
6222     if (Imm == UNDEF)
6223       return MatchOperand_ParseFail;
6224 
6225   } else {
6226     if (getParser().parseAbsoluteExpression(Imm))
6227       return MatchOperand_ParseFail;
6228     if (Imm < 0 || !isUInt<4>(Imm)) {
6229       Error(S, "invalid immediate: only 4-bit values are legal");
6230       return MatchOperand_ParseFail;
6231     }
6232   }
6233 
6234   Operands.push_back(
6235       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
6236   return MatchOperand_Success;
6237 }
6238 
6239 bool AMDGPUOperand::isGPRIdxMode() const {
6240   return isImmTy(ImmTyGprIdxMode);
6241 }
6242 
6243 //===----------------------------------------------------------------------===//
6244 // sopp branch targets
6245 //===----------------------------------------------------------------------===//
6246 
6247 OperandMatchResultTy
6248 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
6249 
6250   // Make sure we are not parsing something
6251   // that looks like a label or an expression but is not.
6252   // This will improve error messages.
6253   if (isRegister() || isModifier())
6254     return MatchOperand_NoMatch;
6255 
6256   if (!parseExpr(Operands))
6257     return MatchOperand_ParseFail;
6258 
6259   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
6260   assert(Opr.isImm() || Opr.isExpr());
6261   SMLoc Loc = Opr.getStartLoc();
6262 
6263   // Currently we do not support arbitrary expressions as branch targets.
6264   // Only labels and absolute expressions are accepted.
6265   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
6266     Error(Loc, "expected an absolute expression or a label");
6267   } else if (Opr.isImm() && !Opr.isS16Imm()) {
6268     Error(Loc, "expected a 16-bit signed jump offset");
6269   }
6270 
6271   return MatchOperand_Success;
6272 }
6273 
6274 //===----------------------------------------------------------------------===//
6275 // Boolean holding registers
6276 //===----------------------------------------------------------------------===//
6277 
6278 OperandMatchResultTy
6279 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
6280   return parseReg(Operands);
6281 }
6282 
6283 //===----------------------------------------------------------------------===//
6284 // mubuf
6285 //===----------------------------------------------------------------------===//
6286 
6287 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
6288   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
6289 }
6290 
6291 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
6292   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
6293 }
6294 
6295 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
6296   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
6297 }
6298 
6299 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
6300                                const OperandVector &Operands,
6301                                bool IsAtomic,
6302                                bool IsAtomicReturn,
6303                                bool IsLds) {
6304   bool IsLdsOpcode = IsLds;
6305   bool HasLdsModifier = false;
6306   OptionalImmIndexMap OptionalIdx;
6307   assert(IsAtomicReturn ? IsAtomic : true);
6308   unsigned FirstOperandIdx = 1;
6309 
6310   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
6311     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6312 
6313     // Add the register arguments
6314     if (Op.isReg()) {
6315       Op.addRegOperands(Inst, 1);
6316       // Insert a tied src for atomic return dst.
6317       // This cannot be postponed as subsequent calls to
6318       // addImmOperands rely on correct number of MC operands.
6319       if (IsAtomicReturn && i == FirstOperandIdx)
6320         Op.addRegOperands(Inst, 1);
6321       continue;
6322     }
6323 
6324     // Handle the case where soffset is an immediate
6325     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6326       Op.addImmOperands(Inst, 1);
6327       continue;
6328     }
6329 
6330     HasLdsModifier |= Op.isLDS();
6331 
6332     // Handle tokens like 'offen' which are sometimes hard-coded into the
6333     // asm string.  There are no MCInst operands for these.
6334     if (Op.isToken()) {
6335       continue;
6336     }
6337     assert(Op.isImm());
6338 
6339     // Handle optional arguments
6340     OptionalIdx[Op.getImmTy()] = i;
6341   }
6342 
6343   // This is a workaround for an llvm quirk which may result in an
6344   // incorrect instruction selection. Lds and non-lds versions of
6345   // MUBUF instructions are identical except that lds versions
6346   // have mandatory 'lds' modifier. However this modifier follows
6347   // optional modifiers and llvm asm matcher regards this 'lds'
6348   // modifier as an optional one. As a result, an lds version
6349   // of opcode may be selected even if it has no 'lds' modifier.
6350   if (IsLdsOpcode && !HasLdsModifier) {
6351     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
6352     if (NoLdsOpcode != -1) { // Got lds version - correct it.
6353       Inst.setOpcode(NoLdsOpcode);
6354       IsLdsOpcode = false;
6355     }
6356   }
6357 
6358   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
6359   if (!IsAtomic) { // glc is hard-coded.
6360     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6361   }
6362   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6363 
6364   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
6365     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6366   }
6367 
6368   if (isGFX10())
6369     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6370 }
6371 
6372 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
6373   OptionalImmIndexMap OptionalIdx;
6374 
6375   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6376     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6377 
6378     // Add the register arguments
6379     if (Op.isReg()) {
6380       Op.addRegOperands(Inst, 1);
6381       continue;
6382     }
6383 
6384     // Handle the case where soffset is an immediate
6385     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6386       Op.addImmOperands(Inst, 1);
6387       continue;
6388     }
6389 
6390     // Handle tokens like 'offen' which are sometimes hard-coded into the
6391     // asm string.  There are no MCInst operands for these.
6392     if (Op.isToken()) {
6393       continue;
6394     }
6395     assert(Op.isImm());
6396 
6397     // Handle optional arguments
6398     OptionalIdx[Op.getImmTy()] = i;
6399   }
6400 
6401   addOptionalImmOperand(Inst, Operands, OptionalIdx,
6402                         AMDGPUOperand::ImmTyOffset);
6403   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
6404   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6405   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6406   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6407 
6408   if (isGFX10())
6409     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6410 }
6411 
6412 //===----------------------------------------------------------------------===//
6413 // mimg
6414 //===----------------------------------------------------------------------===//
6415 
6416 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
6417                               bool IsAtomic) {
6418   unsigned I = 1;
6419   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6420   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6421     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6422   }
6423 
6424   if (IsAtomic) {
6425     // Add src, same as dst
6426     assert(Desc.getNumDefs() == 1);
6427     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
6428   }
6429 
6430   OptionalImmIndexMap OptionalIdx;
6431 
6432   for (unsigned E = Operands.size(); I != E; ++I) {
6433     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6434 
6435     // Add the register arguments
6436     if (Op.isReg()) {
6437       Op.addRegOperands(Inst, 1);
6438     } else if (Op.isImmModifier()) {
6439       OptionalIdx[Op.getImmTy()] = I;
6440     } else if (!Op.isToken()) {
6441       llvm_unreachable("unexpected operand type");
6442     }
6443   }
6444 
6445   bool IsGFX10 = isGFX10();
6446 
6447   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
6448   if (IsGFX10)
6449     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
6450   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
6451   if (IsGFX10)
6452     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6453   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6454   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6455   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
6456   if (IsGFX10)
6457     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
6458   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6459   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
6460   if (!IsGFX10)
6461     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
6462   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
6463 }
6464 
6465 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
6466   cvtMIMG(Inst, Operands, true);
6467 }
6468 
6469 //===----------------------------------------------------------------------===//
6470 // smrd
6471 //===----------------------------------------------------------------------===//
6472 
6473 bool AMDGPUOperand::isSMRDOffset8() const {
6474   return isImm() && isUInt<8>(getImm());
6475 }
6476 
6477 bool AMDGPUOperand::isSMEMOffset() const {
6478   return isImm(); // Offset range is checked later by validator.
6479 }
6480 
6481 bool AMDGPUOperand::isSMRDLiteralOffset() const {
6482   // 32-bit literals are only supported on CI and we only want to use them
6483   // when the offset is > 8-bits.
6484   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
6485 }
6486 
6487 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
6488   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6489 }
6490 
6491 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
6492   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6493 }
6494 
6495 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
6496   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6497 }
6498 
6499 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
6500   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6501 }
6502 
6503 //===----------------------------------------------------------------------===//
6504 // vop3
6505 //===----------------------------------------------------------------------===//
6506 
6507 static bool ConvertOmodMul(int64_t &Mul) {
6508   if (Mul != 1 && Mul != 2 && Mul != 4)
6509     return false;
6510 
6511   Mul >>= 1;
6512   return true;
6513 }
6514 
6515 static bool ConvertOmodDiv(int64_t &Div) {
6516   if (Div == 1) {
6517     Div = 0;
6518     return true;
6519   }
6520 
6521   if (Div == 2) {
6522     Div = 3;
6523     return true;
6524   }
6525 
6526   return false;
6527 }
6528 
6529 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
6530   if (BoundCtrl == 0) {
6531     BoundCtrl = 1;
6532     return true;
6533   }
6534 
6535   if (BoundCtrl == -1) {
6536     BoundCtrl = 0;
6537     return true;
6538   }
6539 
6540   return false;
6541 }
6542 
6543 // Note: the order in this table matches the order of operands in AsmString.
6544 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
6545   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
6546   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
6547   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
6548   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
6549   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
6550   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
6551   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
6552   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
6553   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
6554   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
6555   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
6556   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
6557   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
6558   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
6559   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
6560   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
6561   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
6562   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
6563   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
6564   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
6565   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
6566   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
6567   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
6568   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
6569   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
6570   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
6571   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
6572   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
6573   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
6574   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
6575   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
6576   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
6577   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
6578   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
6579   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
6580   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
6581   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
6582   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
6583   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
6584   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
6585   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
6586   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
6587   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
6588 };
6589 
6590 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
6591 
6592   OperandMatchResultTy res = parseOptionalOpr(Operands);
6593 
6594   // This is a hack to enable hardcoded mandatory operands which follow
6595   // optional operands.
6596   //
6597   // Current design assumes that all operands after the first optional operand
6598   // are also optional. However implementation of some instructions violates
6599   // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
6600   //
6601   // To alleviate this problem, we have to (implicitly) parse extra operands
6602   // to make sure autogenerated parser of custom operands never hit hardcoded
6603   // mandatory operands.
6604 
6605   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
6606     if (res != MatchOperand_Success ||
6607         isToken(AsmToken::EndOfStatement))
6608       break;
6609 
6610     trySkipToken(AsmToken::Comma);
6611     res = parseOptionalOpr(Operands);
6612   }
6613 
6614   return res;
6615 }
6616 
6617 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6618   OperandMatchResultTy res;
6619   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6620     // try to parse any optional operand here
6621     if (Op.IsBit) {
6622       res = parseNamedBit(Op.Name, Operands, Op.Type);
6623     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6624       res = parseOModOperand(Operands);
6625     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6626                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6627                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6628       res = parseSDWASel(Operands, Op.Name, Op.Type);
6629     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6630       res = parseSDWADstUnused(Operands);
6631     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6632                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6633                Op.Type == AMDGPUOperand::ImmTyNegLo ||
6634                Op.Type == AMDGPUOperand::ImmTyNegHi) {
6635       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6636                                         Op.ConvertResult);
6637     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6638       res = parseDim(Operands);
6639     } else {
6640       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6641     }
6642     if (res != MatchOperand_NoMatch) {
6643       return res;
6644     }
6645   }
6646   return MatchOperand_NoMatch;
6647 }
6648 
6649 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6650   StringRef Name = Parser.getTok().getString();
6651   if (Name == "mul") {
6652     return parseIntWithPrefix("mul", Operands,
6653                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6654   }
6655 
6656   if (Name == "div") {
6657     return parseIntWithPrefix("div", Operands,
6658                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6659   }
6660 
6661   return MatchOperand_NoMatch;
6662 }
6663 
6664 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6665   cvtVOP3P(Inst, Operands);
6666 
6667   int Opc = Inst.getOpcode();
6668 
6669   int SrcNum;
6670   const int Ops[] = { AMDGPU::OpName::src0,
6671                       AMDGPU::OpName::src1,
6672                       AMDGPU::OpName::src2 };
6673   for (SrcNum = 0;
6674        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6675        ++SrcNum);
6676   assert(SrcNum > 0);
6677 
6678   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6679   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6680 
6681   if ((OpSel & (1 << SrcNum)) != 0) {
6682     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6683     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6684     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6685   }
6686 }
6687 
6688 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
6689       // 1. This operand is input modifiers
6690   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
6691       // 2. This is not last operand
6692       && Desc.NumOperands > (OpNum + 1)
6693       // 3. Next operand is register class
6694       && Desc.OpInfo[OpNum + 1].RegClass != -1
6695       // 4. Next register is not tied to any other operand
6696       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6697 }
6698 
6699 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6700 {
6701   OptionalImmIndexMap OptionalIdx;
6702   unsigned Opc = Inst.getOpcode();
6703 
6704   unsigned I = 1;
6705   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6706   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6707     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6708   }
6709 
6710   for (unsigned E = Operands.size(); I != E; ++I) {
6711     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6712     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6713       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6714     } else if (Op.isInterpSlot() ||
6715                Op.isInterpAttr() ||
6716                Op.isAttrChan()) {
6717       Inst.addOperand(MCOperand::createImm(Op.getImm()));
6718     } else if (Op.isImmModifier()) {
6719       OptionalIdx[Op.getImmTy()] = I;
6720     } else {
6721       llvm_unreachable("unhandled operand type");
6722     }
6723   }
6724 
6725   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6726     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6727   }
6728 
6729   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6730     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6731   }
6732 
6733   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6734     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6735   }
6736 }
6737 
6738 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
6739                               OptionalImmIndexMap &OptionalIdx) {
6740   unsigned Opc = Inst.getOpcode();
6741 
6742   unsigned I = 1;
6743   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6744   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6745     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6746   }
6747 
6748   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
6749     // This instruction has src modifiers
6750     for (unsigned E = Operands.size(); I != E; ++I) {
6751       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6752       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6753         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6754       } else if (Op.isImmModifier()) {
6755         OptionalIdx[Op.getImmTy()] = I;
6756       } else if (Op.isRegOrImm()) {
6757         Op.addRegOrImmOperands(Inst, 1);
6758       } else {
6759         llvm_unreachable("unhandled operand type");
6760       }
6761     }
6762   } else {
6763     // No src modifiers
6764     for (unsigned E = Operands.size(); I != E; ++I) {
6765       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6766       if (Op.isMod()) {
6767         OptionalIdx[Op.getImmTy()] = I;
6768       } else {
6769         Op.addRegOrImmOperands(Inst, 1);
6770       }
6771     }
6772   }
6773 
6774   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6775     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6776   }
6777 
6778   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6779     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6780   }
6781 
6782   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
6783   // it has src2 register operand that is tied to dst operand
6784   // we don't allow modifiers for this operand in assembler so src2_modifiers
6785   // should be 0.
6786   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
6787       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
6788       Opc == AMDGPU::V_MAC_F32_e64_vi ||
6789       Opc == AMDGPU::V_MAC_F16_e64_vi ||
6790       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
6791       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
6792       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
6793     auto it = Inst.begin();
6794     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
6795     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
6796     ++it;
6797     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6798   }
6799 }
6800 
6801 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
6802   OptionalImmIndexMap OptionalIdx;
6803   cvtVOP3(Inst, Operands, OptionalIdx);
6804 }
6805 
6806 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
6807                                const OperandVector &Operands) {
6808   OptionalImmIndexMap OptIdx;
6809   const int Opc = Inst.getOpcode();
6810   const MCInstrDesc &Desc = MII.get(Opc);
6811 
6812   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
6813 
6814   cvtVOP3(Inst, Operands, OptIdx);
6815 
6816   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
6817     assert(!IsPacked);
6818     Inst.addOperand(Inst.getOperand(0));
6819   }
6820 
6821   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
6822   // instruction, and then figure out where to actually put the modifiers
6823 
6824   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
6825 
6826   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
6827   if (OpSelHiIdx != -1) {
6828     int DefaultVal = IsPacked ? -1 : 0;
6829     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
6830                           DefaultVal);
6831   }
6832 
6833   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
6834   if (NegLoIdx != -1) {
6835     assert(IsPacked);
6836     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
6837     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
6838   }
6839 
6840   const int Ops[] = { AMDGPU::OpName::src0,
6841                       AMDGPU::OpName::src1,
6842                       AMDGPU::OpName::src2 };
6843   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
6844                          AMDGPU::OpName::src1_modifiers,
6845                          AMDGPU::OpName::src2_modifiers };
6846 
6847   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6848 
6849   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6850   unsigned OpSelHi = 0;
6851   unsigned NegLo = 0;
6852   unsigned NegHi = 0;
6853 
6854   if (OpSelHiIdx != -1) {
6855     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
6856   }
6857 
6858   if (NegLoIdx != -1) {
6859     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
6860     NegLo = Inst.getOperand(NegLoIdx).getImm();
6861     NegHi = Inst.getOperand(NegHiIdx).getImm();
6862   }
6863 
6864   for (int J = 0; J < 3; ++J) {
6865     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
6866     if (OpIdx == -1)
6867       break;
6868 
6869     uint32_t ModVal = 0;
6870 
6871     if ((OpSel & (1 << J)) != 0)
6872       ModVal |= SISrcMods::OP_SEL_0;
6873 
6874     if ((OpSelHi & (1 << J)) != 0)
6875       ModVal |= SISrcMods::OP_SEL_1;
6876 
6877     if ((NegLo & (1 << J)) != 0)
6878       ModVal |= SISrcMods::NEG;
6879 
6880     if ((NegHi & (1 << J)) != 0)
6881       ModVal |= SISrcMods::NEG_HI;
6882 
6883     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
6884 
6885     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
6886   }
6887 }
6888 
6889 //===----------------------------------------------------------------------===//
6890 // dpp
6891 //===----------------------------------------------------------------------===//
6892 
6893 bool AMDGPUOperand::isDPP8() const {
6894   return isImmTy(ImmTyDPP8);
6895 }
6896 
6897 bool AMDGPUOperand::isDPPCtrl() const {
6898   using namespace AMDGPU::DPP;
6899 
6900   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
6901   if (result) {
6902     int64_t Imm = getImm();
6903     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
6904            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
6905            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
6906            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
6907            (Imm == DppCtrl::WAVE_SHL1) ||
6908            (Imm == DppCtrl::WAVE_ROL1) ||
6909            (Imm == DppCtrl::WAVE_SHR1) ||
6910            (Imm == DppCtrl::WAVE_ROR1) ||
6911            (Imm == DppCtrl::ROW_MIRROR) ||
6912            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
6913            (Imm == DppCtrl::BCAST15) ||
6914            (Imm == DppCtrl::BCAST31) ||
6915            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
6916            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
6917   }
6918   return false;
6919 }
6920 
6921 //===----------------------------------------------------------------------===//
6922 // mAI
6923 //===----------------------------------------------------------------------===//
6924 
6925 bool AMDGPUOperand::isBLGP() const {
6926   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
6927 }
6928 
6929 bool AMDGPUOperand::isCBSZ() const {
6930   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
6931 }
6932 
6933 bool AMDGPUOperand::isABID() const {
6934   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
6935 }
6936 
6937 bool AMDGPUOperand::isS16Imm() const {
6938   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
6939 }
6940 
6941 bool AMDGPUOperand::isU16Imm() const {
6942   return isImm() && isUInt<16>(getImm());
6943 }
6944 
6945 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
6946   if (!isGFX10())
6947     return MatchOperand_NoMatch;
6948 
6949   SMLoc S = Parser.getTok().getLoc();
6950 
6951   if (getLexer().isNot(AsmToken::Identifier))
6952     return MatchOperand_NoMatch;
6953   if (getLexer().getTok().getString() != "dim")
6954     return MatchOperand_NoMatch;
6955 
6956   Parser.Lex();
6957   if (getLexer().isNot(AsmToken::Colon))
6958     return MatchOperand_ParseFail;
6959 
6960   Parser.Lex();
6961 
6962   // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
6963   // integer.
6964   std::string Token;
6965   if (getLexer().is(AsmToken::Integer)) {
6966     SMLoc Loc = getLexer().getTok().getEndLoc();
6967     Token = std::string(getLexer().getTok().getString());
6968     Parser.Lex();
6969     if (getLexer().getTok().getLoc() != Loc)
6970       return MatchOperand_ParseFail;
6971   }
6972   if (getLexer().isNot(AsmToken::Identifier))
6973     return MatchOperand_ParseFail;
6974   Token += getLexer().getTok().getString();
6975 
6976   StringRef DimId = Token;
6977   if (DimId.startswith("SQ_RSRC_IMG_"))
6978     DimId = DimId.substr(12);
6979 
6980   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
6981   if (!DimInfo)
6982     return MatchOperand_ParseFail;
6983 
6984   Parser.Lex();
6985 
6986   Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
6987                                               AMDGPUOperand::ImmTyDim));
6988   return MatchOperand_Success;
6989 }
6990 
6991 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
6992   SMLoc S = Parser.getTok().getLoc();
6993   StringRef Prefix;
6994 
6995   if (getLexer().getKind() == AsmToken::Identifier) {
6996     Prefix = Parser.getTok().getString();
6997   } else {
6998     return MatchOperand_NoMatch;
6999   }
7000 
7001   if (Prefix != "dpp8")
7002     return parseDPPCtrl(Operands);
7003   if (!isGFX10())
7004     return MatchOperand_NoMatch;
7005 
7006   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
7007 
7008   int64_t Sels[8];
7009 
7010   Parser.Lex();
7011   if (getLexer().isNot(AsmToken::Colon))
7012     return MatchOperand_ParseFail;
7013 
7014   Parser.Lex();
7015   if (getLexer().isNot(AsmToken::LBrac))
7016     return MatchOperand_ParseFail;
7017 
7018   Parser.Lex();
7019   if (getParser().parseAbsoluteExpression(Sels[0]))
7020     return MatchOperand_ParseFail;
7021   if (0 > Sels[0] || 7 < Sels[0])
7022     return MatchOperand_ParseFail;
7023 
7024   for (size_t i = 1; i < 8; ++i) {
7025     if (getLexer().isNot(AsmToken::Comma))
7026       return MatchOperand_ParseFail;
7027 
7028     Parser.Lex();
7029     if (getParser().parseAbsoluteExpression(Sels[i]))
7030       return MatchOperand_ParseFail;
7031     if (0 > Sels[i] || 7 < Sels[i])
7032       return MatchOperand_ParseFail;
7033   }
7034 
7035   if (getLexer().isNot(AsmToken::RBrac))
7036     return MatchOperand_ParseFail;
7037   Parser.Lex();
7038 
7039   unsigned DPP8 = 0;
7040   for (size_t i = 0; i < 8; ++i)
7041     DPP8 |= (Sels[i] << (i * 3));
7042 
7043   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
7044   return MatchOperand_Success;
7045 }
7046 
7047 OperandMatchResultTy
7048 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
7049   using namespace AMDGPU::DPP;
7050 
7051   SMLoc S = Parser.getTok().getLoc();
7052   StringRef Prefix;
7053   int64_t Int;
7054 
7055   if (getLexer().getKind() == AsmToken::Identifier) {
7056     Prefix = Parser.getTok().getString();
7057   } else {
7058     return MatchOperand_NoMatch;
7059   }
7060 
7061   if (Prefix == "row_mirror") {
7062     Int = DppCtrl::ROW_MIRROR;
7063     Parser.Lex();
7064   } else if (Prefix == "row_half_mirror") {
7065     Int = DppCtrl::ROW_HALF_MIRROR;
7066     Parser.Lex();
7067   } else {
7068     // Check to prevent parseDPPCtrlOps from eating invalid tokens
7069     if (Prefix != "quad_perm"
7070         && Prefix != "row_shl"
7071         && Prefix != "row_shr"
7072         && Prefix != "row_ror"
7073         && Prefix != "wave_shl"
7074         && Prefix != "wave_rol"
7075         && Prefix != "wave_shr"
7076         && Prefix != "wave_ror"
7077         && Prefix != "row_bcast"
7078         && Prefix != "row_share"
7079         && Prefix != "row_xmask") {
7080       return MatchOperand_NoMatch;
7081     }
7082 
7083     if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
7084       return MatchOperand_NoMatch;
7085 
7086     if (!isVI() && !isGFX9() &&
7087         (Prefix == "wave_shl" || Prefix == "wave_shr" ||
7088          Prefix == "wave_rol" || Prefix == "wave_ror" ||
7089          Prefix == "row_bcast"))
7090       return MatchOperand_NoMatch;
7091 
7092     Parser.Lex();
7093     if (getLexer().isNot(AsmToken::Colon))
7094       return MatchOperand_ParseFail;
7095 
7096     if (Prefix == "quad_perm") {
7097       // quad_perm:[%d,%d,%d,%d]
7098       Parser.Lex();
7099       if (getLexer().isNot(AsmToken::LBrac))
7100         return MatchOperand_ParseFail;
7101       Parser.Lex();
7102 
7103       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
7104         return MatchOperand_ParseFail;
7105 
7106       for (int i = 0; i < 3; ++i) {
7107         if (getLexer().isNot(AsmToken::Comma))
7108           return MatchOperand_ParseFail;
7109         Parser.Lex();
7110 
7111         int64_t Temp;
7112         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
7113           return MatchOperand_ParseFail;
7114         const int shift = i*2 + 2;
7115         Int += (Temp << shift);
7116       }
7117 
7118       if (getLexer().isNot(AsmToken::RBrac))
7119         return MatchOperand_ParseFail;
7120       Parser.Lex();
7121     } else {
7122       // sel:%d
7123       Parser.Lex();
7124       if (getParser().parseAbsoluteExpression(Int))
7125         return MatchOperand_ParseFail;
7126 
7127       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
7128         Int |= DppCtrl::ROW_SHL0;
7129       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
7130         Int |= DppCtrl::ROW_SHR0;
7131       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
7132         Int |= DppCtrl::ROW_ROR0;
7133       } else if (Prefix == "wave_shl" && 1 == Int) {
7134         Int = DppCtrl::WAVE_SHL1;
7135       } else if (Prefix == "wave_rol" && 1 == Int) {
7136         Int = DppCtrl::WAVE_ROL1;
7137       } else if (Prefix == "wave_shr" && 1 == Int) {
7138         Int = DppCtrl::WAVE_SHR1;
7139       } else if (Prefix == "wave_ror" && 1 == Int) {
7140         Int = DppCtrl::WAVE_ROR1;
7141       } else if (Prefix == "row_bcast") {
7142         if (Int == 15) {
7143           Int = DppCtrl::BCAST15;
7144         } else if (Int == 31) {
7145           Int = DppCtrl::BCAST31;
7146         } else {
7147           return MatchOperand_ParseFail;
7148         }
7149       } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
7150         Int |= DppCtrl::ROW_SHARE_FIRST;
7151       } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
7152         Int |= DppCtrl::ROW_XMASK_FIRST;
7153       } else {
7154         return MatchOperand_ParseFail;
7155       }
7156     }
7157   }
7158 
7159   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
7160   return MatchOperand_Success;
7161 }
7162 
7163 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
7164   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
7165 }
7166 
7167 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
7168   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
7169 }
7170 
7171 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
7172   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
7173 }
7174 
7175 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
7176   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
7177 }
7178 
7179 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
7180   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
7181 }
7182 
7183 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
7184   OptionalImmIndexMap OptionalIdx;
7185 
7186   unsigned I = 1;
7187   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7188   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7189     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7190   }
7191 
7192   int Fi = 0;
7193   for (unsigned E = Operands.size(); I != E; ++I) {
7194     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
7195                                             MCOI::TIED_TO);
7196     if (TiedTo != -1) {
7197       assert((unsigned)TiedTo < Inst.getNumOperands());
7198       // handle tied old or src2 for MAC instructions
7199       Inst.addOperand(Inst.getOperand(TiedTo));
7200     }
7201     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7202     // Add the register arguments
7203     if (Op.isReg() && validateVccOperand(Op.getReg())) {
7204       // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
7205       // Skip it.
7206       continue;
7207     }
7208 
7209     if (IsDPP8) {
7210       if (Op.isDPP8()) {
7211         Op.addImmOperands(Inst, 1);
7212       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7213         Op.addRegWithFPInputModsOperands(Inst, 2);
7214       } else if (Op.isFI()) {
7215         Fi = Op.getImm();
7216       } else if (Op.isReg()) {
7217         Op.addRegOperands(Inst, 1);
7218       } else {
7219         llvm_unreachable("Invalid operand type");
7220       }
7221     } else {
7222       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7223         Op.addRegWithFPInputModsOperands(Inst, 2);
7224       } else if (Op.isDPPCtrl()) {
7225         Op.addImmOperands(Inst, 1);
7226       } else if (Op.isImm()) {
7227         // Handle optional arguments
7228         OptionalIdx[Op.getImmTy()] = I;
7229       } else {
7230         llvm_unreachable("Invalid operand type");
7231       }
7232     }
7233   }
7234 
7235   if (IsDPP8) {
7236     using namespace llvm::AMDGPU::DPP;
7237     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
7238   } else {
7239     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
7240     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
7241     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
7242     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
7243       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
7244     }
7245   }
7246 }
7247 
7248 //===----------------------------------------------------------------------===//
7249 // sdwa
7250 //===----------------------------------------------------------------------===//
7251 
7252 OperandMatchResultTy
7253 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
7254                               AMDGPUOperand::ImmTy Type) {
7255   using namespace llvm::AMDGPU::SDWA;
7256 
7257   SMLoc S = Parser.getTok().getLoc();
7258   StringRef Value;
7259   OperandMatchResultTy res;
7260 
7261   res = parseStringWithPrefix(Prefix, Value);
7262   if (res != MatchOperand_Success) {
7263     return res;
7264   }
7265 
7266   int64_t Int;
7267   Int = StringSwitch<int64_t>(Value)
7268         .Case("BYTE_0", SdwaSel::BYTE_0)
7269         .Case("BYTE_1", SdwaSel::BYTE_1)
7270         .Case("BYTE_2", SdwaSel::BYTE_2)
7271         .Case("BYTE_3", SdwaSel::BYTE_3)
7272         .Case("WORD_0", SdwaSel::WORD_0)
7273         .Case("WORD_1", SdwaSel::WORD_1)
7274         .Case("DWORD", SdwaSel::DWORD)
7275         .Default(0xffffffff);
7276   Parser.Lex(); // eat last token
7277 
7278   if (Int == 0xffffffff) {
7279     return MatchOperand_ParseFail;
7280   }
7281 
7282   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
7283   return MatchOperand_Success;
7284 }
7285 
7286 OperandMatchResultTy
7287 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
7288   using namespace llvm::AMDGPU::SDWA;
7289 
7290   SMLoc S = Parser.getTok().getLoc();
7291   StringRef Value;
7292   OperandMatchResultTy res;
7293 
7294   res = parseStringWithPrefix("dst_unused", Value);
7295   if (res != MatchOperand_Success) {
7296     return res;
7297   }
7298 
7299   int64_t Int;
7300   Int = StringSwitch<int64_t>(Value)
7301         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
7302         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
7303         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
7304         .Default(0xffffffff);
7305   Parser.Lex(); // eat last token
7306 
7307   if (Int == 0xffffffff) {
7308     return MatchOperand_ParseFail;
7309   }
7310 
7311   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
7312   return MatchOperand_Success;
7313 }
7314 
7315 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
7316   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
7317 }
7318 
7319 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
7320   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
7321 }
7322 
7323 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
7324   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
7325 }
7326 
7327 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
7328   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
7329 }
7330 
7331 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
7332   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
7333 }
7334 
7335 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
7336                               uint64_t BasicInstType,
7337                               bool SkipDstVcc,
7338                               bool SkipSrcVcc) {
7339   using namespace llvm::AMDGPU::SDWA;
7340 
7341   OptionalImmIndexMap OptionalIdx;
7342   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
7343   bool SkippedVcc = false;
7344 
7345   unsigned I = 1;
7346   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7347   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7348     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7349   }
7350 
7351   for (unsigned E = Operands.size(); I != E; ++I) {
7352     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7353     if (SkipVcc && !SkippedVcc && Op.isReg() &&
7354         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
7355       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
7356       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
7357       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
7358       // Skip VCC only if we didn't skip it on previous iteration.
7359       // Note that src0 and src1 occupy 2 slots each because of modifiers.
7360       if (BasicInstType == SIInstrFlags::VOP2 &&
7361           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
7362            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
7363         SkippedVcc = true;
7364         continue;
7365       } else if (BasicInstType == SIInstrFlags::VOPC &&
7366                  Inst.getNumOperands() == 0) {
7367         SkippedVcc = true;
7368         continue;
7369       }
7370     }
7371     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7372       Op.addRegOrImmWithInputModsOperands(Inst, 2);
7373     } else if (Op.isImm()) {
7374       // Handle optional arguments
7375       OptionalIdx[Op.getImmTy()] = I;
7376     } else {
7377       llvm_unreachable("Invalid operand type");
7378     }
7379     SkippedVcc = false;
7380   }
7381 
7382   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
7383       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
7384       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
7385     // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
7386     switch (BasicInstType) {
7387     case SIInstrFlags::VOP1:
7388       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7389       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7390         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7391       }
7392       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7393       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7394       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7395       break;
7396 
7397     case SIInstrFlags::VOP2:
7398       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7399       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7400         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7401       }
7402       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7403       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7404       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7405       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7406       break;
7407 
7408     case SIInstrFlags::VOPC:
7409       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
7410         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7411       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7412       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7413       break;
7414 
7415     default:
7416       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
7417     }
7418   }
7419 
7420   // special case v_mac_{f16, f32}:
7421   // it has src2 register operand that is tied to dst operand
7422   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
7423       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
7424     auto it = Inst.begin();
7425     std::advance(
7426       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
7427     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
7428   }
7429 }
7430 
7431 //===----------------------------------------------------------------------===//
7432 // mAI
7433 //===----------------------------------------------------------------------===//
7434 
7435 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
7436   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
7437 }
7438 
7439 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
7440   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
7441 }
7442 
7443 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
7444   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
7445 }
7446 
7447 /// Force static initialization.
7448 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
7449   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
7450   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
7451 }
7452 
7453 #define GET_REGISTER_MATCHER
7454 #define GET_MATCHER_IMPLEMENTATION
7455 #define GET_MNEMONIC_SPELL_CHECKER
7456 #include "AMDGPUGenAsmMatcher.inc"
7457 
7458 // This fuction should be defined after auto-generated include so that we have
7459 // MatchClassKind enum defined
7460 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
7461                                                      unsigned Kind) {
7462   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
7463   // But MatchInstructionImpl() expects to meet token and fails to validate
7464   // operand. This method checks if we are given immediate operand but expect to
7465   // get corresponding token.
7466   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
7467   switch (Kind) {
7468   case MCK_addr64:
7469     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
7470   case MCK_gds:
7471     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
7472   case MCK_lds:
7473     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
7474   case MCK_glc:
7475     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
7476   case MCK_idxen:
7477     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
7478   case MCK_offen:
7479     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
7480   case MCK_SSrcB32:
7481     // When operands have expression values, they will return true for isToken,
7482     // because it is not possible to distinguish between a token and an
7483     // expression at parse time. MatchInstructionImpl() will always try to
7484     // match an operand as a token, when isToken returns true, and when the
7485     // name of the expression is not a valid token, the match will fail,
7486     // so we need to handle it here.
7487     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
7488   case MCK_SSrcF32:
7489     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
7490   case MCK_SoppBrTarget:
7491     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
7492   case MCK_VReg32OrOff:
7493     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
7494   case MCK_InterpSlot:
7495     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
7496   case MCK_Attr:
7497     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
7498   case MCK_AttrChan:
7499     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
7500   case MCK_ImmSMEMOffset:
7501     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
7502   case MCK_SReg_64:
7503   case MCK_SReg_64_XEXEC:
7504     // Null is defined as a 32-bit register but
7505     // it should also be enabled with 64-bit operands.
7506     // The following code enables it for SReg_64 operands
7507     // used as source and destination. Remaining source
7508     // operands are handled in isInlinableImm.
7509     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
7510   default:
7511     return Match_InvalidOperand;
7512   }
7513 }
7514 
7515 //===----------------------------------------------------------------------===//
7516 // endpgm
7517 //===----------------------------------------------------------------------===//
7518 
7519 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
7520   SMLoc S = Parser.getTok().getLoc();
7521   int64_t Imm = 0;
7522 
7523   if (!parseExpr(Imm)) {
7524     // The operand is optional, if not present default to 0
7525     Imm = 0;
7526   }
7527 
7528   if (!isUInt<16>(Imm)) {
7529     Error(S, "expected a 16-bit value");
7530     return MatchOperand_ParseFail;
7531   }
7532 
7533   Operands.push_back(
7534       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
7535   return MatchOperand_Success;
7536 }
7537 
7538 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
7539