1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/ErrorHandling.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
63 
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
67 
68 namespace {
69 
70 class AMDGPUAsmParser;
71 
72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
73 
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
77 
78 class AMDGPUOperand : public MCParsedAsmOperand {
79   enum KindTy {
80     Token,
81     Immediate,
82     Register,
83     Expression
84   } Kind;
85 
86   SMLoc StartLoc, EndLoc;
87   const AMDGPUAsmParser *AsmParser;
88 
89 public:
90   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
92 
93   using Ptr = std::unique_ptr<AMDGPUOperand>;
94 
95   struct Modifiers {
96     bool Abs = false;
97     bool Neg = false;
98     bool Sext = false;
99 
100     bool hasFPModifiers() const { return Abs || Neg; }
101     bool hasIntModifiers() const { return Sext; }
102     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103 
104     int64_t getFPModifiersOperand() const {
105       int64_t Operand = 0;
106       Operand |= Abs ? SISrcMods::ABS : 0u;
107       Operand |= Neg ? SISrcMods::NEG : 0u;
108       return Operand;
109     }
110 
111     int64_t getIntModifiersOperand() const {
112       int64_t Operand = 0;
113       Operand |= Sext ? SISrcMods::SEXT : 0u;
114       return Operand;
115     }
116 
117     int64_t getModifiersOperand() const {
118       assert(!(hasFPModifiers() && hasIntModifiers())
119            && "fp and int modifiers should not be used simultaneously");
120       if (hasFPModifiers()) {
121         return getFPModifiersOperand();
122       } else if (hasIntModifiers()) {
123         return getIntModifiersOperand();
124       } else {
125         return 0;
126       }
127     }
128 
129     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
130   };
131 
132   enum ImmTy {
133     ImmTyNone,
134     ImmTyGDS,
135     ImmTyLDS,
136     ImmTyOffen,
137     ImmTyIdxen,
138     ImmTyAddr64,
139     ImmTyOffset,
140     ImmTyInstOffset,
141     ImmTyOffset0,
142     ImmTyOffset1,
143     ImmTyDLC,
144     ImmTyGLC,
145     ImmTySLC,
146     ImmTyTFE,
147     ImmTyD16,
148     ImmTyClampSI,
149     ImmTyOModSI,
150     ImmTyDPP8,
151     ImmTyDppCtrl,
152     ImmTyDppRowMask,
153     ImmTyDppBankMask,
154     ImmTyDppBoundCtrl,
155     ImmTyDppFi,
156     ImmTySdwaDstSel,
157     ImmTySdwaSrc0Sel,
158     ImmTySdwaSrc1Sel,
159     ImmTySdwaDstUnused,
160     ImmTyDMask,
161     ImmTyDim,
162     ImmTyUNorm,
163     ImmTyDA,
164     ImmTyR128A16,
165     ImmTyLWE,
166     ImmTyExpTgt,
167     ImmTyExpCompr,
168     ImmTyExpVM,
169     ImmTyFORMAT,
170     ImmTyHwreg,
171     ImmTyOff,
172     ImmTySendMsg,
173     ImmTyInterpSlot,
174     ImmTyInterpAttr,
175     ImmTyAttrChan,
176     ImmTyOpSel,
177     ImmTyOpSelHi,
178     ImmTyNegLo,
179     ImmTyNegHi,
180     ImmTySwizzle,
181     ImmTyGprIdxMode,
182     ImmTyHigh,
183     ImmTyBLGP,
184     ImmTyCBSZ,
185     ImmTyABID,
186     ImmTyEndpgm,
187   };
188 
189 private:
190   struct TokOp {
191     const char *Data;
192     unsigned Length;
193   };
194 
195   struct ImmOp {
196     int64_t Val;
197     ImmTy Type;
198     bool IsFPImm;
199     Modifiers Mods;
200   };
201 
202   struct RegOp {
203     unsigned RegNo;
204     Modifiers Mods;
205   };
206 
207   union {
208     TokOp Tok;
209     ImmOp Imm;
210     RegOp Reg;
211     const MCExpr *Expr;
212   };
213 
214 public:
215   bool isToken() const override {
216     if (Kind == Token)
217       return true;
218 
219     // When parsing operands, we can't always tell if something was meant to be
220     // a token, like 'gds', or an expression that references a global variable.
221     // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
223     return isSymbolRefExpr();
224   }
225 
226   bool isSymbolRefExpr() const {
227     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
228   }
229 
230   bool isImm() const override {
231     return Kind == Immediate;
232   }
233 
234   bool isInlinableImm(MVT type) const;
235   bool isLiteralImm(MVT type) const;
236 
237   bool isRegKind() const {
238     return Kind == Register;
239   }
240 
241   bool isReg() const override {
242     return isRegKind() && !hasModifiers();
243   }
244 
245   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
246     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
247   }
248 
249   bool isRegOrImmWithInt16InputMods() const {
250     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
251   }
252 
253   bool isRegOrImmWithInt32InputMods() const {
254     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
255   }
256 
257   bool isRegOrImmWithInt64InputMods() const {
258     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
259   }
260 
261   bool isRegOrImmWithFP16InputMods() const {
262     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
263   }
264 
265   bool isRegOrImmWithFP32InputMods() const {
266     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
267   }
268 
269   bool isRegOrImmWithFP64InputMods() const {
270     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
271   }
272 
273   bool isVReg() const {
274     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
275            isRegClass(AMDGPU::VReg_64RegClassID) ||
276            isRegClass(AMDGPU::VReg_96RegClassID) ||
277            isRegClass(AMDGPU::VReg_128RegClassID) ||
278            isRegClass(AMDGPU::VReg_160RegClassID) ||
279            isRegClass(AMDGPU::VReg_256RegClassID) ||
280            isRegClass(AMDGPU::VReg_512RegClassID) ||
281            isRegClass(AMDGPU::VReg_1024RegClassID);
282   }
283 
284   bool isVReg32() const {
285     return isRegClass(AMDGPU::VGPR_32RegClassID);
286   }
287 
288   bool isVReg32OrOff() const {
289     return isOff() || isVReg32();
290   }
291 
292   bool isSDWAOperand(MVT type) const;
293   bool isSDWAFP16Operand() const;
294   bool isSDWAFP32Operand() const;
295   bool isSDWAInt16Operand() const;
296   bool isSDWAInt32Operand() const;
297 
298   bool isImmTy(ImmTy ImmT) const {
299     return isImm() && Imm.Type == ImmT;
300   }
301 
302   bool isImmModifier() const {
303     return isImm() && Imm.Type != ImmTyNone;
304   }
305 
306   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
307   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
308   bool isDMask() const { return isImmTy(ImmTyDMask); }
309   bool isDim() const { return isImmTy(ImmTyDim); }
310   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
311   bool isDA() const { return isImmTy(ImmTyDA); }
312   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
313   bool isLWE() const { return isImmTy(ImmTyLWE); }
314   bool isOff() const { return isImmTy(ImmTyOff); }
315   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
316   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
317   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
318   bool isOffen() const { return isImmTy(ImmTyOffen); }
319   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
320   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
321   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
322   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
323   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
324 
325   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
326   bool isGDS() const { return isImmTy(ImmTyGDS); }
327   bool isLDS() const { return isImmTy(ImmTyLDS); }
328   bool isDLC() const { return isImmTy(ImmTyDLC); }
329   bool isGLC() const { return isImmTy(ImmTyGLC); }
330   bool isSLC() const { return isImmTy(ImmTySLC); }
331   bool isTFE() const { return isImmTy(ImmTyTFE); }
332   bool isD16() const { return isImmTy(ImmTyD16); }
333   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
334   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
335   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
336   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
337   bool isFI() const { return isImmTy(ImmTyDppFi); }
338   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
339   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
340   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
341   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
342   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
343   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
344   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
345   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
346   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
347   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
348   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
349   bool isHigh() const { return isImmTy(ImmTyHigh); }
350 
351   bool isMod() const {
352     return isClampSI() || isOModSI();
353   }
354 
355   bool isRegOrImm() const {
356     return isReg() || isImm();
357   }
358 
359   bool isRegClass(unsigned RCID) const;
360 
361   bool isInlineValue() const;
362 
363   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
364     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
365   }
366 
367   bool isSCSrcB16() const {
368     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
369   }
370 
371   bool isSCSrcV2B16() const {
372     return isSCSrcB16();
373   }
374 
375   bool isSCSrcB32() const {
376     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
377   }
378 
379   bool isSCSrcB64() const {
380     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
381   }
382 
383   bool isBoolReg() const;
384 
385   bool isSCSrcF16() const {
386     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
387   }
388 
389   bool isSCSrcV2F16() const {
390     return isSCSrcF16();
391   }
392 
393   bool isSCSrcF32() const {
394     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
395   }
396 
397   bool isSCSrcF64() const {
398     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
399   }
400 
401   bool isSSrcB32() const {
402     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
403   }
404 
405   bool isSSrcB16() const {
406     return isSCSrcB16() || isLiteralImm(MVT::i16);
407   }
408 
409   bool isSSrcV2B16() const {
410     llvm_unreachable("cannot happen");
411     return isSSrcB16();
412   }
413 
414   bool isSSrcB64() const {
415     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
416     // See isVSrc64().
417     return isSCSrcB64() || isLiteralImm(MVT::i64);
418   }
419 
420   bool isSSrcF32() const {
421     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
422   }
423 
424   bool isSSrcF64() const {
425     return isSCSrcB64() || isLiteralImm(MVT::f64);
426   }
427 
428   bool isSSrcF16() const {
429     return isSCSrcB16() || isLiteralImm(MVT::f16);
430   }
431 
432   bool isSSrcV2F16() const {
433     llvm_unreachable("cannot happen");
434     return isSSrcF16();
435   }
436 
437   bool isSSrcOrLdsB32() const {
438     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
439            isLiteralImm(MVT::i32) || isExpr();
440   }
441 
442   bool isVCSrcB32() const {
443     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
444   }
445 
446   bool isVCSrcB64() const {
447     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
448   }
449 
450   bool isVCSrcB16() const {
451     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
452   }
453 
454   bool isVCSrcV2B16() const {
455     return isVCSrcB16();
456   }
457 
458   bool isVCSrcF32() const {
459     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
460   }
461 
462   bool isVCSrcF64() const {
463     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
464   }
465 
466   bool isVCSrcF16() const {
467     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
468   }
469 
470   bool isVCSrcV2F16() const {
471     return isVCSrcF16();
472   }
473 
474   bool isVSrcB32() const {
475     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
476   }
477 
478   bool isVSrcB64() const {
479     return isVCSrcF64() || isLiteralImm(MVT::i64);
480   }
481 
482   bool isVSrcB16() const {
483     return isVCSrcF16() || isLiteralImm(MVT::i16);
484   }
485 
486   bool isVSrcV2B16() const {
487     return isVSrcB16() || isLiteralImm(MVT::v2i16);
488   }
489 
490   bool isVSrcF32() const {
491     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
492   }
493 
494   bool isVSrcF64() const {
495     return isVCSrcF64() || isLiteralImm(MVT::f64);
496   }
497 
498   bool isVSrcF16() const {
499     return isVCSrcF16() || isLiteralImm(MVT::f16);
500   }
501 
502   bool isVSrcV2F16() const {
503     return isVSrcF16() || isLiteralImm(MVT::v2f16);
504   }
505 
506   bool isVISrcB32() const {
507     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
508   }
509 
510   bool isVISrcB16() const {
511     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
512   }
513 
514   bool isVISrcV2B16() const {
515     return isVISrcB16();
516   }
517 
518   bool isVISrcF32() const {
519     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
520   }
521 
522   bool isVISrcF16() const {
523     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
524   }
525 
526   bool isVISrcV2F16() const {
527     return isVISrcF16() || isVISrcB32();
528   }
529 
530   bool isAISrcB32() const {
531     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
532   }
533 
534   bool isAISrcB16() const {
535     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
536   }
537 
538   bool isAISrcV2B16() const {
539     return isAISrcB16();
540   }
541 
542   bool isAISrcF32() const {
543     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
544   }
545 
546   bool isAISrcF16() const {
547     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
548   }
549 
550   bool isAISrcV2F16() const {
551     return isAISrcF16() || isAISrcB32();
552   }
553 
554   bool isAISrc_128B32() const {
555     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
556   }
557 
558   bool isAISrc_128B16() const {
559     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
560   }
561 
562   bool isAISrc_128V2B16() const {
563     return isAISrc_128B16();
564   }
565 
566   bool isAISrc_128F32() const {
567     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
568   }
569 
570   bool isAISrc_128F16() const {
571     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
572   }
573 
574   bool isAISrc_128V2F16() const {
575     return isAISrc_128F16() || isAISrc_128B32();
576   }
577 
578   bool isAISrc_512B32() const {
579     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
580   }
581 
582   bool isAISrc_512B16() const {
583     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
584   }
585 
586   bool isAISrc_512V2B16() const {
587     return isAISrc_512B16();
588   }
589 
590   bool isAISrc_512F32() const {
591     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
592   }
593 
594   bool isAISrc_512F16() const {
595     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
596   }
597 
598   bool isAISrc_512V2F16() const {
599     return isAISrc_512F16() || isAISrc_512B32();
600   }
601 
602   bool isAISrc_1024B32() const {
603     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
604   }
605 
606   bool isAISrc_1024B16() const {
607     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
608   }
609 
610   bool isAISrc_1024V2B16() const {
611     return isAISrc_1024B16();
612   }
613 
614   bool isAISrc_1024F32() const {
615     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
616   }
617 
618   bool isAISrc_1024F16() const {
619     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
620   }
621 
622   bool isAISrc_1024V2F16() const {
623     return isAISrc_1024F16() || isAISrc_1024B32();
624   }
625 
626   bool isKImmFP32() const {
627     return isLiteralImm(MVT::f32);
628   }
629 
630   bool isKImmFP16() const {
631     return isLiteralImm(MVT::f16);
632   }
633 
634   bool isMem() const override {
635     return false;
636   }
637 
638   bool isExpr() const {
639     return Kind == Expression;
640   }
641 
642   bool isSoppBrTarget() const {
643     return isExpr() || isImm();
644   }
645 
646   bool isSWaitCnt() const;
647   bool isHwreg() const;
648   bool isSendMsg() const;
649   bool isSwizzle() const;
650   bool isSMRDOffset8() const;
651   bool isSMRDOffset20() const;
652   bool isSMRDLiteralOffset() const;
653   bool isDPP8() const;
654   bool isDPPCtrl() const;
655   bool isBLGP() const;
656   bool isCBSZ() const;
657   bool isABID() const;
658   bool isGPRIdxMode() const;
659   bool isS16Imm() const;
660   bool isU16Imm() const;
661   bool isEndpgm() const;
662 
663   StringRef getExpressionAsToken() const {
664     assert(isExpr());
665     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
666     return S->getSymbol().getName();
667   }
668 
669   StringRef getToken() const {
670     assert(isToken());
671 
672     if (Kind == Expression)
673       return getExpressionAsToken();
674 
675     return StringRef(Tok.Data, Tok.Length);
676   }
677 
678   int64_t getImm() const {
679     assert(isImm());
680     return Imm.Val;
681   }
682 
683   ImmTy getImmTy() const {
684     assert(isImm());
685     return Imm.Type;
686   }
687 
688   unsigned getReg() const override {
689     assert(isRegKind());
690     return Reg.RegNo;
691   }
692 
693   SMLoc getStartLoc() const override {
694     return StartLoc;
695   }
696 
697   SMLoc getEndLoc() const override {
698     return EndLoc;
699   }
700 
701   SMRange getLocRange() const {
702     return SMRange(StartLoc, EndLoc);
703   }
704 
705   Modifiers getModifiers() const {
706     assert(isRegKind() || isImmTy(ImmTyNone));
707     return isRegKind() ? Reg.Mods : Imm.Mods;
708   }
709 
710   void setModifiers(Modifiers Mods) {
711     assert(isRegKind() || isImmTy(ImmTyNone));
712     if (isRegKind())
713       Reg.Mods = Mods;
714     else
715       Imm.Mods = Mods;
716   }
717 
718   bool hasModifiers() const {
719     return getModifiers().hasModifiers();
720   }
721 
722   bool hasFPModifiers() const {
723     return getModifiers().hasFPModifiers();
724   }
725 
726   bool hasIntModifiers() const {
727     return getModifiers().hasIntModifiers();
728   }
729 
730   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
731 
732   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
733 
734   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
735 
736   template <unsigned Bitwidth>
737   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
738 
739   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
740     addKImmFPOperands<16>(Inst, N);
741   }
742 
743   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
744     addKImmFPOperands<32>(Inst, N);
745   }
746 
747   void addRegOperands(MCInst &Inst, unsigned N) const;
748 
749   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
750     addRegOperands(Inst, N);
751   }
752 
753   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
754     if (isRegKind())
755       addRegOperands(Inst, N);
756     else if (isExpr())
757       Inst.addOperand(MCOperand::createExpr(Expr));
758     else
759       addImmOperands(Inst, N);
760   }
761 
762   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
763     Modifiers Mods = getModifiers();
764     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
765     if (isRegKind()) {
766       addRegOperands(Inst, N);
767     } else {
768       addImmOperands(Inst, N, false);
769     }
770   }
771 
772   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
773     assert(!hasIntModifiers());
774     addRegOrImmWithInputModsOperands(Inst, N);
775   }
776 
777   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
778     assert(!hasFPModifiers());
779     addRegOrImmWithInputModsOperands(Inst, N);
780   }
781 
782   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
783     Modifiers Mods = getModifiers();
784     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
785     assert(isRegKind());
786     addRegOperands(Inst, N);
787   }
788 
789   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
790     assert(!hasIntModifiers());
791     addRegWithInputModsOperands(Inst, N);
792   }
793 
794   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
795     assert(!hasFPModifiers());
796     addRegWithInputModsOperands(Inst, N);
797   }
798 
799   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
800     if (isImm())
801       addImmOperands(Inst, N);
802     else {
803       assert(isExpr());
804       Inst.addOperand(MCOperand::createExpr(Expr));
805     }
806   }
807 
808   static void printImmTy(raw_ostream& OS, ImmTy Type) {
809     switch (Type) {
810     case ImmTyNone: OS << "None"; break;
811     case ImmTyGDS: OS << "GDS"; break;
812     case ImmTyLDS: OS << "LDS"; break;
813     case ImmTyOffen: OS << "Offen"; break;
814     case ImmTyIdxen: OS << "Idxen"; break;
815     case ImmTyAddr64: OS << "Addr64"; break;
816     case ImmTyOffset: OS << "Offset"; break;
817     case ImmTyInstOffset: OS << "InstOffset"; break;
818     case ImmTyOffset0: OS << "Offset0"; break;
819     case ImmTyOffset1: OS << "Offset1"; break;
820     case ImmTyDLC: OS << "DLC"; break;
821     case ImmTyGLC: OS << "GLC"; break;
822     case ImmTySLC: OS << "SLC"; break;
823     case ImmTyTFE: OS << "TFE"; break;
824     case ImmTyD16: OS << "D16"; break;
825     case ImmTyFORMAT: OS << "FORMAT"; break;
826     case ImmTyClampSI: OS << "ClampSI"; break;
827     case ImmTyOModSI: OS << "OModSI"; break;
828     case ImmTyDPP8: OS << "DPP8"; break;
829     case ImmTyDppCtrl: OS << "DppCtrl"; break;
830     case ImmTyDppRowMask: OS << "DppRowMask"; break;
831     case ImmTyDppBankMask: OS << "DppBankMask"; break;
832     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
833     case ImmTyDppFi: OS << "FI"; break;
834     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
835     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
836     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
837     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
838     case ImmTyDMask: OS << "DMask"; break;
839     case ImmTyDim: OS << "Dim"; break;
840     case ImmTyUNorm: OS << "UNorm"; break;
841     case ImmTyDA: OS << "DA"; break;
842     case ImmTyR128A16: OS << "R128A16"; break;
843     case ImmTyLWE: OS << "LWE"; break;
844     case ImmTyOff: OS << "Off"; break;
845     case ImmTyExpTgt: OS << "ExpTgt"; break;
846     case ImmTyExpCompr: OS << "ExpCompr"; break;
847     case ImmTyExpVM: OS << "ExpVM"; break;
848     case ImmTyHwreg: OS << "Hwreg"; break;
849     case ImmTySendMsg: OS << "SendMsg"; break;
850     case ImmTyInterpSlot: OS << "InterpSlot"; break;
851     case ImmTyInterpAttr: OS << "InterpAttr"; break;
852     case ImmTyAttrChan: OS << "AttrChan"; break;
853     case ImmTyOpSel: OS << "OpSel"; break;
854     case ImmTyOpSelHi: OS << "OpSelHi"; break;
855     case ImmTyNegLo: OS << "NegLo"; break;
856     case ImmTyNegHi: OS << "NegHi"; break;
857     case ImmTySwizzle: OS << "Swizzle"; break;
858     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
859     case ImmTyHigh: OS << "High"; break;
860     case ImmTyBLGP: OS << "BLGP"; break;
861     case ImmTyCBSZ: OS << "CBSZ"; break;
862     case ImmTyABID: OS << "ABID"; break;
863     case ImmTyEndpgm: OS << "Endpgm"; break;
864     }
865   }
866 
867   void print(raw_ostream &OS) const override {
868     switch (Kind) {
869     case Register:
870       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
871       break;
872     case Immediate:
873       OS << '<' << getImm();
874       if (getImmTy() != ImmTyNone) {
875         OS << " type: "; printImmTy(OS, getImmTy());
876       }
877       OS << " mods: " << Imm.Mods << '>';
878       break;
879     case Token:
880       OS << '\'' << getToken() << '\'';
881       break;
882     case Expression:
883       OS << "<expr " << *Expr << '>';
884       break;
885     }
886   }
887 
888   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
889                                       int64_t Val, SMLoc Loc,
890                                       ImmTy Type = ImmTyNone,
891                                       bool IsFPImm = false) {
892     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
893     Op->Imm.Val = Val;
894     Op->Imm.IsFPImm = IsFPImm;
895     Op->Imm.Type = Type;
896     Op->Imm.Mods = Modifiers();
897     Op->StartLoc = Loc;
898     Op->EndLoc = Loc;
899     return Op;
900   }
901 
902   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
903                                         StringRef Str, SMLoc Loc,
904                                         bool HasExplicitEncodingSize = true) {
905     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
906     Res->Tok.Data = Str.data();
907     Res->Tok.Length = Str.size();
908     Res->StartLoc = Loc;
909     Res->EndLoc = Loc;
910     return Res;
911   }
912 
913   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
914                                       unsigned RegNo, SMLoc S,
915                                       SMLoc E) {
916     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
917     Op->Reg.RegNo = RegNo;
918     Op->Reg.Mods = Modifiers();
919     Op->StartLoc = S;
920     Op->EndLoc = E;
921     return Op;
922   }
923 
924   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
925                                        const class MCExpr *Expr, SMLoc S) {
926     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
927     Op->Expr = Expr;
928     Op->StartLoc = S;
929     Op->EndLoc = S;
930     return Op;
931   }
932 };
933 
934 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg:" << Mods.Neg << " sext:" << Mods.Sext;
936   return OS;
937 }
938 
939 //===----------------------------------------------------------------------===//
940 // AsmParser
941 //===----------------------------------------------------------------------===//
942 
943 // Holds info related to the current kernel, e.g. count of SGPRs used.
944 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
945 // .amdgpu_hsa_kernel or at EOF.
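// For example, a reference to v[2:5] results in usesRegister(IS_VGPR, 2, 4),
// which marks VGPR index 5 as used and updates the .kernel.vgpr_count symbol
// to 6.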
946 class KernelScopeInfo {
947   int SgprIndexUnusedMin = -1;
948   int VgprIndexUnusedMin = -1;
949   MCContext *Ctx = nullptr;
950 
951   void usesSgprAt(int i) {
952     if (i >= SgprIndexUnusedMin) {
953       SgprIndexUnusedMin = ++i;
954       if (Ctx) {
955         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
956         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
957       }
958     }
959   }
960 
961   void usesVgprAt(int i) {
962     if (i >= VgprIndexUnusedMin) {
963       VgprIndexUnusedMin = ++i;
964       if (Ctx) {
965         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
966         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
967       }
968     }
969   }
970 
971 public:
972   KernelScopeInfo() = default;
973 
974   void initialize(MCContext &Context) {
975     Ctx = &Context;
976     usesSgprAt(SgprIndexUnusedMin = -1);
977     usesVgprAt(VgprIndexUnusedMin = -1);
978   }
979 
980   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
981     switch (RegKind) {
982       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
983       case IS_AGPR: // fall through
984       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
985       default: break;
986     }
987   }
988 };
989 
990 class AMDGPUAsmParser : public MCTargetAsmParser {
991   MCAsmParser &Parser;
992 
993   // Number of extra operands parsed after the first optional operand.
994   // This may be necessary to skip hardcoded mandatory operands.
995   static const unsigned MAX_OPR_LOOKAHEAD = 8;
996 
997   unsigned ForcedEncodingSize = 0;
998   bool ForcedDPP = false;
999   bool ForcedSDWA = false;
1000   KernelScopeInfo KernelScope;
1001 
1002   /// @name Auto-generated Match Functions
1003   /// {
1004 
1005 #define GET_ASSEMBLER_HEADER
1006 #include "AMDGPUGenAsmMatcher.inc"
1007 
1008   /// }
1009 
1010 private:
1011   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1012   bool OutOfRangeError(SMRange Range);
1013   /// Calculate VGPR/SGPR blocks required for given target, reserved
1014   /// registers, and user-specified NextFreeXGPR values.
1015   ///
1016   /// \param Features [in] Target features, used for bug corrections.
1017   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1018   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1019   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1020   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1021   /// descriptor field, if valid.
1022   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1023   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1024   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1025   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1026   /// \param VGPRBlocks [out] Result VGPR block count.
1027   /// \param SGPRBlocks [out] Result SGPR block count.
1028   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1029                           bool FlatScrUsed, bool XNACKUsed,
1030                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1031                           SMRange VGPRRange, unsigned NextFreeSGPR,
1032                           SMRange SGPRRange, unsigned &VGPRBlocks,
1033                           unsigned &SGPRBlocks);
1034   bool ParseDirectiveAMDGCNTarget();
1035   bool ParseDirectiveAMDHSAKernel();
1036   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1037   bool ParseDirectiveHSACodeObjectVersion();
1038   bool ParseDirectiveHSACodeObjectISA();
1039   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1040   bool ParseDirectiveAMDKernelCodeT();
1041   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1042   bool ParseDirectiveAMDGPUHsaKernel();
1043 
1044   bool ParseDirectiveISAVersion();
1045   bool ParseDirectiveHSAMetadata();
1046   bool ParseDirectivePALMetadataBegin();
1047   bool ParseDirectivePALMetadata();
1048   bool ParseDirectiveAMDGPULDS();
1049 
1050   /// Common code to parse out a block of text (typically YAML) between start and
1051   /// end directives.
1052   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1053                            const char *AssemblerDirectiveEnd,
1054                            std::string &CollectString);
1055 
1056   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1057                              RegisterKind RegKind, unsigned Reg1);
1058   bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
1059                            unsigned& RegNum, unsigned& RegWidth);
1060   unsigned ParseRegularReg(RegisterKind &RegKind,
1061                            unsigned &RegNum,
1062                            unsigned &RegWidth);
1063   unsigned ParseSpecialReg(RegisterKind &RegKind,
1064                            unsigned &RegNum,
1065                            unsigned &RegWidth);
1066   unsigned ParseRegList(RegisterKind &RegKind,
1067                         unsigned &RegNum,
1068                         unsigned &RegWidth);
1069   bool ParseRegRange(unsigned& Num, unsigned& Width);
1070   unsigned getRegularReg(RegisterKind RegKind,
1071                          unsigned RegNum,
1072                          unsigned RegWidth);
1073 
1074   bool isRegister();
1075   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1076   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1077   void initializeGprCountSymbol(RegisterKind RegKind);
1078   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1079                              unsigned RegWidth);
1080   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1081                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1082   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1083                  bool IsGdsHardcoded);
1084 
1085 public:
1086   enum AMDGPUMatchResultTy {
1087     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1088   };
1089   enum OperandMode {
1090     OperandMode_Default,
1091     OperandMode_NSA,
1092   };
1093 
1094   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1095 
1096   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1097                const MCInstrInfo &MII,
1098                const MCTargetOptions &Options)
1099       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1100     MCAsmParserExtension::Initialize(Parser);
1101 
1102     if (getFeatureBits().none()) {
1103       // Set default features.
1104       copySTI().ToggleFeature("southern-islands");
1105     }
1106 
1107     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1108 
1109     {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
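      // These symbols let assembly code query the target at assembly time,
      // e.g. ".if .amdgcn.gfx_generation_number >= 9" on code object v3
      // targets.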
1114       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1115       MCContext &Ctx = getContext();
1116       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1117         MCSymbol *Sym =
1118             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1119         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1120         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1121         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1122         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1123         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1124       } else {
1125         MCSymbol *Sym =
1126             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1127         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1128         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1129         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1130         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1131         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1132       }
1133       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1134         initializeGprCountSymbol(IS_VGPR);
1135         initializeGprCountSymbol(IS_SGPR);
1136       } else
1137         KernelScope.initialize(getContext());
1138     }
1139   }
1140 
1141   bool hasXNACK() const {
1142     return AMDGPU::hasXNACK(getSTI());
1143   }
1144 
1145   bool hasMIMG_R128() const {
1146     return AMDGPU::hasMIMG_R128(getSTI());
1147   }
1148 
1149   bool hasPackedD16() const {
1150     return AMDGPU::hasPackedD16(getSTI());
1151   }
1152 
1153   bool isSI() const {
1154     return AMDGPU::isSI(getSTI());
1155   }
1156 
1157   bool isCI() const {
1158     return AMDGPU::isCI(getSTI());
1159   }
1160 
1161   bool isVI() const {
1162     return AMDGPU::isVI(getSTI());
1163   }
1164 
1165   bool isGFX9() const {
1166     return AMDGPU::isGFX9(getSTI());
1167   }
1168 
1169   bool isGFX10() const {
1170     return AMDGPU::isGFX10(getSTI());
1171   }
1172 
1173   bool hasInv2PiInlineImm() const {
1174     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1175   }
1176 
1177   bool hasFlatOffsets() const {
1178     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1179   }
1180 
1181   bool hasSGPR102_SGPR103() const {
1182     return !isVI() && !isGFX9();
1183   }
1184 
1185   bool hasSGPR104_SGPR105() const {
1186     return isGFX10();
1187   }
1188 
1189   bool hasIntClamp() const {
1190     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1191   }
1192 
1193   AMDGPUTargetStreamer &getTargetStreamer() {
1194     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1195     return static_cast<AMDGPUTargetStreamer &>(TS);
1196   }
1197 
1198   const MCRegisterInfo *getMRI() const {
1199     // We need this const_cast because for some reason getContext() is not const
1200     // in MCAsmParser.
1201     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1202   }
1203 
1204   const MCInstrInfo *getMII() const {
1205     return &MII;
1206   }
1207 
1208   const FeatureBitset &getFeatureBits() const {
1209     return getSTI().getFeatureBits();
1210   }
1211 
1212   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1213   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1214   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1215 
1216   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
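  // A forced encoding size of 64 selects the VOP3 form of an instruction
  // (from an explicit "_e64" mnemonic suffix); 32 selects the 32-bit
  // VOP1/VOP2/VOPC form ("_e32").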
1217   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1218   bool isForcedDPP() const { return ForcedDPP; }
1219   bool isForcedSDWA() const { return ForcedSDWA; }
1220   ArrayRef<unsigned> getMatchedVariants() const;
1221 
1222   std::unique_ptr<AMDGPUOperand> parseRegister();
1223   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1224   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1225   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1226                                       unsigned Kind) override;
1227   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1228                                OperandVector &Operands, MCStreamer &Out,
1229                                uint64_t &ErrorInfo,
1230                                bool MatchingInlineAsm) override;
1231   bool ParseDirective(AsmToken DirectiveID) override;
1232   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1233                                     OperandMode Mode = OperandMode_Default);
1234   StringRef parseMnemonicSuffix(StringRef Name);
1235   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1236                         SMLoc NameLoc, OperandVector &Operands) override;
1237   //bool ProcessInstruction(MCInst &Inst);
1238 
1239   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1240 
1241   OperandMatchResultTy
1242   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1243                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1244                      bool (*ConvertResult)(int64_t &) = nullptr);
1245 
1246   OperandMatchResultTy
1247   parseOperandArrayWithPrefix(const char *Prefix,
1248                               OperandVector &Operands,
1249                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1250                               bool (*ConvertResult)(int64_t&) = nullptr);
1251 
1252   OperandMatchResultTy
1253   parseNamedBit(const char *Name, OperandVector &Operands,
1254                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1255   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1256                                              StringRef &Value);
1257 
1258   bool isModifier();
1259   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1260   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1261   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1262   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1263   bool parseSP3NegModifier();
1264   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1265   OperandMatchResultTy parseReg(OperandVector &Operands);
1266   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1267   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1268   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1269   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1270   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1271   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1272   OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1273 
1274   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1275   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1276   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1277   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1278 
1279   bool parseCnt(int64_t &IntVal);
1280   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1281   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1282 
1283 private:
1284   struct OperandInfoTy {
1285     int64_t Id;
1286     bool IsSymbolic = false;
1287     bool IsDefined = false;
1288 
1289     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1290   };
1291 
1292   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1293   bool validateSendMsg(const OperandInfoTy &Msg,
1294                        const OperandInfoTy &Op,
1295                        const OperandInfoTy &Stream,
1296                        const SMLoc Loc);
1297 
1298   bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1299   bool validateHwreg(const OperandInfoTy &HwReg,
1300                      const int64_t Offset,
1301                      const int64_t Width,
1302                      const SMLoc Loc);
1303 
1304   void errorExpTgt();
1305   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1306   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1307 
1308   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1309   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1310   bool validateSOPLiteral(const MCInst &Inst) const;
1311   bool validateConstantBusLimitations(const MCInst &Inst);
1312   bool validateEarlyClobberLimitations(const MCInst &Inst);
1313   bool validateIntClampSupported(const MCInst &Inst);
1314   bool validateMIMGAtomicDMask(const MCInst &Inst);
1315   bool validateMIMGGatherDMask(const MCInst &Inst);
1316   bool validateMIMGDataSize(const MCInst &Inst);
1317   bool validateMIMGAddrSize(const MCInst &Inst);
1318   bool validateMIMGD16(const MCInst &Inst);
1319   bool validateMIMGDim(const MCInst &Inst);
1320   bool validateLdsDirect(const MCInst &Inst);
1321   bool validateOpSel(const MCInst &Inst);
1322   bool validateVccOperand(unsigned Reg) const;
1323   bool validateVOP3Literal(const MCInst &Inst) const;
1324   unsigned getConstantBusLimit(unsigned Opcode) const;
1325   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1326   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1327   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1328 
1329   bool isId(const StringRef Id) const;
1330   bool isId(const AsmToken &Token, const StringRef Id) const;
1331   bool isToken(const AsmToken::TokenKind Kind) const;
1332   bool trySkipId(const StringRef Id);
1333   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1334   bool trySkipToken(const AsmToken::TokenKind Kind);
1335   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1336   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1337   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1338   AsmToken::TokenKind getTokenKind() const;
1339   bool parseExpr(int64_t &Imm);
1340   bool parseExpr(OperandVector &Operands);
1341   StringRef getTokenStr() const;
1342   AsmToken peekToken();
1343   AsmToken getToken() const;
1344   SMLoc getLoc() const;
1345   void lex();
1346 
1347 public:
1348   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1349   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1350 
1351   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1352   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1353   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1354   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1355   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1356   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1357 
1358   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1359                             const unsigned MinVal,
1360                             const unsigned MaxVal,
1361                             const StringRef ErrMsg);
1362   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1363   bool parseSwizzleOffset(int64_t &Imm);
1364   bool parseSwizzleMacro(int64_t &Imm);
1365   bool parseSwizzleQuadPerm(int64_t &Imm);
1366   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1367   bool parseSwizzleBroadcast(int64_t &Imm);
1368   bool parseSwizzleSwap(int64_t &Imm);
1369   bool parseSwizzleReverse(int64_t &Imm);
1370 
1371   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1372   int64_t parseGPRIdxMacro();
1373 
1374   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1375   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1376   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1377   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1378   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1379 
1380   AMDGPUOperand::Ptr defaultDLC() const;
1381   AMDGPUOperand::Ptr defaultGLC() const;
1382   AMDGPUOperand::Ptr defaultSLC() const;
1383 
1384   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1385   AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1386   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1387   AMDGPUOperand::Ptr defaultFlatOffset() const;
1388 
1389   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1390 
1391   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1392                OptionalImmIndexMap &OptionalIdx);
1393   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1394   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1395   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1396 
1397   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1398 
1399   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1400                bool IsAtomic = false);
1401   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1402 
1403   OperandMatchResultTy parseDim(OperandVector &Operands);
1404   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1405   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1406   AMDGPUOperand::Ptr defaultRowMask() const;
1407   AMDGPUOperand::Ptr defaultBankMask() const;
1408   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1409   AMDGPUOperand::Ptr defaultFI() const;
1410   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1411   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1412 
1413   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1414                                     AMDGPUOperand::ImmTy Type);
1415   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1416   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1417   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1418   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1419   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1420   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1421                 uint64_t BasicInstType, bool skipVcc = false);
1422 
1423   AMDGPUOperand::Ptr defaultBLGP() const;
1424   AMDGPUOperand::Ptr defaultCBSZ() const;
1425   AMDGPUOperand::Ptr defaultABID() const;
1426 
1427   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1428   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1429 };
1430 
1431 struct OptionalOperand {
1432   const char *Name;
1433   AMDGPUOperand::ImmTy Type;
1434   bool IsBit;
1435   bool (*ConvertResult)(int64_t&);
1436 };
1437 
1438 } // end anonymous namespace
1439 
// May be called with an integer type of equivalent bitwidth.
1441 static const fltSemantics *getFltSemantics(unsigned Size) {
1442   switch (Size) {
1443   case 4:
1444     return &APFloat::IEEEsingle();
1445   case 8:
1446     return &APFloat::IEEEdouble();
1447   case 2:
1448     return &APFloat::IEEEhalf();
1449   default:
1450     llvm_unreachable("unsupported fp type");
1451   }
1452 }
1453 
1454 static const fltSemantics *getFltSemantics(MVT VT) {
1455   return getFltSemantics(VT.getSizeInBits() / 8);
1456 }
1457 
1458 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1459   switch (OperandType) {
1460   case AMDGPU::OPERAND_REG_IMM_INT32:
1461   case AMDGPU::OPERAND_REG_IMM_FP32:
1462   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1463   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1464   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1465   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1466     return &APFloat::IEEEsingle();
1467   case AMDGPU::OPERAND_REG_IMM_INT64:
1468   case AMDGPU::OPERAND_REG_IMM_FP64:
1469   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1470   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1471     return &APFloat::IEEEdouble();
1472   case AMDGPU::OPERAND_REG_IMM_INT16:
1473   case AMDGPU::OPERAND_REG_IMM_FP16:
1474   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1475   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1476   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1477   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1478   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1479   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1480   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1481   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1482   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1483   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1484     return &APFloat::IEEEhalf();
1485   default:
1486     llvm_unreachable("unsupported fp type");
1487   }
1488 }
1489 
1490 //===----------------------------------------------------------------------===//
1491 // Operand
1492 //===----------------------------------------------------------------------===//
1493 
1494 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1495   bool Lost;
1496 
  // Convert the literal to the FP semantics of the requested type.
1498   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1499                                                APFloat::rmNearestTiesToEven,
1500                                                &Lost);
  // We allow precision loss but not overflow or underflow.
1502   if (Status != APFloat::opOK &&
1503       Lost &&
1504       ((Status & APFloat::opOverflow)  != 0 ||
1505        (Status & APFloat::opUnderflow) != 0)) {
1506     return false;
1507   }
1508 
1509   return true;
1510 }
1511 
1512 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1513   return isUIntN(Size, Val) || isIntN(Size, Val);
1514 }
1515 
1516 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1517 
  // This is a hack to enable named inline values like shared_base with both
  // 32-bit and 64-bit operands. Note that these values are defined as 32-bit
  // operands only.
1522   if (isInlineValue()) {
1523     return true;
1524   }
1525 
1526   if (!isImmTy(ImmTyNone)) {
1527     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1528     return false;
1529   }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values, which is what a few other places do.
  // We've had bot failures before due to weird NaN support on MIPS hosts.
1533 
1534   APInt Literal(64, Imm.Val);
1535 
1536   if (Imm.IsFPImm) { // We got fp literal token
1537     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1538       return AMDGPU::isInlinableLiteral64(Imm.Val,
1539                                           AsmParser->hasInv2PiInlineImm());
1540     }
1541 
1542     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1543     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1544       return false;
1545 
1546     if (type.getScalarSizeInBits() == 16) {
1547       return AMDGPU::isInlinableLiteral16(
1548         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1549         AsmParser->hasInv2PiInlineImm());
1550     }
1551 
1552     // Check if single precision literal is inlinable
1553     return AMDGPU::isInlinableLiteral32(
1554       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1555       AsmParser->hasInv2PiInlineImm());
1556   }
1557 
1558   // We got int literal token.
1559   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1560     return AMDGPU::isInlinableLiteral64(Imm.Val,
1561                                         AsmParser->hasInv2PiInlineImm());
1562   }
1563 
1564   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1565     return false;
1566   }
1567 
1568   if (type.getScalarSizeInBits() == 16) {
1569     return AMDGPU::isInlinableLiteral16(
1570       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1571       AsmParser->hasInv2PiInlineImm());
1572   }
1573 
1574   return AMDGPU::isInlinableLiteral32(
1575     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1576     AsmParser->hasInv2PiInlineImm());
1577 }
1578 
1579 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1580   // Check that this immediate can be added as literal
1581   if (!isImmTy(ImmTyNone)) {
1582     return false;
1583   }
1584 
1585   if (!Imm.IsFPImm) {
1586     // We got int literal token.
1587 
1588     if (type == MVT::f64 && hasFPModifiers()) {
1589       // FP modifiers cannot be applied to int literals while preserving the same
1590       // semantics for VOP1/2/C and VOP3 because of integer truncation. To avoid
1591       // ambiguity, disable these cases.
1592       return false;
1593     }
1594 
1595     unsigned Size = type.getSizeInBits();
1596     if (Size == 64)
1597       Size = 32;
1598 
1599     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1600     // types.
1601     return isSafeTruncation(Imm.Val, Size);
1602   }
1603 
1604   // We got fp literal token
1605   if (type == MVT::f64) { // Expected 64-bit fp operand
1606     // The low 32 bits of the literal would be zeroed, but we accept such literals
1607     return true;
1608   }
1609 
1610   if (type == MVT::i64) { // Expected 64-bit int operand
1611     // We don't allow fp literals in 64-bit integer instructions. It is
1612     // unclear how we should encode them.
1613     return false;
1614   }
1615 
1616   // We allow fp literals with f16x2 operands assuming that the specified
1617   // literal goes into the lower half and the upper half is zero. We also
1618   // require that the literal can be losslessly converted to f16.
1619   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1620                      (type == MVT::v2i16)? MVT::i16 : type;
1621 
1622   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1623   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1624 }
1625 
1626 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1627   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1628 }
1629 
1630 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1631   if (AsmParser->isVI())
1632     return isVReg32();
1633   else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1634     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1635   else
1636     return false;
1637 }
1638 
1639 bool AMDGPUOperand::isSDWAFP16Operand() const {
1640   return isSDWAOperand(MVT::f16);
1641 }
1642 
1643 bool AMDGPUOperand::isSDWAFP32Operand() const {
1644   return isSDWAOperand(MVT::f32);
1645 }
1646 
1647 bool AMDGPUOperand::isSDWAInt16Operand() const {
1648   return isSDWAOperand(MVT::i16);
1649 }
1650 
1651 bool AMDGPUOperand::isSDWAInt32Operand() const {
1652   return isSDWAOperand(MVT::i32);
1653 }
1654 
1655 bool AMDGPUOperand::isBoolReg() const {
1656   return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1657          (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1658 }
1659 
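// Apply parsed 'abs'/'neg' FP modifiers directly to the literal bits:
// 'abs' clears the sign bit and 'neg' flips it. Size is the operand size
// in bytes and determines which bit is treated as the sign bit.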
1660 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1661 {
1662   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1663   assert(Size == 2 || Size == 4 || Size == 8);
1664 
1665   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1666 
1667   if (Imm.Mods.Abs) {
1668     Val &= ~FpSignMask;
1669   }
1670   if (Imm.Mods.Neg) {
1671     Val ^= FpSignMask;
1672   }
1673 
1674   return Val;
1675 }
1676 
1677 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1678   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1679                              Inst.getNumOperands())) {
1680     addLiteralImmOperand(Inst, Imm.Val,
1681                          ApplyModifiers &&
1682                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1683   } else {
1684     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1685     Inst.addOperand(MCOperand::createImm(Imm.Val));
1686   }
1687 }
1688 
1689 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1690   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1691   auto OpNum = Inst.getNumOperands();
1692   // Check that this operand accepts literals
1693   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1694 
1695   if (ApplyModifiers) {
1696     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1697     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1698     Val = applyInputFPModifiers(Val, Size);
1699   }
1700 
1701   APInt Literal(64, Val);
1702   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1703 
1704   if (Imm.IsFPImm) { // We got fp literal token
1705     switch (OpTy) {
1706     case AMDGPU::OPERAND_REG_IMM_INT64:
1707     case AMDGPU::OPERAND_REG_IMM_FP64:
1708     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1709     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1710       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1711                                        AsmParser->hasInv2PiInlineImm())) {
1712         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1713         return;
1714       }
1715 
1716       // Non-inlineable
1717       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1718         // For fp operands we check if low 32 bits are zeros
1719         if (Literal.getLoBits(32) != 0) {
1720           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1721           "Can't encode literal as exact 64-bit floating-point operand. "
1722           "Low 32-bits will be set to zero");
1723         }
1724 
1725         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1726         return;
1727       }
1728 
1729       // We don't allow fp literals in 64-bit integer instructions. It is
1730       // unclear how we should encode them. This case should be checked earlier
1731       // in predicate methods (isLiteralImm())
1732       llvm_unreachable("fp literal in 64-bit integer instruction.");
1733 
1734     case AMDGPU::OPERAND_REG_IMM_INT32:
1735     case AMDGPU::OPERAND_REG_IMM_FP32:
1736     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1737     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1738     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1739     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1740     case AMDGPU::OPERAND_REG_IMM_INT16:
1741     case AMDGPU::OPERAND_REG_IMM_FP16:
1742     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1743     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1744     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1745     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1746     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1747     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1748     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1749     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1750     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1751     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1752       bool lost;
1753       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1754       // Convert the literal to the operand's floating-point format
1755       FPLiteral.convert(*getOpFltSemantics(OpTy),
1756                         APFloat::rmNearestTiesToEven, &lost);
1757       // We allow precision loss but not overflow or underflow. This should have
1758       // been checked earlier in isLiteralImm().
1759 
1760       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1761       Inst.addOperand(MCOperand::createImm(ImmVal));
1762       return;
1763     }
1764     default:
1765       llvm_unreachable("invalid operand size");
1766     }
1767 
1768     return;
1769   }
1770 
1771   // We got int literal token.
1772   // Only sign extend inline immediates.
1773   switch (OpTy) {
1774   case AMDGPU::OPERAND_REG_IMM_INT32:
1775   case AMDGPU::OPERAND_REG_IMM_FP32:
1776   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1777   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1778   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1779   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1780   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1781   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1782     if (isSafeTruncation(Val, 32) &&
1783         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1784                                      AsmParser->hasInv2PiInlineImm())) {
1785       Inst.addOperand(MCOperand::createImm(Val));
1786       return;
1787     }
1788 
1789     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1790     return;
1791 
1792   case AMDGPU::OPERAND_REG_IMM_INT64:
1793   case AMDGPU::OPERAND_REG_IMM_FP64:
1794   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1795   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1796     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1797       Inst.addOperand(MCOperand::createImm(Val));
1798       return;
1799     }
1800 
1801     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1802     return;
1803 
1804   case AMDGPU::OPERAND_REG_IMM_INT16:
1805   case AMDGPU::OPERAND_REG_IMM_FP16:
1806   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1807   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1808   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1809   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1810     if (isSafeTruncation(Val, 16) &&
1811         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1812                                      AsmParser->hasInv2PiInlineImm())) {
1813       Inst.addOperand(MCOperand::createImm(Val));
1814       return;
1815     }
1816 
1817     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1818     return;
1819 
1820   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1821   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1822   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1823   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
1824     assert(isSafeTruncation(Val, 16));
1825     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1826                                         AsmParser->hasInv2PiInlineImm()));
1827 
1828     Inst.addOperand(MCOperand::createImm(Val));
1829     return;
1830   }
1831   default:
1832     llvm_unreachable("invalid operand size");
1833   }
1834 }
1835 
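// Add a KImm operand: a literal constant encoded directly in the instruction
// word. Integer literals are truncated to Bitwidth bits; FP literals are
// converted from double to the Bitwidth-bit FP format before being encoded.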
1836 template <unsigned Bitwidth>
1837 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1838   APInt Literal(64, Imm.Val);
1839 
1840   if (!Imm.IsFPImm) {
1841     // We got int literal token.
1842     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1843     return;
1844   }
1845 
1846   bool Lost;
1847   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1848   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1849                     APFloat::rmNearestTiesToEven, &Lost);
1850   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1851 }
1852 
1853 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1854   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1855 }
1856 
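// Named inline values: registers such as shared_base, vccz, scc and null
// that AMDGPUOperand::isInlinableImm() treats as always inlinable.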
1857 static bool isInlineValue(unsigned Reg) {
1858   switch (Reg) {
1859   case AMDGPU::SRC_SHARED_BASE:
1860   case AMDGPU::SRC_SHARED_LIMIT:
1861   case AMDGPU::SRC_PRIVATE_BASE:
1862   case AMDGPU::SRC_PRIVATE_LIMIT:
1863   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1864     return true;
1865   case AMDGPU::SRC_VCCZ:
1866   case AMDGPU::SRC_EXECZ:
1867   case AMDGPU::SRC_SCC:
1868     return true;
1869   case AMDGPU::SGPR_NULL:
1870     return true;
1871   default:
1872     return false;
1873   }
1874 }
1875 
1876 bool AMDGPUOperand::isInlineValue() const {
1877   return isRegKind() && ::isInlineValue(getReg());
1878 }
1879 
1880 //===----------------------------------------------------------------------===//
1881 // AsmParser
1882 //===----------------------------------------------------------------------===//
1883 
1884 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1885   if (Is == IS_VGPR) {
1886     switch (RegWidth) {
1887       default: return -1;
1888       case 1: return AMDGPU::VGPR_32RegClassID;
1889       case 2: return AMDGPU::VReg_64RegClassID;
1890       case 3: return AMDGPU::VReg_96RegClassID;
1891       case 4: return AMDGPU::VReg_128RegClassID;
1892       case 5: return AMDGPU::VReg_160RegClassID;
1893       case 8: return AMDGPU::VReg_256RegClassID;
1894       case 16: return AMDGPU::VReg_512RegClassID;
1895       case 32: return AMDGPU::VReg_1024RegClassID;
1896     }
1897   } else if (Is == IS_TTMP) {
1898     switch (RegWidth) {
1899       default: return -1;
1900       case 1: return AMDGPU::TTMP_32RegClassID;
1901       case 2: return AMDGPU::TTMP_64RegClassID;
1902       case 4: return AMDGPU::TTMP_128RegClassID;
1903       case 8: return AMDGPU::TTMP_256RegClassID;
1904       case 16: return AMDGPU::TTMP_512RegClassID;
1905     }
1906   } else if (Is == IS_SGPR) {
1907     switch (RegWidth) {
1908       default: return -1;
1909       case 1: return AMDGPU::SGPR_32RegClassID;
1910       case 2: return AMDGPU::SGPR_64RegClassID;
1911       case 4: return AMDGPU::SGPR_128RegClassID;
1912       case 8: return AMDGPU::SGPR_256RegClassID;
1913       case 16: return AMDGPU::SGPR_512RegClassID;
1914     }
1915   } else if (Is == IS_AGPR) {
1916     switch (RegWidth) {
1917       default: return -1;
1918       case 1: return AMDGPU::AGPR_32RegClassID;
1919       case 2: return AMDGPU::AReg_64RegClassID;
1920       case 4: return AMDGPU::AReg_128RegClassID;
1921       case 16: return AMDGPU::AReg_512RegClassID;
1922       case 32: return AMDGPU::AReg_1024RegClassID;
1923     }
1924   }
1925   return -1;
1926 }
1927 
1928 static unsigned getSpecialRegForName(StringRef RegName) {
1929   return StringSwitch<unsigned>(RegName)
1930     .Case("exec", AMDGPU::EXEC)
1931     .Case("vcc", AMDGPU::VCC)
1932     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1933     .Case("xnack_mask", AMDGPU::XNACK_MASK)
1934     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
1935     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
1936     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1937     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1938     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
1939     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
1940     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1941     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1942     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1943     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1944     .Case("lds_direct", AMDGPU::LDS_DIRECT)
1945     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1946     .Case("m0", AMDGPU::M0)
1947     .Case("vccz", AMDGPU::SRC_VCCZ)
1948     .Case("src_vccz", AMDGPU::SRC_VCCZ)
1949     .Case("execz", AMDGPU::SRC_EXECZ)
1950     .Case("src_execz", AMDGPU::SRC_EXECZ)
1951     .Case("scc", AMDGPU::SRC_SCC)
1952     .Case("src_scc", AMDGPU::SRC_SCC)
1953     .Case("tba", AMDGPU::TBA)
1954     .Case("tma", AMDGPU::TMA)
1955     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1956     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1957     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1958     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1959     .Case("vcc_lo", AMDGPU::VCC_LO)
1960     .Case("vcc_hi", AMDGPU::VCC_HI)
1961     .Case("exec_lo", AMDGPU::EXEC_LO)
1962     .Case("exec_hi", AMDGPU::EXEC_HI)
1963     .Case("tma_lo", AMDGPU::TMA_LO)
1964     .Case("tma_hi", AMDGPU::TMA_HI)
1965     .Case("tba_lo", AMDGPU::TBA_LO)
1966     .Case("tba_hi", AMDGPU::TBA_HI)
1967     .Case("null", AMDGPU::SGPR_NULL)
1968     .Default(AMDGPU::NoRegister);
1969 }
1970 
1971 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1972                                     SMLoc &EndLoc) {
1973   auto R = parseRegister();
1974   if (!R) return true;
1975   assert(R->isReg());
1976   RegNo = R->getReg();
1977   StartLoc = R->getStartLoc();
1978   EndLoc = R->getEndLoc();
1979   return false;
1980 }
1981 
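// Append register Reg1 to a register list that so far describes Reg spanning
// RegWidth registers. Known special register pairs (e.g. exec_lo, exec_hi)
// collapse into their combined register; regular registers must be
// consecutive, in which case RegWidth is simply incremented.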
1982 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1983                                             RegisterKind RegKind, unsigned Reg1) {
1984   switch (RegKind) {
1985   case IS_SPECIAL:
1986     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1987       Reg = AMDGPU::EXEC;
1988       RegWidth = 2;
1989       return true;
1990     }
1991     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1992       Reg = AMDGPU::FLAT_SCR;
1993       RegWidth = 2;
1994       return true;
1995     }
1996     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
1997       Reg = AMDGPU::XNACK_MASK;
1998       RegWidth = 2;
1999       return true;
2000     }
2001     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2002       Reg = AMDGPU::VCC;
2003       RegWidth = 2;
2004       return true;
2005     }
2006     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2007       Reg = AMDGPU::TBA;
2008       RegWidth = 2;
2009       return true;
2010     }
2011     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2012       Reg = AMDGPU::TMA;
2013       RegWidth = 2;
2014       return true;
2015     }
2016     return false;
2017   case IS_VGPR:
2018   case IS_SGPR:
2019   case IS_AGPR:
2020   case IS_TTMP:
2021     if (Reg1 != Reg + RegWidth) {
2022       return false;
2023     }
2024     RegWidth++;
2025     return true;
2026   default:
2027     llvm_unreachable("unexpected register kind");
2028   }
2029 }
2030 
2031 struct RegInfo {
2032   StringLiteral Name;
2033   RegisterKind Kind;
2034 };
2035 
2036 static constexpr RegInfo RegularRegisters[] = {
2037   {{"v"},    IS_VGPR},
2038   {{"s"},    IS_SGPR},
2039   {{"ttmp"}, IS_TTMP},
2040   {{"acc"},  IS_AGPR},
2041   {{"a"},    IS_AGPR},
2042 };
2043 
2044 static bool isRegularReg(RegisterKind Kind) {
2045   return Kind == IS_VGPR ||
2046          Kind == IS_SGPR ||
2047          Kind == IS_TTMP ||
2048          Kind == IS_AGPR;
2049 }
2050 
2051 static const RegInfo* getRegularRegInfo(StringRef Str) {
2052   for (const RegInfo &Reg : RegularRegisters)
2053     if (Str.startswith(Reg.Name))
2054       return &Reg;
2055   return nullptr;
2056 }
2057 
2058 static bool getRegNum(StringRef Str, unsigned& Num) {
2059   return !Str.getAsInteger(10, Num);
2060 }
2061 
2062 bool
2063 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2064                             const AsmToken &NextToken) const {
2065 
2066   // A list of consecutive registers: [s0,s1,s2,s3]
2067   if (Token.is(AsmToken::LBrac))
2068     return true;
2069 
2070   if (!Token.is(AsmToken::Identifier))
2071     return false;
2072 
2073   // A single register like s0 or a range of registers like s[0:1]
2074 
2075   StringRef Str = Token.getString();
2076   const RegInfo *Reg = getRegularRegInfo(Str);
2077   if (Reg) {
2078     StringRef RegName = Reg->Name;
2079     StringRef RegSuffix = Str.substr(RegName.size());
2080     if (!RegSuffix.empty()) {
2081       unsigned Num;
2082       // A single register with an index: rXX
2083       if (getRegNum(RegSuffix, Num))
2084         return true;
2085     } else {
2086       // A range of registers: r[XX:YY].
2087       if (NextToken.is(AsmToken::LBrac))
2088         return true;
2089     }
2090   }
2091 
2092   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2093 }
2094 
2095 bool
2096 AMDGPUAsmParser::isRegister()
2097 {
2098   return isRegister(getToken(), peekToken());
2099 }
2100 
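// Translate a (register kind, first register number, width in 32-bit
// registers) triple into an MC register. SGPR and TTMP tuples must be
// aligned; for example, s[2:3] has width 2 and alignment 2, so it maps to
// register index 1 of the SGPR_64 class.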
2101 unsigned
2102 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2103                                unsigned RegNum,
2104                                unsigned RegWidth) {
2105 
2106   assert(isRegularReg(RegKind));
2107 
2108   unsigned AlignSize = 1;
2109   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2110     // SGPR and TTMP registers must be aligned.
2111     // Max required alignment is 4 dwords.
2112     AlignSize = std::min(RegWidth, 4u);
2113   }
2114 
2115   if (RegNum % AlignSize != 0)
2116     return AMDGPU::NoRegister;
2117 
2118   unsigned RegIdx = RegNum / AlignSize;
2119   int RCID = getRegClass(RegKind, RegWidth);
2120   if (RCID == -1)
2121     return AMDGPU::NoRegister;
2122 
2123   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2124   const MCRegisterClass RC = TRI->getRegClass(RCID);
2125   if (RegIdx >= RC.getNumRegs())
2126     return AMDGPU::NoRegister;
2127 
2128   return RC.getRegister(RegIdx);
2129 }
2130 
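// Parse a register index or range in brackets, e.g. "[0]" or "[0:3]", and
// return the first index in Num and the number of registers in Width.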
2131 bool
2132 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2133   int64_t RegLo, RegHi;
2134   if (!trySkipToken(AsmToken::LBrac))
2135     return false;
2136 
2137   if (!parseExpr(RegLo))
2138     return false;
2139 
2140   if (trySkipToken(AsmToken::Colon)) {
2141     if (!parseExpr(RegHi))
2142       return false;
2143   } else {
2144     RegHi = RegLo;
2145   }
2146 
2147   if (!trySkipToken(AsmToken::RBrac))
2148     return false;
2149 
2150   if (!isUInt<32>(RegLo) || !isUInt<32>(RegHi) || RegLo > RegHi)
2151     return false;
2152 
2153   Num = static_cast<unsigned>(RegLo);
2154   Width = (RegHi - RegLo) + 1;
2155   return true;
2156 }
2157 
2158 unsigned
2159 AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2160                                  unsigned &RegNum,
2161                                  unsigned &RegWidth) {
2162   assert(isToken(AsmToken::Identifier));
2163   unsigned Reg = getSpecialRegForName(getTokenStr());
2164   if (Reg) {
2165     RegNum = 0;
2166     RegWidth = 1;
2167     RegKind = IS_SPECIAL;
2168     lex(); // skip register name
2169   }
2170   return Reg;
2171 }
2172 
2173 unsigned
2174 AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2175                                  unsigned &RegNum,
2176                                  unsigned &RegWidth) {
2177   assert(isToken(AsmToken::Identifier));
2178   StringRef RegName = getTokenStr();
2179 
2180   const RegInfo *RI = getRegularRegInfo(RegName);
2181   if (!RI)
2182     return AMDGPU::NoRegister;
2183   lex(); // skip register name
2184 
2185   RegKind = RI->Kind;
2186   StringRef RegSuffix = RegName.substr(RI->Name.size());
2187   if (!RegSuffix.empty()) {
2188     // Single 32-bit register: vXX.
2189     if (!getRegNum(RegSuffix, RegNum))
2190       return AMDGPU::NoRegister;
2191     RegWidth = 1;
2192   } else {
2193     // Range of registers: v[XX:YY]. ":YY" is optional.
2194     if (!ParseRegRange(RegNum, RegWidth))
2195       return AMDGPU::NoRegister;
2196   }
2197 
2198   return getRegularReg(RegKind, RegNum, RegWidth);
2199 }
2200 
2201 unsigned
2202 AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
2203                               unsigned &RegNum,
2204                               unsigned &RegWidth) {
2205   unsigned Reg = AMDGPU::NoRegister;
2206 
2207   if (!trySkipToken(AsmToken::LBrac))
2208     return AMDGPU::NoRegister;
2209 
2210   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2211 
2212   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2213     return AMDGPU::NoRegister;
2214   if (RegWidth != 1)
2215     return AMDGPU::NoRegister;
2216 
2217   for (; trySkipToken(AsmToken::Comma); ) {
2218     RegisterKind NextRegKind;
2219     unsigned NextReg, NextRegNum, NextRegWidth;
2220 
2221     if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth))
2222       return AMDGPU::NoRegister;
2223     if (NextRegWidth != 1)
2224       return AMDGPU::NoRegister;
2225     if (NextRegKind != RegKind)
2226       return AMDGPU::NoRegister;
2227     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg))
2228       return AMDGPU::NoRegister;
2229   }
2230 
2231   if (!trySkipToken(AsmToken::RBrac))
2232     return AMDGPU::NoRegister;
2233 
2234   if (isRegularReg(RegKind))
2235     Reg = getRegularReg(RegKind, RegNum, RegWidth);
2236 
2237   return Reg;
2238 }
2239 
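// Parse any supported register syntax: a special register name (e.g. vcc),
// a regular register or register range (e.g. v0, s[0:1]), or a list of
// consecutive registers (e.g. [s0,s1,s2,s3]). Returns false if parsing fails
// or the register is not available on the current subtarget.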
2240 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
2241                                           unsigned &Reg,
2242                                           unsigned &RegNum,
2243                                           unsigned &RegWidth) {
2244   Reg = AMDGPU::NoRegister;
2245 
2246   if (isToken(AsmToken::Identifier)) {
2247     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth);
2248     if (Reg == AMDGPU::NoRegister)
2249       Reg = ParseRegularReg(RegKind, RegNum, RegWidth);
2250   } else {
2251     Reg = ParseRegList(RegKind, RegNum, RegWidth);
2252   }
2253 
2254   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2255   return Reg != AMDGPU::NoRegister && subtargetHasRegister(*TRI, Reg);
2256 }
2257 
2258 Optional<StringRef>
2259 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2260   switch (RegKind) {
2261   case IS_VGPR:
2262     return StringRef(".amdgcn.next_free_vgpr");
2263   case IS_SGPR:
2264     return StringRef(".amdgcn.next_free_sgpr");
2265   default:
2266     return None;
2267   }
2268 }
2269 
2270 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2271   auto SymbolName = getGprCountSymbolName(RegKind);
2272   assert(SymbolName && "initializing invalid register kind");
2273   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2274   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2275 }
2276 
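// Track the highest VGPR/SGPR index used so far by keeping the
// .amdgcn.next_free_{v,s}gpr symbols one past the maximum register index
// referenced by the parsed code.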
2277 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2278                                             unsigned DwordRegIndex,
2279                                             unsigned RegWidth) {
2280   // Symbols are only defined for GCN targets
2281   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2282     return true;
2283 
2284   auto SymbolName = getGprCountSymbolName(RegKind);
2285   if (!SymbolName)
2286     return true;
2287   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2288 
2289   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2290   int64_t OldCount;
2291 
2292   if (!Sym->isVariable())
2293     return !Error(getParser().getTok().getLoc(),
2294                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2295   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2296     return !Error(
2297         getParser().getTok().getLoc(),
2298         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2299 
2300   if (OldCount <= NewMax)
2301     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2302 
2303   return true;
2304 }
2305 
2306 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
2307   const auto &Tok = Parser.getTok();
2308   SMLoc StartLoc = Tok.getLoc();
2309   SMLoc EndLoc = Tok.getEndLoc();
2310   RegisterKind RegKind;
2311   unsigned Reg, RegNum, RegWidth;
2312 
2313   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2314     // FIXME: improve error messages (bug 41303).
2315     Error(StartLoc, "not a valid operand.");
2316     return nullptr;
2317   }
2318   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2319     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2320       return nullptr;
2321   } else
2322     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2323   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2324 }
2325 
2326 OperandMatchResultTy
2327 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2328   // TODO: add syntactic sugar for 1/(2*PI)
2329 
2330   assert(!isRegister());
2331   assert(!isModifier());
2332 
2333   const auto& Tok = getToken();
2334   const auto& NextTok = peekToken();
2335   bool IsReal = Tok.is(AsmToken::Real);
2336   SMLoc S = getLoc();
2337   bool Negate = false;
2338 
2339   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2340     lex();
2341     IsReal = true;
2342     Negate = true;
2343   }
2344 
2345   if (IsReal) {
2346     // Floating-point expressions are not supported.
2347     // Only floating-point literals with an optional
2348     // sign are allowed here.
2349 
2350     StringRef Num = getTokenStr();
2351     lex();
2352 
2353     APFloat RealVal(APFloat::IEEEdouble());
2354     auto roundMode = APFloat::rmNearestTiesToEven;
2355     if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) {
2356       return MatchOperand_ParseFail;
2357     }
2358     if (Negate)
2359       RealVal.changeSign();
2360 
2361     Operands.push_back(
2362       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2363                                AMDGPUOperand::ImmTyNone, true));
2364 
2365     return MatchOperand_Success;
2366 
2367   } else {
2368     int64_t IntVal;
2369     const MCExpr *Expr;
2370     SMLoc S = getLoc();
2371 
2372     if (HasSP3AbsModifier) {
2373       // This is a workaround for handling expressions
2374       // as arguments of SP3 'abs' modifier, for example:
2375       //     |1.0|
2376       //     |-1|
2377       //     |1+x|
2378       // This syntax is not compatible with syntax of standard
2379       // MC expressions (due to the trailing '|').
2380       SMLoc EndLoc;
2381       if (getParser().parsePrimaryExpr(Expr, EndLoc))
2382         return MatchOperand_ParseFail;
2383     } else {
2384       if (Parser.parseExpression(Expr))
2385         return MatchOperand_ParseFail;
2386     }
2387 
2388     if (Expr->evaluateAsAbsolute(IntVal)) {
2389       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2390     } else {
2391       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2392     }
2393 
2394     return MatchOperand_Success;
2395   }
2396 
2397   return MatchOperand_NoMatch;
2398 }
2399 
2400 OperandMatchResultTy
2401 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2402   if (!isRegister())
2403     return MatchOperand_NoMatch;
2404 
2405   if (auto R = parseRegister()) {
2406     assert(R->isReg());
2407     Operands.push_back(std::move(R));
2408     return MatchOperand_Success;
2409   }
2410   return MatchOperand_ParseFail;
2411 }
2412 
2413 OperandMatchResultTy
2414 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2415   auto res = parseReg(Operands);
2416   if (res != MatchOperand_NoMatch) {
2417     return res;
2418   } else if (isModifier()) {
2419     return MatchOperand_NoMatch;
2420   } else {
2421     return parseImm(Operands, HasSP3AbsMod);
2422   }
2423 }
2424 
2425 bool
2426 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2427   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2428     const auto &str = Token.getString();
2429     return str == "abs" || str == "neg" || str == "sext";
2430   }
2431   return false;
2432 }
2433 
2434 bool
2435 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2436   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2437 }
2438 
2439 bool
2440 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2441   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2442 }
2443 
2444 bool
2445 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2446   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2447 }
2448 
2449 // Check if this is an operand modifier or an opcode modifier
2450 // which may look like an expression but is not. We should
2451 // avoid parsing these modifiers as expressions. Currently
2452 // recognized sequences are:
2453 //   |...|
2454 //   abs(...)
2455 //   neg(...)
2456 //   sext(...)
2457 //   -reg
2458 //   -|...|
2459 //   -abs(...)
2460 //   name:...
2461 // Note that simple opcode modifiers like 'gds' may be parsed as
2462 // expressions; this is a special case. See getExpressionAsToken.
2463 //
2464 bool
2465 AMDGPUAsmParser::isModifier() {
2466 
2467   AsmToken Tok = getToken();
2468   AsmToken NextToken[2];
2469   peekTokens(NextToken);
2470 
2471   return isOperandModifier(Tok, NextToken[0]) ||
2472          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2473          isOpcodeModifierWithVal(Tok, NextToken[0]);
2474 }
2475 
2476 // Check if the current token is an SP3 'neg' modifier.
2477 // Currently this modifier is allowed in the following contexts:
2478 //
2479 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2480 // 2. Before an 'abs' modifier: -abs(...)
2481 // 3. Before an SP3 'abs' modifier: -|...|
2482 //
2483 // In all other cases "-" is handled as a part
2484 // of an expression that follows the sign.
2485 //
2486 // Note: When "-" is followed by an integer literal N,
2487 // this is interpreted as integer negation rather
2488 // than a floating-point NEG modifier applied to N.
2489 // Besides being counter-intuitive, such use of the floating-point
2490 // NEG modifier would result in different meanings
2491 // of integer literals used with VOP1/2/C and VOP3,
2492 // for example:
2493 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2494 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2495 // Negative fp literals with a preceding "-" are
2496 // handled likewise for uniformity.
2497 //
2498 bool
2499 AMDGPUAsmParser::parseSP3NegModifier() {
2500 
2501   AsmToken NextToken[2];
2502   peekTokens(NextToken);
2503 
2504   if (isToken(AsmToken::Minus) &&
2505       (isRegister(NextToken[0], NextToken[1]) ||
2506        NextToken[0].is(AsmToken::Pipe) ||
2507        isId(NextToken[0], "abs"))) {
2508     lex();
2509     return true;
2510   }
2511 
2512   return false;
2513 }
2514 
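// Parse an operand optionally wrapped in FP input modifiers, either in named
// form (neg(...), abs(...)) or in SP3 form (-..., |...|). Using both
// spellings of the same modifier on one operand is rejected.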
2515 OperandMatchResultTy
2516 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2517                                               bool AllowImm) {
2518   bool Neg, SP3Neg;
2519   bool Abs, SP3Abs;
2520   SMLoc Loc;
2521 
2522   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2523   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2524     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2525     return MatchOperand_ParseFail;
2526   }
2527 
2528   SP3Neg = parseSP3NegModifier();
2529 
2530   Loc = getLoc();
2531   Neg = trySkipId("neg");
2532   if (Neg && SP3Neg) {
2533     Error(Loc, "expected register or immediate");
2534     return MatchOperand_ParseFail;
2535   }
2536   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2537     return MatchOperand_ParseFail;
2538 
2539   Abs = trySkipId("abs");
2540   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2541     return MatchOperand_ParseFail;
2542 
2543   Loc = getLoc();
2544   SP3Abs = trySkipToken(AsmToken::Pipe);
2545   if (Abs && SP3Abs) {
2546     Error(Loc, "expected register or immediate");
2547     return MatchOperand_ParseFail;
2548   }
2549 
2550   OperandMatchResultTy Res;
2551   if (AllowImm) {
2552     Res = parseRegOrImm(Operands, SP3Abs);
2553   } else {
2554     Res = parseReg(Operands);
2555   }
2556   if (Res != MatchOperand_Success) {
2557     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2558   }
2559 
2560   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2561     return MatchOperand_ParseFail;
2562   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2563     return MatchOperand_ParseFail;
2564   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2565     return MatchOperand_ParseFail;
2566 
2567   AMDGPUOperand::Modifiers Mods;
2568   Mods.Abs = Abs || SP3Abs;
2569   Mods.Neg = Neg || SP3Neg;
2570 
2571   if (Mods.hasFPModifiers()) {
2572     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2573     if (Op.isExpr()) {
2574       Error(Op.getStartLoc(), "expected an absolute expression");
2575       return MatchOperand_ParseFail;
2576     }
2577     Op.setModifiers(Mods);
2578   }
2579   return MatchOperand_Success;
2580 }
2581 
2582 OperandMatchResultTy
2583 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2584                                                bool AllowImm) {
2585   bool Sext = trySkipId("sext");
2586   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2587     return MatchOperand_ParseFail;
2588 
2589   OperandMatchResultTy Res;
2590   if (AllowImm) {
2591     Res = parseRegOrImm(Operands);
2592   } else {
2593     Res = parseReg(Operands);
2594   }
2595   if (Res != MatchOperand_Success) {
2596     return Sext? MatchOperand_ParseFail : Res;
2597   }
2598 
2599   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2600     return MatchOperand_ParseFail;
2601 
2602   AMDGPUOperand::Modifiers Mods;
2603   Mods.Sext = Sext;
2604 
2605   if (Mods.hasIntModifiers()) {
2606     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2607     if (Op.isExpr()) {
2608       Error(Op.getStartLoc(), "expected an absolute expression");
2609       return MatchOperand_ParseFail;
2610     }
2611     Op.setModifiers(Mods);
2612   }
2613 
2614   return MatchOperand_Success;
2615 }
2616 
2617 OperandMatchResultTy
2618 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2619   return parseRegOrImmWithFPInputMods(Operands, false);
2620 }
2621 
2622 OperandMatchResultTy
2623 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2624   return parseRegOrImmWithIntInputMods(Operands, false);
2625 }
2626 
2627 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2628   auto Loc = getLoc();
2629   if (trySkipId("off")) {
2630     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2631                                                 AMDGPUOperand::ImmTyOff, false));
2632     return MatchOperand_Success;
2633   }
2634 
2635   if (!isRegister())
2636     return MatchOperand_NoMatch;
2637 
2638   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2639   if (Reg) {
2640     Operands.push_back(std::move(Reg));
2641     return MatchOperand_Success;
2642   }
2643 
2644   return MatchOperand_ParseFail;
2645 
2646 }
2647 
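// Reject instruction matches that are inconsistent with a forced encoding
// (e.g. _e32, _e64, _dpp, _sdwa suffixes) and enforce encoding-specific
// restrictions such as dst_sel == DWORD for SDWA v_mac_f32/f16.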
2648 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2649   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2650 
2651   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2652       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2653       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2654       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2655     return Match_InvalidOperand;
2656 
2657   if ((TSFlags & SIInstrFlags::VOP3) &&
2658       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2659       getForcedEncodingSize() != 64)
2660     return Match_PreferE32;
2661 
2662   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2663       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2664     // v_mac_f32/16 allow only dst_sel == DWORD.
2665     auto OpNum =
2666         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2667     const auto &Op = Inst.getOperand(OpNum);
2668     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2669       return Match_InvalidOperand;
2670     }
2671   }
2672 
2673   return Match_Success;
2674 }
2675 
2676 // What asm variants we should check
2677 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2678   if (getForcedEncodingSize() == 32) {
2679     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2680     return makeArrayRef(Variants);
2681   }
2682 
2683   if (isForcedVOP3()) {
2684     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2685     return makeArrayRef(Variants);
2686   }
2687 
2688   if (isForcedSDWA()) {
2689     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2690                                         AMDGPUAsmVariants::SDWA9};
2691     return makeArrayRef(Variants);
2692   }
2693 
2694   if (isForcedDPP()) {
2695     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2696     return makeArrayRef(Variants);
2697   }
2698 
2699   static const unsigned Variants[] = {
2700     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2701     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2702   };
2703 
2704   return makeArrayRef(Variants);
2705 }
2706 
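// Return the first implicitly read SGPR or special register (FLAT_SCRATCH,
// VCC, VCC_LO/HI or M0) of a VOP instruction, or NoRegister if there is
// none. Such implicit reads count towards the constant bus limit.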
2707 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2708   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2709   const unsigned Num = Desc.getNumImplicitUses();
2710   for (unsigned i = 0; i < Num; ++i) {
2711     unsigned Reg = Desc.ImplicitUses[i];
2712     switch (Reg) {
2713     case AMDGPU::FLAT_SCR:
2714     case AMDGPU::VCC:
2715     case AMDGPU::VCC_LO:
2716     case AMDGPU::VCC_HI:
2717     case AMDGPU::M0:
2718       return Reg;
2719     default:
2720       break;
2721     }
2722   }
2723   return AMDGPU::NoRegister;
2724 }
2725 
2726 // NB: This code is correct only when used to check constant
2727 // bus limitations because GFX7 supports no f16 inline constants.
2728 // Note that there are no cases when a GFX7 opcode violates
2729 // constant bus limitations due to the use of an f16 constant.
2730 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2731                                        unsigned OpIdx) const {
2732   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2733 
2734   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2735     return false;
2736   }
2737 
2738   const MCOperand &MO = Inst.getOperand(OpIdx);
2739 
2740   int64_t Val = MO.getImm();
2741   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2742 
2743   switch (OpSize) { // expected operand size
2744   case 8:
2745     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2746   case 4:
2747     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2748   case 2: {
2749     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2750     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2751         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2752         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2753         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2754         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
2755         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
2756       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2757     } else {
2758       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2759     }
2760   }
2761   default:
2762     llvm_unreachable("invalid operand size");
2763   }
2764 }
2765 
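// Number of scalar (SGPR or literal) sources an instruction may read over
// the constant bus: one before GFX10; two on GFX10, except for 64-bit shifts
// which still allow only one.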
2766 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
2767   if (!isGFX10())
2768     return 1;
2769 
2770   switch (Opcode) {
2771   // 64-bit shift instructions can use only one scalar value input
2772   case AMDGPU::V_LSHLREV_B64:
2773   case AMDGPU::V_LSHLREV_B64_gfx10:
2774   case AMDGPU::V_LSHL_B64:
2775   case AMDGPU::V_LSHRREV_B64:
2776   case AMDGPU::V_LSHRREV_B64_gfx10:
2777   case AMDGPU::V_LSHR_B64:
2778   case AMDGPU::V_ASHRREV_I64:
2779   case AMDGPU::V_ASHRREV_I64_gfx10:
2780   case AMDGPU::V_ASHR_I64:
2781     return 1;
2782   default:
2783     return 2;
2784   }
2785 }
2786 
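// An operand occupies the constant bus if it is a non-inline immediate
// (i.e. a literal), an SGPR read (SGPR_NULL is free), or anything else that
// is neither an immediate nor a register (e.g. an expression).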
2787 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2788   const MCOperand &MO = Inst.getOperand(OpIdx);
2789   if (MO.isImm()) {
2790     return !isInlineConstant(Inst, OpIdx);
2791   } else if (MO.isReg()) {
2792     auto Reg = MO.getReg();
2793     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2794     return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL;
2795   } else {
2796     return true;
2797   }
2798 }
2799 
2800 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2801   const unsigned Opcode = Inst.getOpcode();
2802   const MCInstrDesc &Desc = MII.get(Opcode);
2803   unsigned ConstantBusUseCount = 0;
2804   unsigned NumLiterals = 0;
2805   unsigned LiteralSize;
2806 
2807   if (Desc.TSFlags &
2808       (SIInstrFlags::VOPC |
2809        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2810        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2811        SIInstrFlags::SDWA)) {
2812     // Check special imm operands (used by madmk, etc)
2813     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2814       ++ConstantBusUseCount;
2815     }
2816 
2817     SmallDenseSet<unsigned> SGPRsUsed;
2818     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2819     if (SGPRUsed != AMDGPU::NoRegister) {
2820       SGPRsUsed.insert(SGPRUsed);
2821       ++ConstantBusUseCount;
2822     }
2823 
2824     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2825     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2826     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2827 
2828     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2829 
2830     for (int OpIdx : OpIndices) {
2831       if (OpIdx == -1) break;
2832 
2833       const MCOperand &MO = Inst.getOperand(OpIdx);
2834       if (usesConstantBus(Inst, OpIdx)) {
2835         if (MO.isReg()) {
2836           const unsigned Reg = mc2PseudoReg(MO.getReg());
2837           // Pairs of registers with a partial intersection like these
2838           //   s0, s[0:1]
2839           //   flat_scratch_lo, flat_scratch
2840           //   flat_scratch_lo, flat_scratch_hi
2841           // are theoretically valid but they are disabled anyway.
2842           // Note that this code mimics SIInstrInfo::verifyInstruction
2843           if (!SGPRsUsed.count(Reg)) {
2844             SGPRsUsed.insert(Reg);
2845             ++ConstantBusUseCount;
2846           }
2847         } else { // Expression or a literal
2848 
2849           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
2850             continue; // special operand like VINTERP attr_chan
2851 
2852           // An instruction may use only one literal.
2853           // This has been validated in a previous step.
2854           // See validateVOP3Literal.
2855           // This literal may be used as more than one operand.
2856           // If all these operands are of the same size,
2857           // this literal counts as one scalar value.
2858           // Otherwise it counts as 2 scalar values.
2859           // See "GFX10 Shader Programming", section 3.6.2.3.
2860 
2861           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
2862           if (Size < 4) Size = 4;
2863 
2864           if (NumLiterals == 0) {
2865             NumLiterals = 1;
2866             LiteralSize = Size;
2867           } else if (LiteralSize != Size) {
2868             NumLiterals = 2;
2869           }
2870         }
2871       }
2872     }
2873   }
2874   ConstantBusUseCount += NumLiterals;
2875 
2876   return ConstantBusUseCount <= getConstantBusLimit(Opcode);
2877 }
2878 
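// For instructions whose vdst operand is marked earlyclobber, the
// destination register must not overlap any source register.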
2879 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2880   const unsigned Opcode = Inst.getOpcode();
2881   const MCInstrDesc &Desc = MII.get(Opcode);
2882 
2883   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2884   if (DstIdx == -1 ||
2885       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2886     return true;
2887   }
2888 
2889   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2890 
2891   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2892   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2893   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2894 
2895   assert(DstIdx != -1);
2896   const MCOperand &Dst = Inst.getOperand(DstIdx);
2897   assert(Dst.isReg());
2898   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2899 
2900   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2901 
2902   for (int SrcIdx : SrcIndices) {
2903     if (SrcIdx == -1) break;
2904     const MCOperand &Src = Inst.getOperand(SrcIdx);
2905     if (Src.isReg()) {
2906       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2907       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2908         return false;
2909       }
2910     }
2911   }
2912 
2913   return true;
2914 }
2915 
2916 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2917 
2918   const unsigned Opc = Inst.getOpcode();
2919   const MCInstrDesc &Desc = MII.get(Opc);
2920 
2921   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2922     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2923     assert(ClampIdx != -1);
2924     return Inst.getOperand(ClampIdx).getImm() == 0;
2925   }
2926 
2927   return true;
2928 }
2929 
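// Check that the width of the vdata operand matches the number of channels
// enabled in dmask (plus one for tfe), halved for packed D16 and fixed at
// four for gather4.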
2930 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2931 
2932   const unsigned Opc = Inst.getOpcode();
2933   const MCInstrDesc &Desc = MII.get(Opc);
2934 
2935   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2936     return true;
2937 
2938   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2939   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2940   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2941 
2942   assert(VDataIdx != -1);
2943   assert(DMaskIdx != -1);
2944   assert(TFEIdx != -1);
2945 
2946   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2947   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2948   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2949   if (DMask == 0)
2950     DMask = 1;
2951 
2952   unsigned DataSize =
2953     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2954   if (hasPackedD16()) {
2955     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2956     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2957       DataSize = (DataSize + 1) / 2;
2958   }
2959 
2960   return (VDataSize / 4) == DataSize + TFESize;
2961 }
2962 
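// On GFX10, check that the width of the vaddr operand(s) matches the address
// size implied by the dim and the base opcode (extra args, gradients,
// coordinates, lod/clamp/mip). Non-NSA encodings round the address size up
// to 8 or 16 dwords once it exceeds 4.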
2963 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
2964   const unsigned Opc = Inst.getOpcode();
2965   const MCInstrDesc &Desc = MII.get(Opc);
2966 
2967   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
2968     return true;
2969 
2970   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
2971   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
2972       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
2973   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
2974   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
2975   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2976 
2977   assert(VAddr0Idx != -1);
2978   assert(SrsrcIdx != -1);
2979   assert(DimIdx != -1);
2980   assert(SrsrcIdx > VAddr0Idx);
2981 
2982   unsigned Dim = Inst.getOperand(DimIdx).getImm();
2983   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
2984   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
2985   unsigned VAddrSize =
2986       IsNSA ? SrsrcIdx - VAddr0Idx
2987             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
2988 
2989   unsigned AddrSize = BaseOpcode->NumExtraArgs +
2990                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
2991                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
2992                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
2993   if (!IsNSA) {
2994     if (AddrSize > 8)
2995       AddrSize = 16;
2996     else if (AddrSize > 4)
2997       AddrSize = 8;
2998   }
2999 
3000   return VAddrSize == AddrSize;
3001 }
3002 
3003 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3004 
3005   const unsigned Opc = Inst.getOpcode();
3006   const MCInstrDesc &Desc = MII.get(Opc);
3007 
3008   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3009     return true;
3010   if (!Desc.mayLoad() || !Desc.mayStore())
3011     return true; // Not atomic
3012 
3013   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3014   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3015 
3016   // This is an incomplete check because image_atomic_cmpswap
3017   // may only use 0x3 and 0xf while other atomic operations
3018   // may use 0x1 and 0x3. However, these limitations are
3019   // verified when we check that dmask matches dst size.
3020   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3021 }
3022 
3023 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3024 
3025   const unsigned Opc = Inst.getOpcode();
3026   const MCInstrDesc &Desc = MII.get(Opc);
3027 
3028   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3029     return true;
3030 
3031   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3032   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3033 
3034   // GATHER4 instructions use dmask in a different fashion compared to
3035   // other MIMG instructions. The only useful DMASK values are
3036   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3037   // (red,red,red,red) etc.) The ISA document doesn't mention
3038   // this.
3039   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3040 }
3041 
3042 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3043 
3044   const unsigned Opc = Inst.getOpcode();
3045   const MCInstrDesc &Desc = MII.get(Opc);
3046 
3047   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3048     return true;
3049 
3050   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3051   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3052     if (isCI() || isSI())
3053       return false;
3054   }
3055 
3056   return true;
3057 }
3058 
3059 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3060   const unsigned Opc = Inst.getOpcode();
3061   const MCInstrDesc &Desc = MII.get(Opc);
3062 
3063   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3064     return true;
3065 
3066   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3067   if (DimIdx < 0)
3068     return true;
3069 
3070   long Imm = Inst.getOperand(DimIdx).getImm();
3071   if (Imm < 0 || Imm >= 8)
3072     return false;
3073 
3074   return true;
3075 }
3076 
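// "Rev" opcodes take their first two source operands in reversed order,
// e.g. v_subrev_f32 computes src1 - src0 and v_lshlrev_b32 shifts src1 by
// src0.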
3077 static bool IsRevOpcode(const unsigned Opcode)
3078 {
3079   switch (Opcode) {
3080   case AMDGPU::V_SUBREV_F32_e32:
3081   case AMDGPU::V_SUBREV_F32_e64:
3082   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3083   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3084   case AMDGPU::V_SUBREV_F32_e32_vi:
3085   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3086   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3087   case AMDGPU::V_SUBREV_F32_e64_vi:
3088 
3089   case AMDGPU::V_SUBREV_I32_e32:
3090   case AMDGPU::V_SUBREV_I32_e64:
3091   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3092   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3093 
3094   case AMDGPU::V_SUBBREV_U32_e32:
3095   case AMDGPU::V_SUBBREV_U32_e64:
3096   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3097   case AMDGPU::V_SUBBREV_U32_e32_vi:
3098   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3099   case AMDGPU::V_SUBBREV_U32_e64_vi:
3100 
3101   case AMDGPU::V_SUBREV_U32_e32:
3102   case AMDGPU::V_SUBREV_U32_e64:
3103   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3104   case AMDGPU::V_SUBREV_U32_e32_vi:
3105   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3106   case AMDGPU::V_SUBREV_U32_e64_vi:
3107 
3108   case AMDGPU::V_SUBREV_F16_e32:
3109   case AMDGPU::V_SUBREV_F16_e64:
3110   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3111   case AMDGPU::V_SUBREV_F16_e32_vi:
3112   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3113   case AMDGPU::V_SUBREV_F16_e64_vi:
3114 
3115   case AMDGPU::V_SUBREV_U16_e32:
3116   case AMDGPU::V_SUBREV_U16_e64:
3117   case AMDGPU::V_SUBREV_U16_e32_vi:
3118   case AMDGPU::V_SUBREV_U16_e64_vi:
3119 
3120   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3121   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3122   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3123 
3124   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3125   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3126 
3127   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3128   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3129 
3130   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3131   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3132 
3133   case AMDGPU::V_LSHRREV_B32_e32:
3134   case AMDGPU::V_LSHRREV_B32_e64:
3135   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3136   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3137   case AMDGPU::V_LSHRREV_B32_e32_vi:
3138   case AMDGPU::V_LSHRREV_B32_e64_vi:
3139   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3140   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3141 
3142   case AMDGPU::V_ASHRREV_I32_e32:
3143   case AMDGPU::V_ASHRREV_I32_e64:
3144   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3145   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3146   case AMDGPU::V_ASHRREV_I32_e32_vi:
3147   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3148   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3149   case AMDGPU::V_ASHRREV_I32_e64_vi:
3150 
3151   case AMDGPU::V_LSHLREV_B32_e32:
3152   case AMDGPU::V_LSHLREV_B32_e64:
3153   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3154   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3155   case AMDGPU::V_LSHLREV_B32_e32_vi:
3156   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3157   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3158   case AMDGPU::V_LSHLREV_B32_e64_vi:
3159 
3160   case AMDGPU::V_LSHLREV_B16_e32:
3161   case AMDGPU::V_LSHLREV_B16_e64:
3162   case AMDGPU::V_LSHLREV_B16_e32_vi:
3163   case AMDGPU::V_LSHLREV_B16_e64_vi:
3164   case AMDGPU::V_LSHLREV_B16_gfx10:
3165 
3166   case AMDGPU::V_LSHRREV_B16_e32:
3167   case AMDGPU::V_LSHRREV_B16_e64:
3168   case AMDGPU::V_LSHRREV_B16_e32_vi:
3169   case AMDGPU::V_LSHRREV_B16_e64_vi:
3170   case AMDGPU::V_LSHRREV_B16_gfx10:
3171 
3172   case AMDGPU::V_ASHRREV_I16_e32:
3173   case AMDGPU::V_ASHRREV_I16_e64:
3174   case AMDGPU::V_ASHRREV_I16_e32_vi:
3175   case AMDGPU::V_ASHRREV_I16_e64_vi:
3176   case AMDGPU::V_ASHRREV_I16_gfx10:
3177 
3178   case AMDGPU::V_LSHLREV_B64:
3179   case AMDGPU::V_LSHLREV_B64_gfx10:
3180   case AMDGPU::V_LSHLREV_B64_vi:
3181 
3182   case AMDGPU::V_LSHRREV_B64:
3183   case AMDGPU::V_LSHRREV_B64_gfx10:
3184   case AMDGPU::V_LSHRREV_B64_vi:
3185 
3186   case AMDGPU::V_ASHRREV_I64:
3187   case AMDGPU::V_ASHRREV_I64_gfx10:
3188   case AMDGPU::V_ASHRREV_I64_vi:
3189 
3190   case AMDGPU::V_PK_LSHLREV_B16:
3191   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3192   case AMDGPU::V_PK_LSHLREV_B16_vi:
3193 
3194   case AMDGPU::V_PK_LSHRREV_B16:
3195   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3196   case AMDGPU::V_PK_LSHRREV_B16_vi:
3197   case AMDGPU::V_PK_ASHRREV_I16:
3198   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3199   case AMDGPU::V_PK_ASHRREV_I16_vi:
3200     return true;
3201   default:
3202     return false;
3203   }
3204 }
3205 
3206 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3207 
3208   using namespace SIInstrFlags;
3209   const unsigned Opcode = Inst.getOpcode();
3210   const MCInstrDesc &Desc = MII.get(Opcode);
3211 
  // The lds_direct register can only be used with 9-bit source operands.
  // Ignore instruction encodings that do not accept such operands.
3214   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3215     return true;
3216 
3217   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3218   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3219   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3220 
3221   const int SrcIndices[] = { Src1Idx, Src2Idx };
3222 
3223   // lds_direct cannot be specified as either src1 or src2.
3224   for (int SrcIdx : SrcIndices) {
3225     if (SrcIdx == -1) break;
3226     const MCOperand &Src = Inst.getOperand(SrcIdx);
3227     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3228       return false;
3229     }
3230   }
3231 
3232   if (Src0Idx == -1)
3233     return true;
3234 
3235   const MCOperand &Src = Inst.getOperand(Src0Idx);
3236   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3237     return true;
3238 
3239   // lds_direct is specified as src0. Check additional limitations.
3240   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3241 }
3242 
3243 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3244   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3245     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3246     if (Op.isFlatOffset())
3247       return Op.getStartLoc();
3248   }
3249   return getLoc();
3250 }
3251 
3252 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3253                                          const OperandVector &Operands) {
3254   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3255   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3256     return true;
3257 
3258   auto Opcode = Inst.getOpcode();
3259   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3260   assert(OpNum != -1);
3261 
3262   const auto &Op = Inst.getOperand(OpNum);
3263   if (!hasFlatOffsets() && Op.getImm() != 0) {
3264     Error(getFlatOffsetLoc(Operands),
3265           "flat offset modifier is not supported on this GPU");
3266     return false;
3267   }
3268 
  // The address offset is 13-bit signed on GFX9 and 12-bit signed on GFX10.
  // For the FLAT segment the offset must be non-negative; the MSB (sign bit)
  // is ignored and forced to zero, leaving one less usable bit.
3272   unsigned OffsetSize = isGFX9() ? 13 : 12;
3273   if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
3274     if (!isIntN(OffsetSize, Op.getImm())) {
3275       Error(getFlatOffsetLoc(Operands),
3276             isGFX9() ? "expected a 13-bit signed offset" :
3277                        "expected a 12-bit signed offset");
3278       return false;
3279     }
3280   } else {
3281     if (!isUIntN(OffsetSize - 1, Op.getImm())) {
3282       Error(getFlatOffsetLoc(Operands),
3283             isGFX9() ? "expected a 12-bit unsigned offset" :
3284                        "expected an 11-bit unsigned offset");
3285       return false;
3286     }
3287   }
3288 
3289   return true;
3290 }
3291 
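// SOP2/SOPC instructions can encode at most one unique 32-bit literal across
// their source operands; identical literal values in both sources count as a
// single literal (e.g. both sources of an s_add_u32 may reference the same
// literal constant).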
3292 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3293   unsigned Opcode = Inst.getOpcode();
3294   const MCInstrDesc &Desc = MII.get(Opcode);
3295   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3296     return true;
3297 
3298   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3299   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3300 
3301   const int OpIndices[] = { Src0Idx, Src1Idx };
3302 
3303   unsigned NumExprs = 0;
3304   unsigned NumLiterals = 0;
3305   uint32_t LiteralValue;
3306 
3307   for (int OpIdx : OpIndices) {
3308     if (OpIdx == -1) break;
3309 
3310     const MCOperand &MO = Inst.getOperand(OpIdx);
    // Exclude special imm operands (such as the one used by s_set_gpr_idx_on)
3312     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3313       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3314         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3315         if (NumLiterals == 0 || LiteralValue != Value) {
3316           LiteralValue = Value;
3317           ++NumLiterals;
3318         }
3319       } else if (MO.isExpr()) {
3320         ++NumExprs;
3321       }
3322     }
3323   }
3324 
3325   return NumLiterals + NumExprs <= 1;
3326 }
3327 
3328 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3329   const unsigned Opc = Inst.getOpcode();
3330   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3331       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3332     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3333     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3334 
3335     if (OpSel & ~3)
3336       return false;
3337   }
3338   return true;
3339 }
3340 
3341 // Check if VCC register matches wavefront size
3342 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3343   auto FB = getFeatureBits();
3344   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3345     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3346 }
3347 
// A VOP3 literal is only allowed on GFX10+, and at most one can be used.
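// For example, on GFX10 'v_add3_u32 v0, v1, 0x12345678, v2' can encode the
// constant as a VOP3 literal, while earlier targets reject it.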
3349 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3350   unsigned Opcode = Inst.getOpcode();
3351   const MCInstrDesc &Desc = MII.get(Opcode);
3352   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3353     return true;
3354 
3355   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3356   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3357   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3358 
3359   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3360 
3361   unsigned NumExprs = 0;
3362   unsigned NumLiterals = 0;
3363   uint32_t LiteralValue;
3364 
3365   for (int OpIdx : OpIndices) {
3366     if (OpIdx == -1) break;
3367 
3368     const MCOperand &MO = Inst.getOperand(OpIdx);
3369     if (!MO.isImm() && !MO.isExpr())
3370       continue;
3371     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3372       continue;
3373 
3374     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3375         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug])
3376       return false;
3377 
3378     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3379       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3380       if (NumLiterals == 0 || LiteralValue != Value) {
3381         LiteralValue = Value;
3382         ++NumLiterals;
3383       }
3384     } else if (MO.isExpr()) {
3385       ++NumExprs;
3386     }
3387   }
3388   NumLiterals += NumExprs;
3389 
3390   return !NumLiterals ||
3391          (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3392 }
3393 
3394 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3395                                           const SMLoc &IDLoc,
3396                                           const OperandVector &Operands) {
3397   if (!validateLdsDirect(Inst)) {
3398     Error(IDLoc,
3399       "invalid use of lds_direct");
3400     return false;
3401   }
3402   if (!validateSOPLiteral(Inst)) {
3403     Error(IDLoc,
3404       "only one literal operand is allowed");
3405     return false;
3406   }
3407   if (!validateVOP3Literal(Inst)) {
3408     Error(IDLoc,
3409       "invalid literal operand");
3410     return false;
3411   }
3412   if (!validateConstantBusLimitations(Inst)) {
3413     Error(IDLoc,
3414       "invalid operand (violates constant bus restrictions)");
3415     return false;
3416   }
3417   if (!validateEarlyClobberLimitations(Inst)) {
3418     Error(IDLoc,
3419       "destination must be different than all sources");
3420     return false;
3421   }
3422   if (!validateIntClampSupported(Inst)) {
3423     Error(IDLoc,
3424       "integer clamping is not supported on this GPU");
3425     return false;
3426   }
3427   if (!validateOpSel(Inst)) {
3428     Error(IDLoc,
3429       "invalid op_sel operand");
3430     return false;
3431   }
  // For MUBUF/MTBUF, d16 is part of the opcode, so there is nothing to validate.
3433   if (!validateMIMGD16(Inst)) {
3434     Error(IDLoc,
3435       "d16 modifier is not supported on this GPU");
3436     return false;
3437   }
3438   if (!validateMIMGDim(Inst)) {
3439     Error(IDLoc, "dim modifier is required on this GPU");
3440     return false;
3441   }
3442   if (!validateMIMGDataSize(Inst)) {
3443     Error(IDLoc,
3444       "image data size does not match dmask and tfe");
3445     return false;
3446   }
3447   if (!validateMIMGAddrSize(Inst)) {
3448     Error(IDLoc,
3449       "image address size does not match dim and a16");
3450     return false;
3451   }
3452   if (!validateMIMGAtomicDMask(Inst)) {
3453     Error(IDLoc,
3454       "invalid atomic image dmask");
3455     return false;
3456   }
3457   if (!validateMIMGGatherDMask(Inst)) {
3458     Error(IDLoc,
3459       "invalid image_gather dmask: only one bit must be set");
3460     return false;
3461   }
3462   if (!validateFlatOffset(Inst, Operands)) {
3463     return false;
3464   }
3465 
3466   return true;
3467 }
3468 
3469 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3470                                             const FeatureBitset &FBS,
3471                                             unsigned VariantID = 0);
3472 
3473 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3474                                               OperandVector &Operands,
3475                                               MCStreamer &Out,
3476                                               uint64_t &ErrorInfo,
3477                                               bool MatchingInlineAsm) {
3478   MCInst Inst;
3479   unsigned Result = Match_Success;
3480   for (auto Variant : getMatchedVariants()) {
3481     uint64_t EI;
3482     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3483                                   Variant);
    // We order match statuses from least to most specific, and use the most
    // specific status as the result:
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3487     if ((R == Match_Success) ||
3488         (R == Match_PreferE32) ||
3489         (R == Match_MissingFeature && Result != Match_PreferE32) ||
3490         (R == Match_InvalidOperand && Result != Match_MissingFeature
3491                                    && Result != Match_PreferE32) ||
3492         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
3493                                    && Result != Match_MissingFeature
3494                                    && Result != Match_PreferE32)) {
3495       Result = R;
3496       ErrorInfo = EI;
3497     }
3498     if (R == Match_Success)
3499       break;
3500   }
3501 
3502   switch (Result) {
3503   default: break;
3504   case Match_Success:
3505     if (!validateInstruction(Inst, IDLoc, Operands)) {
3506       return true;
3507     }
3508     Inst.setLoc(IDLoc);
3509     Out.EmitInstruction(Inst, getSTI());
3510     return false;
3511 
3512   case Match_MissingFeature:
3513     return Error(IDLoc, "instruction not supported on this GPU");
3514 
3515   case Match_MnemonicFail: {
3516     FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3517     std::string Suggestion = AMDGPUMnemonicSpellCheck(
3518         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
3519     return Error(IDLoc, "invalid instruction" + Suggestion,
3520                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
3521   }
3522 
3523   case Match_InvalidOperand: {
3524     SMLoc ErrorLoc = IDLoc;
3525     if (ErrorInfo != ~0ULL) {
3526       if (ErrorInfo >= Operands.size()) {
3527         return Error(IDLoc, "too few operands for instruction");
3528       }
3529       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3530       if (ErrorLoc == SMLoc())
3531         ErrorLoc = IDLoc;
3532     }
3533     return Error(ErrorLoc, "invalid operand for instruction");
3534   }
3535 
3536   case Match_PreferE32:
3537     return Error(IDLoc, "internal error: instruction without _e64 suffix "
3538                         "should be encoded as e32");
3539   }
3540   llvm_unreachable("Implement any new match types added!");
3541 }
3542 
3543 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3544   int64_t Tmp = -1;
3545   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3546     return true;
3547   }
3548   if (getParser().parseAbsoluteExpression(Tmp)) {
3549     return true;
3550   }
3551   Ret = static_cast<uint32_t>(Tmp);
3552   return false;
3553 }
3554 
3555 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3556                                                uint32_t &Minor) {
3557   if (ParseAsAbsoluteExpression(Major))
3558     return TokError("invalid major version");
3559 
3560   if (getLexer().isNot(AsmToken::Comma))
3561     return TokError("minor version number required, comma expected");
3562   Lex();
3563 
3564   if (ParseAsAbsoluteExpression(Minor))
3565     return TokError("invalid minor version");
3566 
3567   return false;
3568 }
3569 
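// Parse the .amdgcn_target directive. The quoted target string must exactly
// match the ISA version string computed for the current subtarget options.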
3570 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3571   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3572     return TokError("directive only supported for amdgcn architecture");
3573 
3574   std::string Target;
3575 
3576   SMLoc TargetStart = getTok().getLoc();
3577   if (getParser().parseEscapedString(Target))
3578     return true;
3579   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3580 
3581   std::string ExpectedTarget;
3582   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3583   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3584 
3585   if (Target != ExpectedTargetOS.str())
3586     return getParser().Error(TargetRange.Start, "target must match options",
3587                              TargetRange);
3588 
3589   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3590   return false;
3591 }
3592 
3593 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3594   return getParser().Error(Range.Start, "value out of range", Range);
3595 }
3596 
3597 bool AMDGPUAsmParser::calculateGPRBlocks(
3598     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3599     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
3600     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
3601     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
3602   // TODO(scott.linder): These calculations are duplicated from
3603   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3604   IsaVersion Version = getIsaVersion(getSTI().getCPU());
3605 
3606   unsigned NumVGPRs = NextFreeVGPR;
3607   unsigned NumSGPRs = NextFreeSGPR;
3608 
3609   if (Version.Major >= 10)
3610     NumSGPRs = 0;
3611   else {
3612     unsigned MaxAddressableNumSGPRs =
3613         IsaInfo::getAddressableNumSGPRs(&getSTI());
3614 
3615     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3616         NumSGPRs > MaxAddressableNumSGPRs)
3617       return OutOfRangeError(SGPRRange);
3618 
3619     NumSGPRs +=
3620         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3621 
3622     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3623         NumSGPRs > MaxAddressableNumSGPRs)
3624       return OutOfRangeError(SGPRRange);
3625 
3626     if (Features.test(FeatureSGPRInitBug))
3627       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3628   }
3629 
3630   VGPRBlocks =
3631       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
3632   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3633 
3634   return false;
3635 }
3636 
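// Parse a .amdhsa_kernel descriptor block, e.g.:
//
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel
//
// Each .amdhsa_ directive may appear at most once; .amdhsa_next_free_vgpr and
// .amdhsa_next_free_sgpr are mandatory.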
3637 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3638   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3639     return TokError("directive only supported for amdgcn architecture");
3640 
3641   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3642     return TokError("directive only supported for amdhsa OS");
3643 
3644   StringRef KernelName;
3645   if (getParser().parseIdentifier(KernelName))
3646     return true;
3647 
3648   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3649 
3650   StringSet<> Seen;
3651 
3652   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3653 
3654   SMRange VGPRRange;
3655   uint64_t NextFreeVGPR = 0;
3656   SMRange SGPRRange;
3657   uint64_t NextFreeSGPR = 0;
3658   unsigned UserSGPRCount = 0;
3659   bool ReserveVCC = true;
3660   bool ReserveFlatScr = true;
3661   bool ReserveXNACK = hasXNACK();
3662   Optional<bool> EnableWavefrontSize32;
3663 
3664   while (true) {
3665     while (getLexer().is(AsmToken::EndOfStatement))
3666       Lex();
3667 
3668     if (getLexer().isNot(AsmToken::Identifier))
3669       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3670 
3671     StringRef ID = getTok().getIdentifier();
3672     SMRange IDRange = getTok().getLocRange();
3673     Lex();
3674 
3675     if (ID == ".end_amdhsa_kernel")
3676       break;
3677 
3678     if (Seen.find(ID) != Seen.end())
3679       return TokError(".amdhsa_ directives cannot be repeated");
3680     Seen.insert(ID);
3681 
3682     SMLoc ValStart = getTok().getLoc();
3683     int64_t IVal;
3684     if (getParser().parseAbsoluteExpression(IVal))
3685       return true;
3686     SMLoc ValEnd = getTok().getLoc();
3687     SMRange ValRange = SMRange(ValStart, ValEnd);
3688 
3689     if (IVal < 0)
3690       return OutOfRangeError(ValRange);
3691 
3692     uint64_t Val = IVal;
3693 
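// Range-check VALUE against the bit width of ENTRY and then set the
// corresponding bit-field of FIELD in the kernel descriptor.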
3694 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
3695   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
3696     return OutOfRangeError(RANGE);                                             \
3697   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3698 
3699     if (ID == ".amdhsa_group_segment_fixed_size") {
3700       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3701         return OutOfRangeError(ValRange);
3702       KD.group_segment_fixed_size = Val;
3703     } else if (ID == ".amdhsa_private_segment_fixed_size") {
3704       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3705         return OutOfRangeError(ValRange);
3706       KD.private_segment_fixed_size = Val;
3707     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3708       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3709                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3710                        Val, ValRange);
3711       if (Val)
3712         UserSGPRCount += 4;
3713     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3714       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3715                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3716                        ValRange);
3717       if (Val)
3718         UserSGPRCount += 2;
3719     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3720       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3721                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3722                        ValRange);
3723       if (Val)
3724         UserSGPRCount += 2;
3725     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3726       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3727                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3728                        Val, ValRange);
3729       if (Val)
3730         UserSGPRCount += 2;
3731     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3732       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3733                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3734                        ValRange);
3735       if (Val)
3736         UserSGPRCount += 2;
3737     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3738       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3739                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3740                        ValRange);
3741       if (Val)
3742         UserSGPRCount += 2;
3743     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3744       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3745                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3746                        Val, ValRange);
3747       if (Val)
3748         UserSGPRCount += 1;
3749     } else if (ID == ".amdhsa_wavefront_size32") {
3750       if (IVersion.Major < 10)
3751         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3752                                  IDRange);
3753       EnableWavefrontSize32 = Val;
3754       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3755                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
3756                        Val, ValRange);
3757     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3758       PARSE_BITS_ENTRY(
3759           KD.compute_pgm_rsrc2,
3760           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3761           ValRange);
3762     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3763       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3764                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3765                        ValRange);
3766     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3767       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3768                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3769                        ValRange);
3770     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3771       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3772                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3773                        ValRange);
3774     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3775       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3776                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3777                        ValRange);
3778     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3779       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3780                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3781                        ValRange);
3782     } else if (ID == ".amdhsa_next_free_vgpr") {
3783       VGPRRange = ValRange;
3784       NextFreeVGPR = Val;
3785     } else if (ID == ".amdhsa_next_free_sgpr") {
3786       SGPRRange = ValRange;
3787       NextFreeSGPR = Val;
3788     } else if (ID == ".amdhsa_reserve_vcc") {
3789       if (!isUInt<1>(Val))
3790         return OutOfRangeError(ValRange);
3791       ReserveVCC = Val;
3792     } else if (ID == ".amdhsa_reserve_flat_scratch") {
3793       if (IVersion.Major < 7)
3794         return getParser().Error(IDRange.Start, "directive requires gfx7+",
3795                                  IDRange);
3796       if (!isUInt<1>(Val))
3797         return OutOfRangeError(ValRange);
3798       ReserveFlatScr = Val;
3799     } else if (ID == ".amdhsa_reserve_xnack_mask") {
3800       if (IVersion.Major < 8)
3801         return getParser().Error(IDRange.Start, "directive requires gfx8+",
3802                                  IDRange);
3803       if (!isUInt<1>(Val))
3804         return OutOfRangeError(ValRange);
3805       ReserveXNACK = Val;
3806     } else if (ID == ".amdhsa_float_round_mode_32") {
3807       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3808                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
3809     } else if (ID == ".amdhsa_float_round_mode_16_64") {
3810       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3811                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
3812     } else if (ID == ".amdhsa_float_denorm_mode_32") {
3813       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3814                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
3815     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
3816       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3817                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
3818                        ValRange);
3819     } else if (ID == ".amdhsa_dx10_clamp") {
3820       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3821                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
3822     } else if (ID == ".amdhsa_ieee_mode") {
3823       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
3824                        Val, ValRange);
3825     } else if (ID == ".amdhsa_fp16_overflow") {
3826       if (IVersion.Major < 9)
3827         return getParser().Error(IDRange.Start, "directive requires gfx9+",
3828                                  IDRange);
3829       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
3830                        ValRange);
3831     } else if (ID == ".amdhsa_workgroup_processor_mode") {
3832       if (IVersion.Major < 10)
3833         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3834                                  IDRange);
3835       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
3836                        ValRange);
3837     } else if (ID == ".amdhsa_memory_ordered") {
3838       if (IVersion.Major < 10)
3839         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3840                                  IDRange);
3841       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
3842                        ValRange);
3843     } else if (ID == ".amdhsa_forward_progress") {
3844       if (IVersion.Major < 10)
3845         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3846                                  IDRange);
3847       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
3848                        ValRange);
3849     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
3850       PARSE_BITS_ENTRY(
3851           KD.compute_pgm_rsrc2,
3852           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
3853           ValRange);
3854     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
3855       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3856                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
3857                        Val, ValRange);
3858     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
3859       PARSE_BITS_ENTRY(
3860           KD.compute_pgm_rsrc2,
3861           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
3862           ValRange);
3863     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
3864       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3865                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
3866                        Val, ValRange);
3867     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
3868       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3869                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
3870                        Val, ValRange);
3871     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
3872       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3873                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
3874                        Val, ValRange);
3875     } else if (ID == ".amdhsa_exception_int_div_zero") {
3876       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3877                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
3878                        Val, ValRange);
3879     } else {
3880       return getParser().Error(IDRange.Start,
3881                                "unknown .amdhsa_kernel directive", IDRange);
3882     }
3883 
3884 #undef PARSE_BITS_ENTRY
3885   }
3886 
3887   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
3888     return TokError(".amdhsa_next_free_vgpr directive is required");
3889 
3890   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
3891     return TokError(".amdhsa_next_free_sgpr directive is required");
3892 
3893   unsigned VGPRBlocks;
3894   unsigned SGPRBlocks;
3895   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
3896                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
3897                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
3898                          SGPRBlocks))
3899     return true;
3900 
3901   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
3902           VGPRBlocks))
3903     return OutOfRangeError(VGPRRange);
3904   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3905                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
3906 
3907   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
3908           SGPRBlocks))
3909     return OutOfRangeError(SGPRRange);
3910   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3911                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
3912                   SGPRBlocks);
3913 
3914   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
3915     return TokError("too many user SGPRs enabled");
3916   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
3917                   UserSGPRCount);
3918 
3919   getTargetStreamer().EmitAmdhsaKernelDescriptor(
3920       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
3921       ReserveFlatScr, ReserveXNACK);
3922   return false;
3923 }
3924 
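// Parse the .hsa_code_object_version directive, e.g.
// '.hsa_code_object_version 2,1'.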
3925 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
3926   uint32_t Major;
3927   uint32_t Minor;
3928 
3929   if (ParseDirectiveMajorMinor(Major, Minor))
3930     return true;
3931 
3932   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
3933   return false;
3934 }
3935 
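// Parse the .hsa_code_object_isa directive, either with no arguments (use the
// ISA version of the targeted GPU) or in the explicit form, e.g.
// '.hsa_code_object_isa 7,0,0,"AMD","AMDGPU"'.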
3936 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
3937   uint32_t Major;
3938   uint32_t Minor;
3939   uint32_t Stepping;
3940   StringRef VendorName;
3941   StringRef ArchName;
3942 
3943   // If this directive has no arguments, then use the ISA version for the
3944   // targeted GPU.
3945   if (getLexer().is(AsmToken::EndOfStatement)) {
3946     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3947     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
3948                                                       ISA.Stepping,
3949                                                       "AMD", "AMDGPU");
3950     return false;
3951   }
3952 
3953   if (ParseDirectiveMajorMinor(Major, Minor))
3954     return true;
3955 
3956   if (getLexer().isNot(AsmToken::Comma))
3957     return TokError("stepping version number required, comma expected");
3958   Lex();
3959 
3960   if (ParseAsAbsoluteExpression(Stepping))
3961     return TokError("invalid stepping version");
3962 
3963   if (getLexer().isNot(AsmToken::Comma))
3964     return TokError("vendor name required, comma expected");
3965   Lex();
3966 
3967   if (getLexer().isNot(AsmToken::String))
3968     return TokError("invalid vendor name");
3969 
3970   VendorName = getLexer().getTok().getStringContents();
3971   Lex();
3972 
3973   if (getLexer().isNot(AsmToken::Comma))
3974     return TokError("arch name required, comma expected");
3975   Lex();
3976 
3977   if (getLexer().isNot(AsmToken::String))
3978     return TokError("invalid arch name");
3979 
3980   ArchName = getLexer().getTok().getStringContents();
3981   Lex();
3982 
3983   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
3984                                                     VendorName, ArchName);
3985   return false;
3986 }
3987 
3988 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
3989                                                amd_kernel_code_t &Header) {
3990   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
3991   // assembly for backwards compatibility.
3992   if (ID == "max_scratch_backing_memory_byte_size") {
3993     Parser.eatToEndOfStatement();
3994     return false;
3995   }
3996 
3997   SmallString<40> ErrStr;
3998   raw_svector_ostream Err(ErrStr);
3999   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4000     return TokError(Err.str());
4001   }
4002   Lex();
4003 
4004   if (ID == "enable_wavefront_size32") {
4005     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4006       if (!isGFX10())
4007         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4008       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4009         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4010     } else {
4011       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4012         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4013     }
4014   }
4015 
4016   if (ID == "wavefront_size") {
4017     if (Header.wavefront_size == 5) {
4018       if (!isGFX10())
4019         return TokError("wavefront_size=5 is only allowed on GFX10+");
4020       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4021         return TokError("wavefront_size=5 requires +WavefrontSize32");
4022     } else if (Header.wavefront_size == 6) {
4023       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4024         return TokError("wavefront_size=6 requires +WavefrontSize64");
4025     }
4026   }
4027 
4028   if (ID == "enable_wgp_mode") {
4029     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
4030       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4031   }
4032 
4033   if (ID == "enable_mem_ordered") {
4034     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
4035       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4036   }
4037 
4038   if (ID == "enable_fwd_progress") {
4039     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
4040       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4041   }
4042 
4043   return false;
4044 }
4045 
4046 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4047   amd_kernel_code_t Header;
4048   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4049 
4050   while (true) {
4051     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4052     // will set the current token to EndOfStatement.
4053     while(getLexer().is(AsmToken::EndOfStatement))
4054       Lex();
4055 
4056     if (getLexer().isNot(AsmToken::Identifier))
4057       return TokError("expected value identifier or .end_amd_kernel_code_t");
4058 
4059     StringRef ID = getLexer().getTok().getIdentifier();
4060     Lex();
4061 
4062     if (ID == ".end_amd_kernel_code_t")
4063       break;
4064 
4065     if (ParseAMDKernelCodeTValue(ID, Header))
4066       return true;
4067   }
4068 
4069   getTargetStreamer().EmitAMDKernelCodeT(Header);
4070 
4071   return false;
4072 }
4073 
4074 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4075   if (getLexer().isNot(AsmToken::Identifier))
4076     return TokError("expected symbol name");
4077 
4078   StringRef KernelName = Parser.getTok().getString();
4079 
4080   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4081                                            ELF::STT_AMDGPU_HSA_KERNEL);
4082   Lex();
4083   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
4084     KernelScope.initialize(getContext());
4085   return false;
4086 }
4087 
4088 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4089   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4090     return Error(getParser().getTok().getLoc(),
4091                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4092                  "architectures");
4093   }
4094 
4095   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
4096 
4097   std::string ISAVersionStringFromSTI;
4098   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4099   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4100 
4101   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4102     return Error(getParser().getTok().getLoc(),
4103                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4104                  "arguments specified through the command line");
4105   }
4106 
4107   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4108   Lex();
4109 
4110   return false;
4111 }
4112 
4113 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4114   const char *AssemblerDirectiveBegin;
4115   const char *AssemblerDirectiveEnd;
4116   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4117       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
4118           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4119                             HSAMD::V3::AssemblerDirectiveEnd)
4120           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4121                             HSAMD::AssemblerDirectiveEnd);
4122 
4123   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4124     return Error(getParser().getTok().getLoc(),
4125                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4126                  "not available on non-amdhsa OSes")).str());
4127   }
4128 
4129   std::string HSAMetadataString;
4130   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4131                           HSAMetadataString))
4132     return true;
4133 
4134   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
4135     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4136       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4137   } else {
4138     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4139       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4140   }
4141 
4142   return false;
4143 }
4144 
4145 /// Common code to parse out a block of text (typically YAML) between start and
4146 /// end directives.
4147 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4148                                           const char *AssemblerDirectiveEnd,
4149                                           std::string &CollectString) {
4150 
4151   raw_string_ostream CollectStream(CollectString);
4152 
4153   getLexer().setSkipSpace(false);
4154 
4155   bool FoundEnd = false;
4156   while (!getLexer().is(AsmToken::Eof)) {
4157     while (getLexer().is(AsmToken::Space)) {
4158       CollectStream << getLexer().getTok().getString();
4159       Lex();
4160     }
4161 
4162     if (getLexer().is(AsmToken::Identifier)) {
4163       StringRef ID = getLexer().getTok().getIdentifier();
4164       if (ID == AssemblerDirectiveEnd) {
4165         Lex();
4166         FoundEnd = true;
4167         break;
4168       }
4169     }
4170 
4171     CollectStream << Parser.parseStringToEndOfStatement()
4172                   << getContext().getAsmInfo()->getSeparatorString();
4173 
4174     Parser.eatToEndOfStatement();
4175   }
4176 
4177   getLexer().setSkipSpace(true);
4178 
4179   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
4180     return TokError(Twine("expected directive ") +
4181                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4182   }
4183 
4184   CollectStream.flush();
4185   return false;
4186 }
4187 
4188 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4189 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4190   std::string String;
4191   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4192                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4193     return true;
4194 
4195   auto PALMetadata = getTargetStreamer().getPALMetadata();
4196   if (!PALMetadata->setFromString(String))
4197     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4198   return false;
4199 }
4200 
4201 /// Parse the assembler directive for old linear-format PAL metadata.
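/// The payload is a comma-separated list of key/value pairs; each pair is
/// recorded as a PAL metadata register write, and parsing stops after the
/// first value that is not followed by a comma.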
4202 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4203   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4204     return Error(getParser().getTok().getLoc(),
4205                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4206                  "not available on non-amdpal OSes")).str());
4207   }
4208 
4209   auto PALMetadata = getTargetStreamer().getPALMetadata();
4210   PALMetadata->setLegacy();
4211   for (;;) {
4212     uint32_t Key, Value;
4213     if (ParseAsAbsoluteExpression(Key)) {
4214       return TokError(Twine("invalid value in ") +
4215                       Twine(PALMD::AssemblerDirective));
4216     }
4217     if (getLexer().isNot(AsmToken::Comma)) {
4218       return TokError(Twine("expected an even number of values in ") +
4219                       Twine(PALMD::AssemblerDirective));
4220     }
4221     Lex();
4222     if (ParseAsAbsoluteExpression(Value)) {
4223       return TokError(Twine("invalid value in ") +
4224                       Twine(PALMD::AssemblerDirective));
4225     }
4226     PALMetadata->setRegister(Key, Value);
4227     if (getLexer().isNot(AsmToken::Comma))
4228       break;
4229     Lex();
4230   }
4231   return false;
4232 }
4233 
4234 /// ParseDirectiveAMDGPULDS
4235 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
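///  e.g. '.amdgpu_lds lds_sym, 512, 16' reserves 512 bytes of LDS for lds_sym
///  with 16-byte alignment; the alignment defaults to 4 when omitted.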
4236 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4237   if (getParser().checkForValidSection())
4238     return true;
4239 
4240   StringRef Name;
4241   SMLoc NameLoc = getLexer().getLoc();
4242   if (getParser().parseIdentifier(Name))
4243     return TokError("expected identifier in directive");
4244 
4245   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4246   if (parseToken(AsmToken::Comma, "expected ','"))
4247     return true;
4248 
4249   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4250 
4251   int64_t Size;
4252   SMLoc SizeLoc = getLexer().getLoc();
4253   if (getParser().parseAbsoluteExpression(Size))
4254     return true;
4255   if (Size < 0)
4256     return Error(SizeLoc, "size must be non-negative");
4257   if (Size > LocalMemorySize)
4258     return Error(SizeLoc, "size is too large");
4259 
4260   int64_t Align = 4;
4261   if (getLexer().is(AsmToken::Comma)) {
4262     Lex();
4263     SMLoc AlignLoc = getLexer().getLoc();
4264     if (getParser().parseAbsoluteExpression(Align))
4265       return true;
4266     if (Align < 0 || !isPowerOf2_64(Align))
4267       return Error(AlignLoc, "alignment must be a power of two");
4268 
    // Alignment larger than the size of LDS is possible in theory, as long
    // as the linker manages to place the symbol at address 0, but we do want
    // to make sure the alignment fits into a 32-bit integer.
4272     if (Align >= 1u << 31)
4273       return Error(AlignLoc, "alignment is too large");
4274   }
4275 
4276   if (parseToken(AsmToken::EndOfStatement,
4277                  "unexpected token in '.amdgpu_lds' directive"))
4278     return true;
4279 
4280   Symbol->redefineIfPossible();
4281   if (!Symbol->isUndefined())
4282     return Error(NameLoc, "invalid symbol redefinition");
4283 
4284   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align);
4285   return false;
4286 }
4287 
4288 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4289   StringRef IDVal = DirectiveID.getString();
4290 
4291   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
4292     if (IDVal == ".amdgcn_target")
4293       return ParseDirectiveAMDGCNTarget();
4294 
4295     if (IDVal == ".amdhsa_kernel")
4296       return ParseDirectiveAMDHSAKernel();
4297 
4298     // TODO: Restructure/combine with PAL metadata directive.
4299     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4300       return ParseDirectiveHSAMetadata();
4301   } else {
4302     if (IDVal == ".hsa_code_object_version")
4303       return ParseDirectiveHSACodeObjectVersion();
4304 
4305     if (IDVal == ".hsa_code_object_isa")
4306       return ParseDirectiveHSACodeObjectISA();
4307 
4308     if (IDVal == ".amd_kernel_code_t")
4309       return ParseDirectiveAMDKernelCodeT();
4310 
4311     if (IDVal == ".amdgpu_hsa_kernel")
4312       return ParseDirectiveAMDGPUHsaKernel();
4313 
4314     if (IDVal == ".amd_amdgpu_isa")
4315       return ParseDirectiveISAVersion();
4316 
4317     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4318       return ParseDirectiveHSAMetadata();
4319   }
4320 
4321   if (IDVal == ".amdgpu_lds")
4322     return ParseDirectiveAMDGPULDS();
4323 
4324   if (IDVal == PALMD::AssemblerDirectiveBegin)
4325     return ParseDirectivePALMetadataBegin();
4326 
4327   if (IDVal == PALMD::AssemblerDirective)
4328     return ParseDirectivePALMetadata();
4329 
4330   return true;
4331 }
4332 
4333 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4334                                            unsigned RegNo) const {
4335 
4336   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4337        R.isValid(); ++R) {
4338     if (*R == RegNo)
4339       return isGFX9() || isGFX10();
4340   }
4341 
4342   // GFX10 has 2 more SGPRs 104 and 105.
4343   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4344        R.isValid(); ++R) {
4345     if (*R == RegNo)
4346       return hasSGPR104_SGPR105();
4347   }
4348 
4349   switch (RegNo) {
4350   case AMDGPU::SRC_SHARED_BASE:
4351   case AMDGPU::SRC_SHARED_LIMIT:
4352   case AMDGPU::SRC_PRIVATE_BASE:
4353   case AMDGPU::SRC_PRIVATE_LIMIT:
4354   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4355     return !isCI() && !isSI() && !isVI();
4356   case AMDGPU::TBA:
4357   case AMDGPU::TBA_LO:
4358   case AMDGPU::TBA_HI:
4359   case AMDGPU::TMA:
4360   case AMDGPU::TMA_LO:
4361   case AMDGPU::TMA_HI:
4362     return !isGFX9() && !isGFX10();
4363   case AMDGPU::XNACK_MASK:
4364   case AMDGPU::XNACK_MASK_LO:
4365   case AMDGPU::XNACK_MASK_HI:
4366     return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4367   case AMDGPU::SGPR_NULL:
4368     return isGFX10();
4369   default:
4370     break;
4371   }
4372 
4373   if (isCI())
4374     return true;
4375 
4376   if (isSI() || isGFX10()) {
4377     // No flat_scr on SI.
4378     // On GFX10 flat scratch is not a valid register operand and can only be
4379     // accessed with s_setreg/s_getreg.
4380     switch (RegNo) {
4381     case AMDGPU::FLAT_SCR:
4382     case AMDGPU::FLAT_SCR_LO:
4383     case AMDGPU::FLAT_SCR_HI:
4384       return false;
4385     default:
4386       return true;
4387     }
4388   }
4389 
4390   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4391   // SI/CI have.
4392   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4393        R.isValid(); ++R) {
4394     if (*R == RegNo)
4395       return hasSGPR102_SGPR103();
4396   }
4397 
4398   return true;
4399 }
4400 
4401 OperandMatchResultTy
4402 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4403                               OperandMode Mode) {
4404   // Try to parse with a custom parser
4405   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4406 
  // If we successfully parsed the operand or if there was an error parsing,
4408   // we are done.
4409   //
4410   // If we are parsing after we reach EndOfStatement then this means we
4411   // are appending default values to the Operands list.  This is only done
4412   // by custom parser, so we shouldn't continue on to the generic parsing.
4413   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4414       getLexer().is(AsmToken::EndOfStatement))
4415     return ResTy;
4416 
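  // GFX10 MIMG instructions with an NSA (non-sequential address) encoding
  // accept the VGPR address as a bracketed list of individual registers,
  // e.g. '[v4, v9, v2]'; parse that list here.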
4417   if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4418     unsigned Prefix = Operands.size();
4419     SMLoc LBraceLoc = getTok().getLoc();
4420     Parser.Lex(); // eat the '['
4421 
4422     for (;;) {
4423       ResTy = parseReg(Operands);
4424       if (ResTy != MatchOperand_Success)
4425         return ResTy;
4426 
4427       if (getLexer().is(AsmToken::RBrac))
4428         break;
4429 
4430       if (getLexer().isNot(AsmToken::Comma))
4431         return MatchOperand_ParseFail;
4432       Parser.Lex();
4433     }
4434 
4435     if (Operands.size() - Prefix > 1) {
4436       Operands.insert(Operands.begin() + Prefix,
4437                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4438       Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4439                                                     getTok().getLoc()));
4440     }
4441 
4442     Parser.Lex(); // eat the ']'
4443     return MatchOperand_Success;
4444   }
4445 
4446   return parseRegOrImm(Operands);
4447 }
4448 
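// Strip a known encoding suffix from the mnemonic and remember the forced
// encoding, e.g. 'v_add_f32_e64' forces the 64-bit (VOP3) encoding and
// 'v_mov_b32_sdwa' forces the SDWA encoding.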
4449 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4450   // Clear any forced encodings from the previous instruction.
4451   setForcedEncodingSize(0);
4452   setForcedDPP(false);
4453   setForcedSDWA(false);
4454 
4455   if (Name.endswith("_e64")) {
4456     setForcedEncodingSize(64);
4457     return Name.substr(0, Name.size() - 4);
4458   } else if (Name.endswith("_e32")) {
4459     setForcedEncodingSize(32);
4460     return Name.substr(0, Name.size() - 4);
4461   } else if (Name.endswith("_dpp")) {
4462     setForcedDPP(true);
4463     return Name.substr(0, Name.size() - 4);
4464   } else if (Name.endswith("_sdwa")) {
4465     setForcedSDWA(true);
4466     return Name.substr(0, Name.size() - 5);
4467   }
4468   return Name;
4469 }
4470 
4471 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4472                                        StringRef Name,
4473                                        SMLoc NameLoc, OperandVector &Operands) {
4474   // Add the instruction mnemonic
4475   Name = parseMnemonicSuffix(Name);
4476   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4477 
4478   bool IsMIMG = Name.startswith("image_");
4479 
4480   while (!getLexer().is(AsmToken::EndOfStatement)) {
4481     OperandMode Mode = OperandMode_Default;
4482     if (IsMIMG && isGFX10() && Operands.size() == 2)
4483       Mode = OperandMode_NSA;
4484     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4485 
4486     // Eat the comma or space if there is one.
4487     if (getLexer().is(AsmToken::Comma))
4488       Parser.Lex();
4489 
4490     switch (Res) {
4491       case MatchOperand_Success: break;
4492       case MatchOperand_ParseFail:
4493         // FIXME: use real operand location rather than the current location.
4494         Error(getLexer().getLoc(), "failed parsing operand.");
4495         while (!getLexer().is(AsmToken::EndOfStatement)) {
4496           Parser.Lex();
4497         }
4498         return true;
4499       case MatchOperand_NoMatch:
4500         // FIXME: use real operand location rather than the current location.
4501         Error(getLexer().getLoc(), "not a valid operand.");
4502         while (!getLexer().is(AsmToken::EndOfStatement)) {
4503           Parser.Lex();
4504         }
4505         return true;
4506     }
4507   }
4508 
4509   return false;
4510 }
4511 
4512 //===----------------------------------------------------------------------===//
4513 // Utility functions
4514 //===----------------------------------------------------------------------===//
4515 
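// Parse an integer operand of the form '<prefix>:<expr>', e.g. 'offset:4095'.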
4516 OperandMatchResultTy
4517 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4518 
4519   if (!trySkipId(Prefix, AsmToken::Colon))
4520     return MatchOperand_NoMatch;
4521 
4522   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4523 }
4524 
4525 OperandMatchResultTy
4526 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4527                                     AMDGPUOperand::ImmTy ImmTy,
4528                                     bool (*ConvertResult)(int64_t&)) {
4529   SMLoc S = getLoc();
4530   int64_t Value = 0;
4531 
4532   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4533   if (Res != MatchOperand_Success)
4534     return Res;
4535 
4536   if (ConvertResult && !ConvertResult(Value)) {
4537     Error(S, "invalid " + StringRef(Prefix) + " value.");
4538   }
4539 
4540   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4541   return MatchOperand_Success;
4542 }
4543 
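// Parse an operand of the form '<prefix>:[b0,b1,...]' where each element is
// 0 or 1; the bits are packed into a single immediate, so e.g. 'op_sel:[0,1]'
// yields the value 2.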
4544 OperandMatchResultTy
4545 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4546                                              OperandVector &Operands,
4547                                              AMDGPUOperand::ImmTy ImmTy,
4548                                              bool (*ConvertResult)(int64_t&)) {
4549   SMLoc S = getLoc();
4550   if (!trySkipId(Prefix, AsmToken::Colon))
4551     return MatchOperand_NoMatch;
4552 
4553   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4554     return MatchOperand_ParseFail;
4555 
4556   unsigned Val = 0;
4557   const unsigned MaxSize = 4;
4558 
4559   // FIXME: How to verify the number of elements matches the number of src
4560   // operands?
4561   for (int I = 0; ; ++I) {
4562     int64_t Op;
4563     SMLoc Loc = getLoc();
4564     if (!parseExpr(Op))
4565       return MatchOperand_ParseFail;
4566 
4567     if (Op != 0 && Op != 1) {
4568       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4569       return MatchOperand_ParseFail;
4570     }
4571 
4572     Val |= (Op << I);
4573 
4574     if (trySkipToken(AsmToken::RBrac))
4575       break;
4576 
4577     if (I + 1 == MaxSize) {
4578       Error(getLoc(), "expected a closing square bracket");
4579       return MatchOperand_ParseFail;
4580     }
4581 
4582     if (!skipToken(AsmToken::Comma, "expected a comma"))
4583       return MatchOperand_ParseFail;
4584   }
4585 
4586   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4587   return MatchOperand_Success;
4588 }
4589 
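// Parses a single named bit such as "glc": the bare name sets the bit to 1,
// while a "no"-prefixed spelling (e.g. "noglc") explicitly clears it. If the
// operand is omitted, the default value 0 is used.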
4590 OperandMatchResultTy
4591 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4592                                AMDGPUOperand::ImmTy ImmTy) {
4593   int64_t Bit = 0;
4594   SMLoc S = Parser.getTok().getLoc();
4595 
4596   // If we are at the end of the statement, this is a default argument, so
4597   // use the default value; otherwise try to parse the named bit.
4598   if (getLexer().isNot(AsmToken::EndOfStatement)) {
4599     switch(getLexer().getKind()) {
4600       case AsmToken::Identifier: {
4601         StringRef Tok = Parser.getTok().getString();
4602         if (Tok == Name) {
4603           if (Tok == "r128" && isGFX9())
4604             Error(S, "r128 modifier is not supported on this GPU");
4605           if (Tok == "a16" && !isGFX9() && !isGFX10())
4606             Error(S, "a16 modifier is not supported on this GPU");
4607           Bit = 1;
4608           Parser.Lex();
4609         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4610           Bit = 0;
4611           Parser.Lex();
4612         } else {
4613           return MatchOperand_NoMatch;
4614         }
4615         break;
4616       }
4617       default:
4618         return MatchOperand_NoMatch;
4619     }
4620   }
4621 
4622   if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4623     return MatchOperand_ParseFail;
4624 
4625   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4626   return MatchOperand_Success;
4627 }
4628 
4629 static void addOptionalImmOperand(
4630   MCInst& Inst, const OperandVector& Operands,
4631   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4632   AMDGPUOperand::ImmTy ImmT,
4633   int64_t Default = 0) {
4634   auto i = OptionalIdx.find(ImmT);
4635   if (i != OptionalIdx.end()) {
4636     unsigned Idx = i->second;
4637     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4638   } else {
4639     Inst.addOperand(MCOperand::createImm(Default));
4640   }
4641 }
4642 
4643 OperandMatchResultTy
4644 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4645   if (getLexer().isNot(AsmToken::Identifier)) {
4646     return MatchOperand_NoMatch;
4647   }
4648   StringRef Tok = Parser.getTok().getString();
4649   if (Tok != Prefix) {
4650     return MatchOperand_NoMatch;
4651   }
4652 
4653   Parser.Lex();
4654   if (getLexer().isNot(AsmToken::Colon)) {
4655     return MatchOperand_ParseFail;
4656   }
4657 
4658   Parser.Lex();
4659   if (getLexer().isNot(AsmToken::Identifier)) {
4660     return MatchOperand_ParseFail;
4661   }
4662 
4663   Value = Parser.getTok().getString();
4664   return MatchOperand_Success;
4665 }
4666 
4667 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
4668 // values to live in a joint format operand in the MCInst encoding.
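// For example (illustrative): "dfmt:6 nfmt:1", in either order, encodes
// Format = 6 | (1 << 4) = 0x16, with dfmt in bits [3:0] and nfmt in
// bits [6:4] of the combined operand.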
4669 OperandMatchResultTy
4670 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
4671   SMLoc S = Parser.getTok().getLoc();
4672   int64_t Dfmt = 0, Nfmt = 0;
4673   // dfmt and nfmt can appear in either order, and each is optional.
4674   bool GotDfmt = false, GotNfmt = false;
4675   while (!GotDfmt || !GotNfmt) {
4676     if (!GotDfmt) {
4677       auto Res = parseIntWithPrefix("dfmt", Dfmt);
4678       if (Res != MatchOperand_NoMatch) {
4679         if (Res != MatchOperand_Success)
4680           return Res;
4681         if (Dfmt >= 16) {
4682           Error(Parser.getTok().getLoc(), "out of range dfmt");
4683           return MatchOperand_ParseFail;
4684         }
4685         GotDfmt = true;
4686         Parser.Lex();
4687         continue;
4688       }
4689     }
4690     if (!GotNfmt) {
4691       auto Res = parseIntWithPrefix("nfmt", Nfmt);
4692       if (Res != MatchOperand_NoMatch) {
4693         if (Res != MatchOperand_Success)
4694           return Res;
4695         if (Nfmt >= 8) {
4696           Error(Parser.getTok().getLoc(), "out of range nfmt");
4697           return MatchOperand_ParseFail;
4698         }
4699         GotNfmt = true;
4700         Parser.Lex();
4701         continue;
4702       }
4703     }
4704     break;
4705   }
4706   if (!GotDfmt && !GotNfmt)
4707     return MatchOperand_NoMatch;
4708   auto Format = Dfmt | Nfmt << 4;
4709   Operands.push_back(
4710       AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
4711   return MatchOperand_Success;
4712 }
4713 
4714 //===----------------------------------------------------------------------===//
4715 // ds
4716 //===----------------------------------------------------------------------===//
4717 
4718 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
4719                                     const OperandVector &Operands) {
4720   OptionalImmIndexMap OptionalIdx;
4721 
4722   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4723     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4724 
4725     // Add the register arguments
4726     if (Op.isReg()) {
4727       Op.addRegOperands(Inst, 1);
4728       continue;
4729     }
4730 
4731     // Handle optional arguments
4732     OptionalIdx[Op.getImmTy()] = i;
4733   }
4734 
4735   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
4736   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
4737   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4738 
4739   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4740 }
4741 
4742 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
4743                                 bool IsGdsHardcoded) {
4744   OptionalImmIndexMap OptionalIdx;
4745 
4746   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4747     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4748 
4749     // Add the register arguments
4750     if (Op.isReg()) {
4751       Op.addRegOperands(Inst, 1);
4752       continue;
4753     }
4754 
4755     if (Op.isToken() && Op.getToken() == "gds") {
4756       IsGdsHardcoded = true;
4757       continue;
4758     }
4759 
4760     // Handle optional arguments
4761     OptionalIdx[Op.getImmTy()] = i;
4762   }
4763 
4764   AMDGPUOperand::ImmTy OffsetType =
4765     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
4766      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
4767      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
4768                                                       AMDGPUOperand::ImmTyOffset;
4769 
4770   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
4771 
4772   if (!IsGdsHardcoded) {
4773     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4774   }
4775   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4776 }
4777 
4778 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
4779   OptionalImmIndexMap OptionalIdx;
4780 
4781   unsigned OperandIdx[4];
4782   unsigned EnMask = 0;
4783   int SrcIdx = 0;
4784 
4785   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4786     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4787 
4788     // Add the register arguments
4789     if (Op.isReg()) {
4790       assert(SrcIdx < 4);
4791       OperandIdx[SrcIdx] = Inst.size();
4792       Op.addRegOperands(Inst, 1);
4793       ++SrcIdx;
4794       continue;
4795     }
4796 
4797     if (Op.isOff()) {
4798       assert(SrcIdx < 4);
4799       OperandIdx[SrcIdx] = Inst.size();
4800       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
4801       ++SrcIdx;
4802       continue;
4803     }
4804 
4805     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
4806       Op.addImmOperands(Inst, 1);
4807       continue;
4808     }
4809 
4810     if (Op.isToken() && Op.getToken() == "done")
4811       continue;
4812 
4813     // Handle optional arguments
4814     OptionalIdx[Op.getImmTy()] = i;
4815   }
4816 
4817   assert(SrcIdx == 4);
4818 
4819   bool Compr = false;
4820   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
4821     Compr = true;
4822     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
4823     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
4824     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
4825   }
4826 
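  // Build the 'en' mask: each source that is not 'off' enables one bit,
  // or two bits per source when the compr modifier is present.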
4827   for (auto i = 0; i < SrcIdx; ++i) {
4828     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
4829       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
4830     }
4831   }
4832 
4833   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
4834   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
4835 
4836   Inst.addOperand(MCOperand::createImm(EnMask));
4837 }
4838 
4839 //===----------------------------------------------------------------------===//
4840 // s_waitcnt
4841 //===----------------------------------------------------------------------===//
4842 
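// A waitcnt operand is built from individual counter specifications such as
// (illustrative) "vmcnt(0) & lgkmcnt(0)"; counters may be separated by '&'
// or ','. A "_sat" suffix (e.g. "expcnt_sat") clamps a value that does not
// fit in the counter field instead of reporting an error.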
4843 static bool
4844 encodeCnt(
4845   const AMDGPU::IsaVersion ISA,
4846   int64_t &IntVal,
4847   int64_t CntVal,
4848   bool Saturate,
4849   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
4850   unsigned (*decode)(const IsaVersion &Version, unsigned))
4851 {
4852   bool Failed = false;
4853 
4854   IntVal = encode(ISA, IntVal, CntVal);
4855   if (CntVal != decode(ISA, IntVal)) {
4856     if (Saturate) {
4857       IntVal = encode(ISA, IntVal, -1);
4858     } else {
4859       Failed = true;
4860     }
4861   }
4862   return Failed;
4863 }
4864 
4865 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
4866 
4867   SMLoc CntLoc = getLoc();
4868   StringRef CntName = getTokenStr();
4869 
4870   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
4871       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
4872     return false;
4873 
4874   int64_t CntVal;
4875   SMLoc ValLoc = getLoc();
4876   if (!parseExpr(CntVal))
4877     return false;
4878 
4879   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4880 
4881   bool Failed = true;
4882   bool Sat = CntName.endswith("_sat");
4883 
4884   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
4885     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
4886   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
4887     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
4888   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
4889     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
4890   } else {
4891     Error(CntLoc, "invalid counter name " + CntName);
4892     return false;
4893   }
4894 
4895   if (Failed) {
4896     Error(ValLoc, "too large value for " + CntName);
4897     return false;
4898   }
4899 
4900   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
4901     return false;
4902 
4903   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
4904     if (isToken(AsmToken::EndOfStatement)) {
4905       Error(getLoc(), "expected a counter name");
4906       return false;
4907     }
4908   }
4909 
4910   return true;
4911 }
4912 
4913 OperandMatchResultTy
4914 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
4915   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4916   int64_t Waitcnt = getWaitcntBitMask(ISA);
4917   SMLoc S = getLoc();
4918 
4919   // If parse failed, do not return error code
4920   // to avoid excessive error messages.
4921   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
4922     while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
4923   } else {
4924     parseExpr(Waitcnt);
4925   }
4926 
4927   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
4928   return MatchOperand_Success;
4929 }
4930 
4931 bool
4932 AMDGPUOperand::isSWaitCnt() const {
4933   return isImm();
4934 }
4935 
4936 //===----------------------------------------------------------------------===//
4937 // hwreg
4938 //===----------------------------------------------------------------------===//
4939 
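// A hwreg operand is written either as a raw 16-bit immediate or as
// "hwreg(<register>[, <bit offset>, <bit width>])", where the register is
// given by a symbolic name (illustratively, HW_REG_MODE) or a numeric code.
// The symbolic form is validated more strictly than the numeric one.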
4940 bool
4941 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
4942                                 int64_t &Offset,
4943                                 int64_t &Width) {
4944   using namespace llvm::AMDGPU::Hwreg;
4945 
4946   // The register may be specified by name or using a numeric code
4947   if (isToken(AsmToken::Identifier) &&
4948       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
4949     HwReg.IsSymbolic = true;
4950     lex(); // skip register name
4951   } else if (!parseExpr(HwReg.Id)) {
4952     return false;
4953   }
4954 
4955   if (trySkipToken(AsmToken::RParen))
4956     return true;
4957 
4958   // parse optional params
4959   return
4960     skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
4961     parseExpr(Offset) &&
4962     skipToken(AsmToken::Comma, "expected a comma") &&
4963     parseExpr(Width) &&
4964     skipToken(AsmToken::RParen, "expected a closing parenthesis");
4965 }
4966 
4967 bool
4968 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
4969                                const int64_t Offset,
4970                                const int64_t Width,
4971                                const SMLoc Loc) {
4972 
4973   using namespace llvm::AMDGPU::Hwreg;
4974 
4975   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
4976     Error(Loc, "specified hardware register is not supported on this GPU");
4977     return false;
4978   } else if (!isValidHwreg(HwReg.Id)) {
4979     Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
4980     return false;
4981   } else if (!isValidHwregOffset(Offset)) {
4982     Error(Loc, "invalid bit offset: only 5-bit values are legal");
4983     return false;
4984   } else if (!isValidHwregWidth(Width)) {
4985     Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
4986     return false;
4987   }
4988   return true;
4989 }
4990 
4991 OperandMatchResultTy
4992 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
4993   using namespace llvm::AMDGPU::Hwreg;
4994 
4995   int64_t ImmVal = 0;
4996   SMLoc Loc = getLoc();
4997 
4998   // If parse failed, do not return error code
4999   // to avoid excessive error messages.
5000   if (trySkipId("hwreg", AsmToken::LParen)) {
5001     OperandInfoTy HwReg(ID_UNKNOWN_);
5002     int64_t Offset = OFFSET_DEFAULT_;
5003     int64_t Width = WIDTH_DEFAULT_;
5004     if (parseHwregBody(HwReg, Offset, Width) &&
5005         validateHwreg(HwReg, Offset, Width, Loc)) {
5006       ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
5007     }
5008   } else if (parseExpr(ImmVal)) {
5009     if (ImmVal < 0 || !isUInt<16>(ImmVal))
5010       Error(Loc, "invalid immediate: only 16-bit values are legal");
5011   }
5012 
5013   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
5014   return MatchOperand_Success;
5015 }
5016 
5017 bool AMDGPUOperand::isHwreg() const {
5018   return isImmTy(ImmTyHwreg);
5019 }
5020 
5021 //===----------------------------------------------------------------------===//
5022 // sendmsg
5023 //===----------------------------------------------------------------------===//
5024 
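// A sendmsg operand is written either as a raw 16-bit immediate or as
// "sendmsg(<msg>[, <operation>[, <stream id>]])", e.g. (illustrative)
// "sendmsg(MSG_GS, GS_OP_EMIT, 0)". As with hwreg, the symbolic form is
// validated more strictly than the numeric one.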
5025 bool
5026 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
5027                                   OperandInfoTy &Op,
5028                                   OperandInfoTy &Stream) {
5029   using namespace llvm::AMDGPU::SendMsg;
5030 
5031   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
5032     Msg.IsSymbolic = true;
5033     lex(); // skip message name
5034   } else if (!parseExpr(Msg.Id)) {
5035     return false;
5036   }
5037 
5038   if (trySkipToken(AsmToken::Comma)) {
5039     Op.IsDefined = true;
5040     if (isToken(AsmToken::Identifier) &&
5041         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
5042       lex(); // skip operation name
5043     } else if (!parseExpr(Op.Id)) {
5044       return false;
5045     }
5046 
5047     if (trySkipToken(AsmToken::Comma)) {
5048       Stream.IsDefined = true;
5049       if (!parseExpr(Stream.Id))
5050         return false;
5051     }
5052   }
5053 
5054   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
5055 }
5056 
5057 bool
5058 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
5059                                  const OperandInfoTy &Op,
5060                                  const OperandInfoTy &Stream,
5061                                  const SMLoc S) {
5062   using namespace llvm::AMDGPU::SendMsg;
5063 
5064   // Validation strictness depends on whether the message is specified
5065   // in symbolic or in numeric form. In the latter case
5066   // only the possibility of encoding the value is checked.
5067   bool Strict = Msg.IsSymbolic;
5068 
5069   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
5070     Error(S, "invalid message id");
5071     return false;
5072   } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
5073     Error(S, Op.IsDefined ?
5074              "message does not support operations" :
5075              "missing message operation");
5076     return false;
5077   } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
5078     Error(S, "invalid operation id");
5079     return false;
5080   } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
5081     Error(S, "message operation does not support streams");
5082     return false;
5083   } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
5084     Error(S, "invalid message stream id");
5085     return false;
5086   }
5087   return true;
5088 }
5089 
5090 OperandMatchResultTy
5091 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
5092   using namespace llvm::AMDGPU::SendMsg;
5093 
5094   int64_t ImmVal = 0;
5095   SMLoc Loc = getLoc();
5096 
5097   // If parse failed, do not return error code
5098   // to avoid excessive error messages.
5099   if (trySkipId("sendmsg", AsmToken::LParen)) {
5100     OperandInfoTy Msg(ID_UNKNOWN_);
5101     OperandInfoTy Op(OP_NONE_);
5102     OperandInfoTy Stream(STREAM_ID_NONE_);
5103     if (parseSendMsgBody(Msg, Op, Stream) &&
5104         validateSendMsg(Msg, Op, Stream, Loc)) {
5105       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5106     }
5107   } else if (parseExpr(ImmVal)) {
5108     if (ImmVal < 0 || !isUInt<16>(ImmVal))
5109       Error(Loc, "invalid immediate: only 16-bit values are legal");
5110   }
5111 
5112   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5113   return MatchOperand_Success;
5114 }
5115 
5116 bool AMDGPUOperand::isSendMsg() const {
5117   return isImmTy(ImmTySendMsg);
5118 }
5119 
5120 //===----------------------------------------------------------------------===//
5121 // v_interp
5122 //===----------------------------------------------------------------------===//
5123 
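// Interpolation operands: the slot is one of "p10", "p20" or "p0", and the
// attribute is written as "attr<N>.<chan>", e.g. (illustrative) "attr3.y",
// where N is at most 63 and <chan> is one of x, y, z or w.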
5124 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5125   if (getLexer().getKind() != AsmToken::Identifier)
5126     return MatchOperand_NoMatch;
5127 
5128   StringRef Str = Parser.getTok().getString();
5129   int Slot = StringSwitch<int>(Str)
5130     .Case("p10", 0)
5131     .Case("p20", 1)
5132     .Case("p0", 2)
5133     .Default(-1);
5134 
5135   SMLoc S = Parser.getTok().getLoc();
5136   if (Slot == -1)
5137     return MatchOperand_ParseFail;
5138 
5139   Parser.Lex();
5140   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5141                                               AMDGPUOperand::ImmTyInterpSlot));
5142   return MatchOperand_Success;
5143 }
5144 
5145 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5146   if (getLexer().getKind() != AsmToken::Identifier)
5147     return MatchOperand_NoMatch;
5148 
5149   StringRef Str = Parser.getTok().getString();
5150   if (!Str.startswith("attr"))
5151     return MatchOperand_NoMatch;
5152 
5153   StringRef Chan = Str.take_back(2);
5154   int AttrChan = StringSwitch<int>(Chan)
5155     .Case(".x", 0)
5156     .Case(".y", 1)
5157     .Case(".z", 2)
5158     .Case(".w", 3)
5159     .Default(-1);
5160   if (AttrChan == -1)
5161     return MatchOperand_ParseFail;
5162 
5163   Str = Str.drop_back(2).drop_front(4);
5164 
5165   uint8_t Attr;
5166   if (Str.getAsInteger(10, Attr))
5167     return MatchOperand_ParseFail;
5168 
5169   SMLoc S = Parser.getTok().getLoc();
5170   Parser.Lex();
5171   if (Attr > 63) {
5172     Error(S, "out of bounds attr");
5173     return MatchOperand_Success;
5174   }
5175 
5176   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5177 
5178   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5179                                               AMDGPUOperand::ImmTyInterpAttr));
5180   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5181                                               AMDGPUOperand::ImmTyAttrChan));
5182   return MatchOperand_Success;
5183 }
5184 
5185 //===----------------------------------------------------------------------===//
5186 // exp
5187 //===----------------------------------------------------------------------===//
5188 
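// Export targets are written symbolically and mapped onto hardware target
// numbers as follows (derived from parseExpTgtImpl below): mrt0..mrt7 -> 0..7,
// mrtz -> 8, null -> 9, pos0..pos3 -> 12..15 (pos4 allowed on GFX10),
// prim -> 20 (GFX10 only), param0..param31 -> 32..63.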
5189 void AMDGPUAsmParser::errorExpTgt() {
5190   Error(Parser.getTok().getLoc(), "invalid exp target");
5191 }
5192 
5193 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5194                                                       uint8_t &Val) {
5195   if (Str == "null") {
5196     Val = 9;
5197     return MatchOperand_Success;
5198   }
5199 
5200   if (Str.startswith("mrt")) {
5201     Str = Str.drop_front(3);
5202     if (Str == "z") { // == mrtz
5203       Val = 8;
5204       return MatchOperand_Success;
5205     }
5206 
5207     if (Str.getAsInteger(10, Val))
5208       return MatchOperand_ParseFail;
5209 
5210     if (Val > 7)
5211       errorExpTgt();
5212 
5213     return MatchOperand_Success;
5214   }
5215 
5216   if (Str.startswith("pos")) {
5217     Str = Str.drop_front(3);
5218     if (Str.getAsInteger(10, Val))
5219       return MatchOperand_ParseFail;
5220 
5221     if (Val > 4 || (Val == 4 && !isGFX10()))
5222       errorExpTgt();
5223 
5224     Val += 12;
5225     return MatchOperand_Success;
5226   }
5227 
5228   if (isGFX10() && Str == "prim") {
5229     Val = 20;
5230     return MatchOperand_Success;
5231   }
5232 
5233   if (Str.startswith("param")) {
5234     Str = Str.drop_front(5);
5235     if (Str.getAsInteger(10, Val))
5236       return MatchOperand_ParseFail;
5237 
5238     if (Val >= 32)
5239       errorExpTgt();
5240 
5241     Val += 32;
5242     return MatchOperand_Success;
5243   }
5244 
5245   if (Str.startswith("invalid_target_")) {
5246     Str = Str.drop_front(15);
5247     if (Str.getAsInteger(10, Val))
5248       return MatchOperand_ParseFail;
5249 
5250     errorExpTgt();
5251     return MatchOperand_Success;
5252   }
5253 
5254   return MatchOperand_NoMatch;
5255 }
5256 
5257 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5258   uint8_t Val;
5259   StringRef Str = Parser.getTok().getString();
5260 
5261   auto Res = parseExpTgtImpl(Str, Val);
5262   if (Res != MatchOperand_Success)
5263     return Res;
5264 
5265   SMLoc S = Parser.getTok().getLoc();
5266   Parser.Lex();
5267 
5268   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5269                                               AMDGPUOperand::ImmTyExpTgt));
5270   return MatchOperand_Success;
5271 }
5272 
5273 //===----------------------------------------------------------------------===//
5274 // parser helpers
5275 //===----------------------------------------------------------------------===//
5276 
5277 bool
5278 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5279   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5280 }
5281 
5282 bool
5283 AMDGPUAsmParser::isId(const StringRef Id) const {
5284   return isId(getToken(), Id);
5285 }
5286 
5287 bool
5288 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5289   return getTokenKind() == Kind;
5290 }
5291 
5292 bool
5293 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5294   if (isId(Id)) {
5295     lex();
5296     return true;
5297   }
5298   return false;
5299 }
5300 
5301 bool
5302 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5303   if (isId(Id) && peekToken().is(Kind)) {
5304     lex();
5305     lex();
5306     return true;
5307   }
5308   return false;
5309 }
5310 
5311 bool
5312 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5313   if (isToken(Kind)) {
5314     lex();
5315     return true;
5316   }
5317   return false;
5318 }
5319 
5320 bool
5321 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5322                            const StringRef ErrMsg) {
5323   if (!trySkipToken(Kind)) {
5324     Error(getLoc(), ErrMsg);
5325     return false;
5326   }
5327   return true;
5328 }
5329 
5330 bool
5331 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5332   return !getParser().parseAbsoluteExpression(Imm);
5333 }
5334 
5335 bool
5336 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
5337   SMLoc S = getLoc();
5338 
5339   const MCExpr *Expr;
5340   if (Parser.parseExpression(Expr))
5341     return false;
5342 
5343   int64_t IntVal;
5344   if (Expr->evaluateAsAbsolute(IntVal)) {
5345     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
5346   } else {
5347     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
5348   }
5349   return true;
5350 }
5351 
5352 bool
5353 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5354   if (isToken(AsmToken::String)) {
5355     Val = getToken().getStringContents();
5356     lex();
5357     return true;
5358   } else {
5359     Error(getLoc(), ErrMsg);
5360     return false;
5361   }
5362 }
5363 
5364 AsmToken
5365 AMDGPUAsmParser::getToken() const {
5366   return Parser.getTok();
5367 }
5368 
5369 AsmToken
5370 AMDGPUAsmParser::peekToken() {
5371   return getLexer().peekTok();
5372 }
5373 
5374 void
5375 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
5376   auto TokCount = getLexer().peekTokens(Tokens);
5377 
5378   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
5379     Tokens[Idx] = AsmToken(AsmToken::Error, "");
5380 }
5381 
5382 AsmToken::TokenKind
5383 AMDGPUAsmParser::getTokenKind() const {
5384   return getLexer().getKind();
5385 }
5386 
5387 SMLoc
5388 AMDGPUAsmParser::getLoc() const {
5389   return getToken().getLoc();
5390 }
5391 
5392 StringRef
5393 AMDGPUAsmParser::getTokenStr() const {
5394   return getToken().getString();
5395 }
5396 
5397 void
5398 AMDGPUAsmParser::lex() {
5399   Parser.Lex();
5400 }
5401 
5402 //===----------------------------------------------------------------------===//
5403 // swizzle
5404 //===----------------------------------------------------------------------===//
5405 
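// A swizzle operand is either a plain 16-bit "offset:<imm>" or a macro of the
// form "offset:swizzle(<mode>, ...)", where the mode names (illustratively,
// QUAD_PERM, BITMASK_PERM, BROADCAST, SWAP, REVERSE) come from
// Swizzle::IdSymbolic. All macro modes except QUAD_PERM are lowered to the
// bitmask-perm encoding produced below.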
5406 LLVM_READNONE
5407 static unsigned
5408 encodeBitmaskPerm(const unsigned AndMask,
5409                   const unsigned OrMask,
5410                   const unsigned XorMask) {
5411   using namespace llvm::AMDGPU::Swizzle;
5412 
5413   return BITMASK_PERM_ENC |
5414          (AndMask << BITMASK_AND_SHIFT) |
5415          (OrMask  << BITMASK_OR_SHIFT)  |
5416          (XorMask << BITMASK_XOR_SHIFT);
5417 }
5418 
5419 bool
5420 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5421                                       const unsigned MinVal,
5422                                       const unsigned MaxVal,
5423                                       const StringRef ErrMsg) {
5424   for (unsigned i = 0; i < OpNum; ++i) {
5425     if (!skipToken(AsmToken::Comma, "expected a comma")){
5426       return false;
5427     }
5428     SMLoc ExprLoc = Parser.getTok().getLoc();
5429     if (!parseExpr(Op[i])) {
5430       return false;
5431     }
5432     if (Op[i] < MinVal || Op[i] > MaxVal) {
5433       Error(ExprLoc, ErrMsg);
5434       return false;
5435     }
5436   }
5437 
5438   return true;
5439 }
5440 
5441 bool
5442 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5443   using namespace llvm::AMDGPU::Swizzle;
5444 
5445   int64_t Lane[LANE_NUM];
5446   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5447                            "expected a 2-bit lane id")) {
5448     Imm = QUAD_PERM_ENC;
5449     for (unsigned I = 0; I < LANE_NUM; ++I) {
5450       Imm |= Lane[I] << (LANE_SHIFT * I);
5451     }
5452     return true;
5453   }
5454   return false;
5455 }
5456 
5457 bool
5458 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5459   using namespace llvm::AMDGPU::Swizzle;
5460 
5461   SMLoc S = Parser.getTok().getLoc();
5462   int64_t GroupSize;
5463   int64_t LaneIdx;
5464 
5465   if (!parseSwizzleOperands(1, &GroupSize,
5466                             2, 32,
5467                             "group size must be in the interval [2,32]")) {
5468     return false;
5469   }
5470   if (!isPowerOf2_64(GroupSize)) {
5471     Error(S, "group size must be a power of two");
5472     return false;
5473   }
5474   if (parseSwizzleOperands(1, &LaneIdx,
5475                            0, GroupSize - 1,
5476                            "lane id must be in the interval [0,group size - 1]")) {
5477     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5478     return true;
5479   }
5480   return false;
5481 }
5482 
5483 bool
5484 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
5485   using namespace llvm::AMDGPU::Swizzle;
5486 
5487   SMLoc S = Parser.getTok().getLoc();
5488   int64_t GroupSize;
5489 
5490   if (!parseSwizzleOperands(1, &GroupSize,
5491       2, 32, "group size must be in the interval [2,32]")) {
5492     return false;
5493   }
5494   if (!isPowerOf2_64(GroupSize)) {
5495     Error(S, "group size must be a power of two");
5496     return false;
5497   }
5498 
5499   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
5500   return true;
5501 }
5502 
5503 bool
5504 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5505   using namespace llvm::AMDGPU::Swizzle;
5506 
5507   SMLoc S = Parser.getTok().getLoc();
5508   int64_t GroupSize;
5509 
5510   if (!parseSwizzleOperands(1, &GroupSize,
5511       1, 16, "group size must be in the interval [1,16]")) {
5512     return false;
5513   }
5514   if (!isPowerOf2_64(GroupSize)) {
5515     Error(S, "group size must be a power of two");
5516     return false;
5517   }
5518 
5519   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5520   return true;
5521 }
5522 
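// BITMASK_PERM takes a 5-character control string, with the first character
// controlling the most significant bit. Per character: '0' contributes to no
// mask, '1' sets the OR mask bit, 'p' sets the AND mask bit, and 'i' sets
// both the AND and XOR mask bits; that is, the corresponding lane-id bit is
// cleared, set, preserved or inverted, respectively.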
5523 bool
5524 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5525   using namespace llvm::AMDGPU::Swizzle;
5526 
5527   if (!skipToken(AsmToken::Comma, "expected a comma")) {
5528     return false;
5529   }
5530 
5531   StringRef Ctl;
5532   SMLoc StrLoc = Parser.getTok().getLoc();
5533   if (!parseString(Ctl)) {
5534     return false;
5535   }
5536   if (Ctl.size() != BITMASK_WIDTH) {
5537     Error(StrLoc, "expected a 5-character mask");
5538     return false;
5539   }
5540 
5541   unsigned AndMask = 0;
5542   unsigned OrMask = 0;
5543   unsigned XorMask = 0;
5544 
5545   for (size_t i = 0; i < Ctl.size(); ++i) {
5546     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5547     switch(Ctl[i]) {
5548     default:
5549       Error(StrLoc, "invalid mask");
5550       return false;
5551     case '0':
5552       break;
5553     case '1':
5554       OrMask |= Mask;
5555       break;
5556     case 'p':
5557       AndMask |= Mask;
5558       break;
5559     case 'i':
5560       AndMask |= Mask;
5561       XorMask |= Mask;
5562       break;
5563     }
5564   }
5565 
5566   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
5567   return true;
5568 }
5569 
5570 bool
5571 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5572 
5573   SMLoc OffsetLoc = Parser.getTok().getLoc();
5574 
5575   if (!parseExpr(Imm)) {
5576     return false;
5577   }
5578   if (!isUInt<16>(Imm)) {
5579     Error(OffsetLoc, "expected a 16-bit offset");
5580     return false;
5581   }
5582   return true;
5583 }
5584 
5585 bool
5586 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
5587   using namespace llvm::AMDGPU::Swizzle;
5588 
5589   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
5590 
5591     SMLoc ModeLoc = Parser.getTok().getLoc();
5592     bool Ok = false;
5593 
5594     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
5595       Ok = parseSwizzleQuadPerm(Imm);
5596     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
5597       Ok = parseSwizzleBitmaskPerm(Imm);
5598     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
5599       Ok = parseSwizzleBroadcast(Imm);
5600     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
5601       Ok = parseSwizzleSwap(Imm);
5602     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
5603       Ok = parseSwizzleReverse(Imm);
5604     } else {
5605       Error(ModeLoc, "expected a swizzle mode");
5606     }
5607 
5608     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
5609   }
5610 
5611   return false;
5612 }
5613 
5614 OperandMatchResultTy
5615 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
5616   SMLoc S = Parser.getTok().getLoc();
5617   int64_t Imm = 0;
5618 
5619   if (trySkipId("offset")) {
5620 
5621     bool Ok = false;
5622     if (skipToken(AsmToken::Colon, "expected a colon")) {
5623       if (trySkipId("swizzle")) {
5624         Ok = parseSwizzleMacro(Imm);
5625       } else {
5626         Ok = parseSwizzleOffset(Imm);
5627       }
5628     }
5629 
5630     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
5631 
5632     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
5633   } else {
5634     // Swizzle "offset" operand is optional.
5635     // If it is omitted, try parsing other optional operands.
5636     return parseOptionalOpr(Operands);
5637   }
5638 }
5639 
5640 bool
5641 AMDGPUOperand::isSwizzle() const {
5642   return isImmTy(ImmTySwizzle);
5643 }
5644 
5645 //===----------------------------------------------------------------------===//
5646 // VGPR Index Mode
5647 //===----------------------------------------------------------------------===//
5648 
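// The gpr_idx operand is either a 4-bit immediate or a macro of the form
// "gpr_idx(<mode>, ...)", where each mode name comes from
// VGPRIndexMode::IdSymbolic (illustratively SRC0, SRC1, SRC2, DST), enables
// the corresponding indexing-mode bit, and may be listed at most once.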
5649 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
5650 
5651   using namespace llvm::AMDGPU::VGPRIndexMode;
5652 
5653   if (trySkipToken(AsmToken::RParen)) {
5654     return OFF;
5655   }
5656 
5657   int64_t Imm = 0;
5658 
5659   while (true) {
5660     unsigned Mode = 0;
5661     SMLoc S = Parser.getTok().getLoc();
5662 
5663     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
5664       if (trySkipId(IdSymbolic[ModeId])) {
5665         Mode = 1 << ModeId;
5666         break;
5667       }
5668     }
5669 
5670     if (Mode == 0) {
5671       Error(S, (Imm == 0)?
5672                "expected a VGPR index mode or a closing parenthesis" :
5673                "expected a VGPR index mode");
5674       break;
5675     }
5676 
5677     if (Imm & Mode) {
5678       Error(S, "duplicate VGPR index mode");
5679       break;
5680     }
5681     Imm |= Mode;
5682 
5683     if (trySkipToken(AsmToken::RParen))
5684       break;
5685     if (!skipToken(AsmToken::Comma,
5686                    "expected a comma or a closing parenthesis"))
5687       break;
5688   }
5689 
5690   return Imm;
5691 }
5692 
5693 OperandMatchResultTy
5694 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
5695 
5696   int64_t Imm = 0;
5697   SMLoc S = Parser.getTok().getLoc();
5698 
5699   if (getLexer().getKind() == AsmToken::Identifier &&
5700       Parser.getTok().getString() == "gpr_idx" &&
5701       getLexer().peekTok().is(AsmToken::LParen)) {
5702 
5703     Parser.Lex();
5704     Parser.Lex();
5705 
5706     // If parse failed, trigger an error but do not return error code
5707     // to avoid excessive error messages.
5708     Imm = parseGPRIdxMacro();
5709 
5710   } else {
5711     if (getParser().parseAbsoluteExpression(Imm))
5712       return MatchOperand_NoMatch;
5713     if (Imm < 0 || !isUInt<4>(Imm)) {
5714       Error(S, "invalid immediate: only 4-bit values are legal");
5715     }
5716   }
5717 
5718   Operands.push_back(
5719       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
5720   return MatchOperand_Success;
5721 }
5722 
5723 bool AMDGPUOperand::isGPRIdxMode() const {
5724   return isImmTy(ImmTyGprIdxMode);
5725 }
5726 
5727 //===----------------------------------------------------------------------===//
5728 // sopp branch targets
5729 //===----------------------------------------------------------------------===//
5730 
5731 OperandMatchResultTy
5732 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
5733 
5734   // Make sure we are not parsing something
5735   // that looks like a label or an expression but is not.
5736   // This will improve error messages.
5737   if (isRegister() || isModifier())
5738     return MatchOperand_NoMatch;
5739 
5740   if (parseExpr(Operands)) {
5741 
5742     AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
5743     assert(Opr.isImm() || Opr.isExpr());
5744     SMLoc Loc = Opr.getStartLoc();
5745 
5746     // Currently we do not support arbitrary expressions as branch targets.
5747     // Only labels and absolute expressions are accepted.
5748     if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
5749       Error(Loc, "expected an absolute expression or a label");
5750     } else if (Opr.isImm() && !Opr.isS16Imm()) {
5751       Error(Loc, "expected a 16-bit signed jump offset");
5752     }
5753   }
5754 
5755   return MatchOperand_Success; // avoid excessive error messages
5756 }
5757 
5758 //===----------------------------------------------------------------------===//
5759 // Boolean holding registers
5760 //===----------------------------------------------------------------------===//
5761 
5762 OperandMatchResultTy
5763 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
5764   return parseReg(Operands);
5765 }
5766 
5767 //===----------------------------------------------------------------------===//
5768 // mubuf
5769 //===----------------------------------------------------------------------===//
5770 
5771 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
5772   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
5773 }
5774 
5775 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
5776   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
5777 }
5778 
5779 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
5780   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
5781 }
5782 
5783 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
5784                                const OperandVector &Operands,
5785                                bool IsAtomic,
5786                                bool IsAtomicReturn,
5787                                bool IsLds) {
5788   bool IsLdsOpcode = IsLds;
5789   bool HasLdsModifier = false;
5790   OptionalImmIndexMap OptionalIdx;
5791   assert(IsAtomicReturn ? IsAtomic : true);
5792   unsigned FirstOperandIdx = 1;
5793 
5794   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
5795     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5796 
5797     // Add the register arguments
5798     if (Op.isReg()) {
5799       Op.addRegOperands(Inst, 1);
5800       // Insert a tied src for atomic return dst.
5801       // This cannot be postponed as subsequent calls to
5802       // addImmOperands rely on the correct number of MC operands.
5803       if (IsAtomicReturn && i == FirstOperandIdx)
5804         Op.addRegOperands(Inst, 1);
5805       continue;
5806     }
5807 
5808     // Handle the case where soffset is an immediate
5809     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5810       Op.addImmOperands(Inst, 1);
5811       continue;
5812     }
5813 
5814     HasLdsModifier |= Op.isLDS();
5815 
5816     // Handle tokens like 'offen' which are sometimes hard-coded into the
5817     // asm string.  There are no MCInst operands for these.
5818     if (Op.isToken()) {
5819       continue;
5820     }
5821     assert(Op.isImm());
5822 
5823     // Handle optional arguments
5824     OptionalIdx[Op.getImmTy()] = i;
5825   }
5826 
5827   // This is a workaround for an llvm quirk which may result in an
5828   // incorrect instruction selection. Lds and non-lds versions of
5829   // MUBUF instructions are identical except that lds versions
5830   // have a mandatory 'lds' modifier. However, this modifier follows
5831   // optional modifiers, and the llvm asm matcher regards the 'lds'
5832   // modifier as an optional one. As a result, an lds version
5833   // of an opcode may be selected even if it has no 'lds' modifier.
5834   if (IsLdsOpcode && !HasLdsModifier) {
5835     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
5836     if (NoLdsOpcode != -1) { // Got lds version - correct it.
5837       Inst.setOpcode(NoLdsOpcode);
5838       IsLdsOpcode = false;
5839     }
5840   }
5841 
5842   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
5843   if (!IsAtomic) { // glc is hard-coded.
5844     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5845   }
5846   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5847 
5848   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
5849     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5850   }
5851 
5852   if (isGFX10())
5853     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5854 }
5855 
5856 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
5857   OptionalImmIndexMap OptionalIdx;
5858 
5859   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5860     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5861 
5862     // Add the register arguments
5863     if (Op.isReg()) {
5864       Op.addRegOperands(Inst, 1);
5865       continue;
5866     }
5867 
5868     // Handle the case where soffset is an immediate
5869     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5870       Op.addImmOperands(Inst, 1);
5871       continue;
5872     }
5873 
5874     // Handle tokens like 'offen' which are sometimes hard-coded into the
5875     // asm string.  There are no MCInst operands for these.
5876     if (Op.isToken()) {
5877       continue;
5878     }
5879     assert(Op.isImm());
5880 
5881     // Handle optional arguments
5882     OptionalIdx[Op.getImmTy()] = i;
5883   }
5884 
5885   addOptionalImmOperand(Inst, Operands, OptionalIdx,
5886                         AMDGPUOperand::ImmTyOffset);
5887   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
5888   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5889   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5890   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5891 
5892   if (isGFX10())
5893     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5894 }
5895 
5896 //===----------------------------------------------------------------------===//
5897 // mimg
5898 //===----------------------------------------------------------------------===//
5899 
5900 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
5901                               bool IsAtomic) {
5902   unsigned I = 1;
5903   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5904   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5905     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5906   }
5907 
5908   if (IsAtomic) {
5909     // Add src, same as dst
5910     assert(Desc.getNumDefs() == 1);
5911     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
5912   }
5913 
5914   OptionalImmIndexMap OptionalIdx;
5915 
5916   for (unsigned E = Operands.size(); I != E; ++I) {
5917     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5918 
5919     // Add the register arguments
5920     if (Op.isReg()) {
5921       Op.addRegOperands(Inst, 1);
5922     } else if (Op.isImmModifier()) {
5923       OptionalIdx[Op.getImmTy()] = I;
5924     } else if (!Op.isToken()) {
5925       llvm_unreachable("unexpected operand type");
5926     }
5927   }
5928 
5929   bool IsGFX10 = isGFX10();
5930 
5931   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
5932   if (IsGFX10)
5933     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
5934   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
5935   if (IsGFX10)
5936     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5937   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5938   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5939   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
5940   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5941   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
5942   if (!IsGFX10)
5943     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
5944   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
5945 }
5946 
5947 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
5948   cvtMIMG(Inst, Operands, true);
5949 }
5950 
5951 //===----------------------------------------------------------------------===//
5952 // smrd
5953 //===----------------------------------------------------------------------===//
5954 
5955 bool AMDGPUOperand::isSMRDOffset8() const {
5956   return isImm() && isUInt<8>(getImm());
5957 }
5958 
5959 bool AMDGPUOperand::isSMRDOffset20() const {
5960   return isImm() && isUInt<20>(getImm());
5961 }
5962 
5963 bool AMDGPUOperand::isSMRDLiteralOffset() const {
5964   // 32-bit literals are only supported on CI, and we only want to use them
5965   // when the offset does not fit in 8 bits.
5966   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
5967 }
5968 
5969 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
5970   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5971 }
5972 
5973 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
5974   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5975 }
5976 
5977 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
5978   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5979 }
5980 
5981 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
5982   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5983 }
5984 
5985 //===----------------------------------------------------------------------===//
5986 // vop3
5987 //===----------------------------------------------------------------------===//
5988 
5989 static bool ConvertOmodMul(int64_t &Mul) {
5990   if (Mul != 1 && Mul != 2 && Mul != 4)
5991     return false;
5992 
5993   Mul >>= 1;
5994   return true;
5995 }
5996 
5997 static bool ConvertOmodDiv(int64_t &Div) {
5998   if (Div == 1) {
5999     Div = 0;
6000     return true;
6001   }
6002 
6003   if (Div == 2) {
6004     Div = 3;
6005     return true;
6006   }
6007 
6008   return false;
6009 }
6010 
6011 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
6012   if (BoundCtrl == 0) {
6013     BoundCtrl = 1;
6014     return true;
6015   }
6016 
6017   if (BoundCtrl == -1) {
6018     BoundCtrl = 0;
6019     return true;
6020   }
6021 
6022   return false;
6023 }
6024 
6025 // Note: the order in this table matches the order of operands in AsmString.
6026 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
6027   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
6028   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
6029   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
6030   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
6031   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
6032   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
6033   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
6034   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
6035   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
6036   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
6037   {"format",  AMDGPUOperand::ImmTyFORMAT, false, nullptr},
6038   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
6039   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
6040   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
6041   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
6042   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
6043   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
6044   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
6045   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
6046   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
6047   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
6048   {"a16",     AMDGPUOperand::ImmTyR128A16,  true, nullptr},
6049   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
6050   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
6051   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
6052   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
6053   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
6054   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
6055   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
6056   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
6057   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
6058   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
6059   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
6060   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
6061   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
6062   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
6063   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
6064   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
6065   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
6066   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
6067   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
6068   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
6069   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
6070 };
6071 
6072 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
6073   unsigned size = Operands.size();
6074   assert(size > 0);
6075 
6076   OperandMatchResultTy res = parseOptionalOpr(Operands);
6077 
6078   // This is a hack to enable hardcoded mandatory operands which follow
6079   // optional operands.
6080   //
6081   // The current design assumes that all operands after the first optional
6082   // operand are also optional. However, the implementation of some instructions
6083   // violates this rule (see e.g. flat/global atomics, which have hardcoded 'glc' operands).
6084   //
6085   // To alleviate this problem, we have to (implicitly) parse extra operands
6086   // to make sure the autogenerated parser of custom operands never hits
6087   // hardcoded mandatory operands.
6088 
6089   if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
6090 
6091     // We have parsed the first optional operand.
6092     // Parse as many operands as necessary to skip all mandatory operands.
6093 
6094     for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
6095       if (res != MatchOperand_Success ||
6096           getLexer().is(AsmToken::EndOfStatement)) break;
6097       if (getLexer().is(AsmToken::Comma)) Parser.Lex();
6098       res = parseOptionalOpr(Operands);
6099     }
6100   }
6101 
6102   return res;
6103 }
6104 
6105 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6106   OperandMatchResultTy res;
6107   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6108     // try to parse any optional operand here
6109     if (Op.IsBit) {
6110       res = parseNamedBit(Op.Name, Operands, Op.Type);
6111     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6112       res = parseOModOperand(Operands);
6113     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6114                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6115                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6116       res = parseSDWASel(Operands, Op.Name, Op.Type);
6117     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6118       res = parseSDWADstUnused(Operands);
6119     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6120                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6121                Op.Type == AMDGPUOperand::ImmTyNegLo ||
6122                Op.Type == AMDGPUOperand::ImmTyNegHi) {
6123       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6124                                         Op.ConvertResult);
6125     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6126       res = parseDim(Operands);
6127     } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
6128       res = parseDfmtNfmt(Operands);
6129     } else {
6130       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6131     }
6132     if (res != MatchOperand_NoMatch) {
6133       return res;
6134     }
6135   }
6136   return MatchOperand_NoMatch;
6137 }
6138 
6139 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6140   StringRef Name = Parser.getTok().getString();
6141   if (Name == "mul") {
6142     return parseIntWithPrefix("mul", Operands,
6143                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6144   }
6145 
6146   if (Name == "div") {
6147     return parseIntWithPrefix("div", Operands,
6148                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6149   }
6150 
6151   return MatchOperand_NoMatch;
6152 }
6153 
6154 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6155   cvtVOP3P(Inst, Operands);
6156 
6157   int Opc = Inst.getOpcode();
6158 
6159   int SrcNum;
6160   const int Ops[] = { AMDGPU::OpName::src0,
6161                       AMDGPU::OpName::src1,
6162                       AMDGPU::OpName::src2 };
6163   for (SrcNum = 0;
6164        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6165        ++SrcNum);
6166   assert(SrcNum > 0);
6167 
6168   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6169   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6170 
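  // The op_sel bit just past the last source (bit SrcNum) selects the
  // destination; record it as DST_OP_SEL in src0_modifiers.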
6171   if ((OpSel & (1 << SrcNum)) != 0) {
6172     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6173     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6174     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6175   }
6176 }
6177 
6178 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
6179       // 1. This operand is an input-modifiers operand
6180   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
6181       // 2. This is not the last operand
6182       && Desc.NumOperands > (OpNum + 1)
6183       // 3. The next operand is a register class
6184       && Desc.OpInfo[OpNum + 1].RegClass != -1
6185       // 4. The next register is not tied to any other operand
6186       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6187 }
6188 
6189 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6190 {
6191   OptionalImmIndexMap OptionalIdx;
6192   unsigned Opc = Inst.getOpcode();
6193 
6194   unsigned I = 1;
6195   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6196   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6197     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6198   }
6199 
6200   for (unsigned E = Operands.size(); I != E; ++I) {
6201     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6202     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6203       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6204     } else if (Op.isInterpSlot() ||
6205                Op.isInterpAttr() ||
6206                Op.isAttrChan()) {
6207       Inst.addOperand(MCOperand::createImm(Op.getImm()));
6208     } else if (Op.isImmModifier()) {
6209       OptionalIdx[Op.getImmTy()] = I;
6210     } else {
6211       llvm_unreachable("unhandled operand type");
6212     }
6213   }
6214 
6215   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6216     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6217   }
6218 
6219   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6220     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6221   }
6222 
6223   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6224     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6225   }
6226 }
6227 
6228 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
6229                               OptionalImmIndexMap &OptionalIdx) {
6230   unsigned Opc = Inst.getOpcode();
6231 
6232   unsigned I = 1;
6233   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6234   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6235     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6236   }
6237 
6238   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
6239     // This instruction has src modifiers
6240     for (unsigned E = Operands.size(); I != E; ++I) {
6241       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6242       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6243         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6244       } else if (Op.isImmModifier()) {
6245         OptionalIdx[Op.getImmTy()] = I;
6246       } else if (Op.isRegOrImm()) {
6247         Op.addRegOrImmOperands(Inst, 1);
6248       } else {
6249         llvm_unreachable("unhandled operand type");
6250       }
6251     }
6252   } else {
6253     // No src modifiers
6254     for (unsigned E = Operands.size(); I != E; ++I) {
6255       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6256       if (Op.isMod()) {
6257         OptionalIdx[Op.getImmTy()] = I;
6258       } else {
6259         Op.addRegOrImmOperands(Inst, 1);
6260       }
6261     }
6262   }
6263 
6264   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6265     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6266   }
6267 
6268   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6269     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6270   }
6271 
  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // these opcodes have a src2 register operand that is tied to the dst
  // operand. The assembler does not allow modifiers on this operand, so
  // src2_modifiers must be 0.
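  // Illustrative example (not in the original sources): for
  // "v_mac_f32 v0, v1, v2" the accumulator src2 is not written by the user;
  // the code below materializes src2_modifiers = 0 and copies the dst
  // operand (v0) into the src2 slot.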
6276   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
6277       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
6278       Opc == AMDGPU::V_MAC_F32_e64_vi ||
6279       Opc == AMDGPU::V_MAC_F16_e64_vi ||
6280       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
6281       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
6282       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
6283     auto it = Inst.begin();
6284     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
6285     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
6286     ++it;
6287     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6288   }
6289 }
6290 
6291 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
6292   OptionalImmIndexMap OptionalIdx;
6293   cvtVOP3(Inst, Operands, OptionalIdx);
6294 }
6295 
6296 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
6297                                const OperandVector &Operands) {
6298   OptionalImmIndexMap OptIdx;
6299   const int Opc = Inst.getOpcode();
6300   const MCInstrDesc &Desc = MII.get(Opc);
6301 
6302   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
6303 
6304   cvtVOP3(Inst, Operands, OptIdx);
6305 
6306   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
6307     assert(!IsPacked);
6308     Inst.addOperand(Inst.getOperand(0));
6309   }
6310 
  // FIXME: This is messy. Parse the modifiers as if this were a normal VOP3
  // instruction, and then figure out where to actually put the modifiers.
6313 
6314   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
6315 
6316   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
6317   if (OpSelHiIdx != -1) {
6318     int DefaultVal = IsPacked ? -1 : 0;
6319     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
6320                           DefaultVal);
6321   }
6322 
6323   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
6324   if (NegLoIdx != -1) {
6325     assert(IsPacked);
6326     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
6327     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
6328   }
6329 
6330   const int Ops[] = { AMDGPU::OpName::src0,
6331                       AMDGPU::OpName::src1,
6332                       AMDGPU::OpName::src2 };
6333   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
6334                          AMDGPU::OpName::src1_modifiers,
6335                          AMDGPU::OpName::src2_modifiers };
6336 
6337   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6338 
6339   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6340   unsigned OpSelHi = 0;
6341   unsigned NegLo = 0;
6342   unsigned NegHi = 0;
6343 
6344   if (OpSelHiIdx != -1) {
6345     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
6346   }
6347 
6348   if (NegLoIdx != -1) {
6349     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
6350     NegLo = Inst.getOperand(NegLoIdx).getImm();
6351     NegHi = Inst.getOperand(NegHiIdx).getImm();
6352   }
6353 
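  // Added for clarity: for each source, fold its bit from the parsed
  // op_sel / op_sel_hi / neg_lo / neg_hi masks into the matching
  // srcN_modifiers operand as OP_SEL_0 / OP_SEL_1 / NEG / NEG_HI.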
6354   for (int J = 0; J < 3; ++J) {
6355     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
6356     if (OpIdx == -1)
6357       break;
6358 
6359     uint32_t ModVal = 0;
6360 
6361     if ((OpSel & (1 << J)) != 0)
6362       ModVal |= SISrcMods::OP_SEL_0;
6363 
6364     if ((OpSelHi & (1 << J)) != 0)
6365       ModVal |= SISrcMods::OP_SEL_1;
6366 
6367     if ((NegLo & (1 << J)) != 0)
6368       ModVal |= SISrcMods::NEG;
6369 
6370     if ((NegHi & (1 << J)) != 0)
6371       ModVal |= SISrcMods::NEG_HI;
6372 
6373     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
6374 
6375     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
6376   }
6377 }
6378 
6379 //===----------------------------------------------------------------------===//
6380 // dpp
6381 //===----------------------------------------------------------------------===//
6382 
6383 bool AMDGPUOperand::isDPP8() const {
6384   return isImmTy(ImmTyDPP8);
6385 }
6386 
6387 bool AMDGPUOperand::isDPPCtrl() const {
6388   using namespace AMDGPU::DPP;
6389 
6390   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
6391   if (result) {
6392     int64_t Imm = getImm();
6393     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
6394            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
6395            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
6396            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
6397            (Imm == DppCtrl::WAVE_SHL1) ||
6398            (Imm == DppCtrl::WAVE_ROL1) ||
6399            (Imm == DppCtrl::WAVE_SHR1) ||
6400            (Imm == DppCtrl::WAVE_ROR1) ||
6401            (Imm == DppCtrl::ROW_MIRROR) ||
6402            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
6403            (Imm == DppCtrl::BCAST15) ||
6404            (Imm == DppCtrl::BCAST31) ||
6405            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
6406            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
6407   }
6408   return false;
6409 }
6410 
6411 //===----------------------------------------------------------------------===//
6412 // mAI
6413 //===----------------------------------------------------------------------===//
6414 
6415 bool AMDGPUOperand::isBLGP() const {
6416   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
6417 }
6418 
6419 bool AMDGPUOperand::isCBSZ() const {
6420   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
6421 }
6422 
6423 bool AMDGPUOperand::isABID() const {
6424   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
6425 }
6426 
6427 bool AMDGPUOperand::isS16Imm() const {
6428   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
6429 }
6430 
6431 bool AMDGPUOperand::isU16Imm() const {
6432   return isImm() && isUInt<16>(getImm());
6433 }
6434 
6435 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
6436   if (!isGFX10())
6437     return MatchOperand_NoMatch;
6438 
6439   SMLoc S = Parser.getTok().getLoc();
6440 
6441   if (getLexer().isNot(AsmToken::Identifier))
6442     return MatchOperand_NoMatch;
6443   if (getLexer().getTok().getString() != "dim")
6444     return MatchOperand_NoMatch;
6445 
6446   Parser.Lex();
6447   if (getLexer().isNot(AsmToken::Colon))
6448     return MatchOperand_ParseFail;
6449 
6450   Parser.Lex();
6451 
6452   // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
6453   // integer.
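  // Illustrative examples: "dim:1D", "dim:2D", or the long form
  // "dim:SQ_RSRC_IMG_2D" (the prefix is stripped below).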
6454   std::string Token;
6455   if (getLexer().is(AsmToken::Integer)) {
6456     SMLoc Loc = getLexer().getTok().getEndLoc();
6457     Token = getLexer().getTok().getString();
6458     Parser.Lex();
6459     if (getLexer().getTok().getLoc() != Loc)
6460       return MatchOperand_ParseFail;
6461   }
6462   if (getLexer().isNot(AsmToken::Identifier))
6463     return MatchOperand_ParseFail;
6464   Token += getLexer().getTok().getString();
6465 
6466   StringRef DimId = Token;
6467   if (DimId.startswith("SQ_RSRC_IMG_"))
6468     DimId = DimId.substr(12);
6469 
6470   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
6471   if (!DimInfo)
6472     return MatchOperand_ParseFail;
6473 
6474   Parser.Lex();
6475 
6476   Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
6477                                               AMDGPUOperand::ImmTyDim));
6478   return MatchOperand_Success;
6479 }
6480 
6481 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
6482   SMLoc S = Parser.getTok().getLoc();
6483   StringRef Prefix;
6484 
6485   if (getLexer().getKind() == AsmToken::Identifier) {
6486     Prefix = Parser.getTok().getString();
6487   } else {
6488     return MatchOperand_NoMatch;
6489   }
6490 
6491   if (Prefix != "dpp8")
6492     return parseDPPCtrl(Operands);
6493   if (!isGFX10())
6494     return MatchOperand_NoMatch;
6495 
6496   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
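  // Illustrative example: "dpp8:[7,6,5,4,3,2,1,0]". Each of the eight lane
  // selectors must be in [0,7] and is packed into 3 bits of the 24-bit DPP8
  // value built below.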
6497 
6498   int64_t Sels[8];
6499 
6500   Parser.Lex();
6501   if (getLexer().isNot(AsmToken::Colon))
6502     return MatchOperand_ParseFail;
6503 
6504   Parser.Lex();
6505   if (getLexer().isNot(AsmToken::LBrac))
6506     return MatchOperand_ParseFail;
6507 
6508   Parser.Lex();
6509   if (getParser().parseAbsoluteExpression(Sels[0]))
6510     return MatchOperand_ParseFail;
6511   if (0 > Sels[0] || 7 < Sels[0])
6512     return MatchOperand_ParseFail;
6513 
6514   for (size_t i = 1; i < 8; ++i) {
6515     if (getLexer().isNot(AsmToken::Comma))
6516       return MatchOperand_ParseFail;
6517 
6518     Parser.Lex();
6519     if (getParser().parseAbsoluteExpression(Sels[i]))
6520       return MatchOperand_ParseFail;
6521     if (0 > Sels[i] || 7 < Sels[i])
6522       return MatchOperand_ParseFail;
6523   }
6524 
6525   if (getLexer().isNot(AsmToken::RBrac))
6526     return MatchOperand_ParseFail;
6527   Parser.Lex();
6528 
6529   unsigned DPP8 = 0;
6530   for (size_t i = 0; i < 8; ++i)
6531     DPP8 |= (Sels[i] << (i * 3));
6532 
6533   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
6534   return MatchOperand_Success;
6535 }
6536 
6537 OperandMatchResultTy
6538 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
6539   using namespace AMDGPU::DPP;
6540 
6541   SMLoc S = Parser.getTok().getLoc();
6542   StringRef Prefix;
6543   int64_t Int;
6544 
6545   if (getLexer().getKind() == AsmToken::Identifier) {
6546     Prefix = Parser.getTok().getString();
6547   } else {
6548     return MatchOperand_NoMatch;
6549   }
6550 
6551   if (Prefix == "row_mirror") {
6552     Int = DppCtrl::ROW_MIRROR;
6553     Parser.Lex();
6554   } else if (Prefix == "row_half_mirror") {
6555     Int = DppCtrl::ROW_HALF_MIRROR;
6556     Parser.Lex();
6557   } else {
    // Check to prevent parseDPPCtrl from eating invalid tokens
6559     if (Prefix != "quad_perm"
6560         && Prefix != "row_shl"
6561         && Prefix != "row_shr"
6562         && Prefix != "row_ror"
6563         && Prefix != "wave_shl"
6564         && Prefix != "wave_rol"
6565         && Prefix != "wave_shr"
6566         && Prefix != "wave_ror"
6567         && Prefix != "row_bcast"
6568         && Prefix != "row_share"
6569         && Prefix != "row_xmask") {
6570       return MatchOperand_NoMatch;
6571     }
6572 
6573     if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
6574       return MatchOperand_NoMatch;
6575 
6576     if (!isVI() && !isGFX9() &&
6577         (Prefix == "wave_shl" || Prefix == "wave_shr" ||
6578          Prefix == "wave_rol" || Prefix == "wave_ror" ||
6579          Prefix == "row_bcast"))
6580       return MatchOperand_NoMatch;
6581 
6582     Parser.Lex();
6583     if (getLexer().isNot(AsmToken::Colon))
6584       return MatchOperand_ParseFail;
6585 
6586     if (Prefix == "quad_perm") {
6587       // quad_perm:[%d,%d,%d,%d]
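      // Illustrative example: "quad_perm:[0,1,2,3]" (the identity
      // permutation). Each selector must be in [0,3] and occupies 2 bits of
      // the control value built below.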
6588       Parser.Lex();
6589       if (getLexer().isNot(AsmToken::LBrac))
6590         return MatchOperand_ParseFail;
6591       Parser.Lex();
6592 
      if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <= 3))
6594         return MatchOperand_ParseFail;
6595 
6596       for (int i = 0; i < 3; ++i) {
6597         if (getLexer().isNot(AsmToken::Comma))
6598           return MatchOperand_ParseFail;
6599         Parser.Lex();
6600 
6601         int64_t Temp;
        if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <= 3))
          return MatchOperand_ParseFail;
        const int shift = i * 2 + 2;
6605         Int += (Temp << shift);
6606       }
6607 
6608       if (getLexer().isNot(AsmToken::RBrac))
6609         return MatchOperand_ParseFail;
6610       Parser.Lex();
6611     } else {
6612       // sel:%d
6613       Parser.Lex();
6614       if (getParser().parseAbsoluteExpression(Int))
6615         return MatchOperand_ParseFail;
6616 
6617       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
6618         Int |= DppCtrl::ROW_SHL0;
6619       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
6620         Int |= DppCtrl::ROW_SHR0;
6621       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
6622         Int |= DppCtrl::ROW_ROR0;
6623       } else if (Prefix == "wave_shl" && 1 == Int) {
6624         Int = DppCtrl::WAVE_SHL1;
6625       } else if (Prefix == "wave_rol" && 1 == Int) {
6626         Int = DppCtrl::WAVE_ROL1;
6627       } else if (Prefix == "wave_shr" && 1 == Int) {
6628         Int = DppCtrl::WAVE_SHR1;
6629       } else if (Prefix == "wave_ror" && 1 == Int) {
6630         Int = DppCtrl::WAVE_ROR1;
6631       } else if (Prefix == "row_bcast") {
6632         if (Int == 15) {
6633           Int = DppCtrl::BCAST15;
6634         } else if (Int == 31) {
6635           Int = DppCtrl::BCAST31;
6636         } else {
6637           return MatchOperand_ParseFail;
6638         }
6639       } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
6640         Int |= DppCtrl::ROW_SHARE_FIRST;
6641       } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
6642         Int |= DppCtrl::ROW_XMASK_FIRST;
6643       } else {
6644         return MatchOperand_ParseFail;
6645       }
6646     }
6647   }
6648 
6649   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
6650   return MatchOperand_Success;
6651 }
6652 
6653 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
6654   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
6655 }
6656 
6657 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
6658   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
6659 }
6660 
6661 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
6662   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
6663 }
6664 
6665 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
6666   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
6667 }
6668 
6669 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
6670   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
6671 }
6672 
6673 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
6674   OptionalImmIndexMap OptionalIdx;
6675 
6676   unsigned I = 1;
6677   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6678   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6679     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6680   }
6681 
6682   int Fi = 0;
6683   for (unsigned E = Operands.size(); I != E; ++I) {
6684     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
6685                                             MCOI::TIED_TO);
6686     if (TiedTo != -1) {
6687       assert((unsigned)TiedTo < Inst.getNumOperands());
      // Handle the tied 'old' or src2 operand for MAC instructions.
6689       Inst.addOperand(Inst.getOperand(TiedTo));
6690     }
6691     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6692     // Add the register arguments
6693     if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32, ...) dpp instructions use the "vcc"
      // token. Skip it.
6696       continue;
6697     }
6698 
6699     if (IsDPP8) {
6700       if (Op.isDPP8()) {
6701         Op.addImmOperands(Inst, 1);
6702       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6703         Op.addRegWithFPInputModsOperands(Inst, 2);
6704       } else if (Op.isFI()) {
6705         Fi = Op.getImm();
6706       } else if (Op.isReg()) {
6707         Op.addRegOperands(Inst, 1);
6708       } else {
6709         llvm_unreachable("Invalid operand type");
6710       }
6711     } else {
6712       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6713         Op.addRegWithFPInputModsOperands(Inst, 2);
6714       } else if (Op.isDPPCtrl()) {
6715         Op.addImmOperands(Inst, 1);
6716       } else if (Op.isImm()) {
6717         // Handle optional arguments
6718         OptionalIdx[Op.getImmTy()] = I;
6719       } else {
6720         llvm_unreachable("Invalid operand type");
6721       }
6722     }
6723   }
6724 
6725   if (IsDPP8) {
6726     using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
6728   } else {
6729     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
6730     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
6731     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
6732     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
6733       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
6734     }
6735   }
6736 }
6737 
6738 //===----------------------------------------------------------------------===//
6739 // sdwa
6740 //===----------------------------------------------------------------------===//
6741 
6742 OperandMatchResultTy
6743 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
6744                               AMDGPUOperand::ImmTy Type) {
6745   using namespace llvm::AMDGPU::SDWA;
6746 
6747   SMLoc S = Parser.getTok().getLoc();
6748   StringRef Value;
6749   OperandMatchResultTy res;
6750 
6751   res = parseStringWithPrefix(Prefix, Value);
6752   if (res != MatchOperand_Success) {
6753     return res;
6754   }
6755 
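  // Illustrative example: "src0_sel:WORD_1" or "dst_sel:BYTE_0" (the exact
  // prefix is supplied by the caller); the accepted selector names are the
  // cases of the switch below.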
6756   int64_t Int;
6757   Int = StringSwitch<int64_t>(Value)
6758         .Case("BYTE_0", SdwaSel::BYTE_0)
6759         .Case("BYTE_1", SdwaSel::BYTE_1)
6760         .Case("BYTE_2", SdwaSel::BYTE_2)
6761         .Case("BYTE_3", SdwaSel::BYTE_3)
6762         .Case("WORD_0", SdwaSel::WORD_0)
6763         .Case("WORD_1", SdwaSel::WORD_1)
6764         .Case("DWORD", SdwaSel::DWORD)
6765         .Default(0xffffffff);
6766   Parser.Lex(); // eat last token
6767 
6768   if (Int == 0xffffffff) {
6769     return MatchOperand_ParseFail;
6770   }
6771 
6772   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
6773   return MatchOperand_Success;
6774 }
6775 
6776 OperandMatchResultTy
6777 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
6778   using namespace llvm::AMDGPU::SDWA;
6779 
6780   SMLoc S = Parser.getTok().getLoc();
6781   StringRef Value;
6782   OperandMatchResultTy res;
6783 
6784   res = parseStringWithPrefix("dst_unused", Value);
6785   if (res != MatchOperand_Success) {
6786     return res;
6787   }
6788 
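  // Illustrative example: "dst_unused:UNUSED_PRESERVE". Unknown names fall
  // through to the 0xffffffff default and fail the parse.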
6789   int64_t Int;
6790   Int = StringSwitch<int64_t>(Value)
6791         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
6792         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
6793         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
6794         .Default(0xffffffff);
6795   Parser.Lex(); // eat last token
6796 
6797   if (Int == 0xffffffff) {
6798     return MatchOperand_ParseFail;
6799   }
6800 
6801   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
6802   return MatchOperand_Success;
6803 }
6804 
6805 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
6806   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
6807 }
6808 
6809 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
6810   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
6811 }
6812 
6813 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
6814   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
6815 }
6816 
6817 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
6818   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
6819 }
6820 
6821 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
6822                               uint64_t BasicInstType, bool skipVcc) {
6823   using namespace llvm::AMDGPU::SDWA;
6824 
6825   OptionalImmIndexMap OptionalIdx;
6826   bool skippedVcc = false;
6827 
6828   unsigned I = 1;
6829   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6830   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6831     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6832   }
6833 
6834   for (unsigned E = Operands.size(); I != E; ++I) {
6835     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6836     if (skipVcc && !skippedVcc && Op.isReg() &&
6837         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32, ...) sdwa instructions use the "vcc"
      // token as dst. Skip it if it is the 2nd operand
      // (e.g. v_add_i32_sdwa v1, vcc, v2, v3) or the 4th operand
      // (v_addc_u32_sdwa v1, vcc, v2, v3, vcc).
      // Skip VCC only if we didn't skip it on the previous iteration.
6842       if (BasicInstType == SIInstrFlags::VOP2 &&
6843           (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
6844         skippedVcc = true;
6845         continue;
6846       } else if (BasicInstType == SIInstrFlags::VOPC &&
6847                  Inst.getNumOperands() == 0) {
6848         skippedVcc = true;
6849         continue;
6850       }
6851     }
6852     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6853       Op.addRegOrImmWithInputModsOperands(Inst, 2);
6854     } else if (Op.isImm()) {
6855       // Handle optional arguments
6856       OptionalIdx[Op.getImmTy()] = I;
6857     } else {
6858       llvm_unreachable("Invalid operand type");
6859     }
6860     skippedVcc = false;
6861   }
6862 
6863   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
6864       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
6865       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // V_NOP_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments.
6867     switch (BasicInstType) {
6868     case SIInstrFlags::VOP1:
6869       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6870       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6871         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6872       }
6873       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6874       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6875       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6876       break;
6877 
6878     case SIInstrFlags::VOP2:
6879       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6880       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6881         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6882       }
6883       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6884       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6885       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6886       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6887       break;
6888 
6889     case SIInstrFlags::VOPC:
6890       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
6891         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6892       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6893       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6894       break;
6895 
6896     default:
6897       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
6898     }
6899   }
6900 
  // Special case v_mac_{f16, f32}:
  // these opcodes have a src2 register operand that is tied to the dst operand.
6903   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
6904       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
6905     auto it = Inst.begin();
6906     std::advance(
6907       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
6908     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6909   }
6910 }
6911 
6912 //===----------------------------------------------------------------------===//
6913 // mAI
6914 //===----------------------------------------------------------------------===//
6915 
6916 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
6917   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
6918 }
6919 
6920 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
6921   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
6922 }
6923 
6924 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
6925   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
6926 }
6927 
6928 /// Force static initialization.
6929 extern "C" void LLVMInitializeAMDGPUAsmParser() {
6930   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
6931   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
6932 }
6933 
6934 #define GET_REGISTER_MATCHER
6935 #define GET_MATCHER_IMPLEMENTATION
6936 #define GET_MNEMONIC_SPELL_CHECKER
6937 #include "AMDGPUGenAsmMatcher.inc"
6938 
// This function should be defined after the auto-generated include so that
// the MatchClassKind enum is defined.
6941 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
6942                                                      unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks if we were given an immediate operand but
  // expected the corresponding token.
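  // Illustrative example: a trailing "glc" on a buffer instruction is parsed
  // as an immediate operand, so MCK_glc below accepts it via Operand.isGLC()
  // rather than requiring a literal token.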
6947   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
6948   switch (Kind) {
6949   case MCK_addr64:
6950     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
6951   case MCK_gds:
6952     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
6953   case MCK_lds:
6954     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
6955   case MCK_glc:
6956     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
6957   case MCK_idxen:
6958     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
6959   case MCK_offen:
6960     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
6961   case MCK_SSrcB32:
    // When an operand has an expression value, isToken returns true because a
    // token and an expression cannot be distinguished at parse time.
    // MatchInstructionImpl() always tries to match such an operand as a token,
    // and if the expression's name is not a valid token the match fails, so
    // handle that case here.
6968     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
6969   case MCK_SSrcF32:
6970     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
6971   case MCK_SoppBrTarget:
6972     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
6973   case MCK_VReg32OrOff:
6974     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
6975   case MCK_InterpSlot:
6976     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
6977   case MCK_Attr:
6978     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
6979   case MCK_AttrChan:
6980     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
6981   default:
6982     return Match_InvalidOperand;
6983   }
6984 }
6985 
6986 //===----------------------------------------------------------------------===//
6987 // endpgm
6988 //===----------------------------------------------------------------------===//
6989 
6990 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
6991   SMLoc S = Parser.getTok().getLoc();
6992   int64_t Imm = 0;
6993 
6994   if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
6996     Imm = 0;
6997   }
6998 
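  // Illustrative example: "s_endpgm" and "s_endpgm 1" are both accepted here;
  // a missing immediate defaults to 0 and any written value is range-checked
  // below.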
6999   if (!isUInt<16>(Imm)) {
7000     Error(S, "expected a 16-bit value");
7001     return MatchOperand_ParseFail;
7002   }
7003 
7004   Operands.push_back(
7005       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
7006   return MatchOperand_Success;
7007 }
7008 
7009 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
7010