1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/ErrorHandling.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
63 
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
67 
68 namespace {
69 
70 class AMDGPUAsmParser;
71 
72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
73 
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
77 
78 class AMDGPUOperand : public MCParsedAsmOperand {
79   enum KindTy {
80     Token,
81     Immediate,
82     Register,
83     Expression
84   } Kind;
85 
86   SMLoc StartLoc, EndLoc;
87   const AMDGPUAsmParser *AsmParser;
88 
89 public:
90   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
92 
93   using Ptr = std::unique_ptr<AMDGPUOperand>;
94 
95   struct Modifiers {
96     bool Abs = false;
97     bool Neg = false;
98     bool Sext = false;
99 
100     bool hasFPModifiers() const { return Abs || Neg; }
101     bool hasIntModifiers() const { return Sext; }
102     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103 
104     int64_t getFPModifiersOperand() const {
105       int64_t Operand = 0;
106       Operand |= Abs ? SISrcMods::ABS : 0u;
107       Operand |= Neg ? SISrcMods::NEG : 0u;
108       return Operand;
109     }
110 
111     int64_t getIntModifiersOperand() const {
112       int64_t Operand = 0;
113       Operand |= Sext ? SISrcMods::SEXT : 0u;
114       return Operand;
115     }
116 
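    // Fold the active modifiers into a single SISrcMods immediate; FP (abs/neg)
    // and integer (sext) modifiers are mutually exclusive.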
117     int64_t getModifiersOperand() const {
118       assert(!(hasFPModifiers() && hasIntModifiers())
119            && "fp and int modifiers should not be used simultaneously");
120       if (hasFPModifiers()) {
121         return getFPModifiersOperand();
122       } else if (hasIntModifiers()) {
123         return getIntModifiersOperand();
124       } else {
125         return 0;
126       }
127     }
128 
129     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
130   };
131 
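  // Identifies which named or optional operand an immediate represents
  // (offsets, cache bits, DPP/SDWA controls, etc.); ImmTyNone is a plain value.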
132   enum ImmTy {
133     ImmTyNone,
134     ImmTyGDS,
135     ImmTyLDS,
136     ImmTyOffen,
137     ImmTyIdxen,
138     ImmTyAddr64,
139     ImmTyOffset,
140     ImmTyInstOffset,
141     ImmTyOffset0,
142     ImmTyOffset1,
143     ImmTyDLC,
144     ImmTyGLC,
145     ImmTySLC,
146     ImmTyTFE,
147     ImmTyD16,
148     ImmTyClampSI,
149     ImmTyOModSI,
150     ImmTyDPP8,
151     ImmTyDppCtrl,
152     ImmTyDppRowMask,
153     ImmTyDppBankMask,
154     ImmTyDppBoundCtrl,
155     ImmTyDppFi,
156     ImmTySdwaDstSel,
157     ImmTySdwaSrc0Sel,
158     ImmTySdwaSrc1Sel,
159     ImmTySdwaDstUnused,
160     ImmTyDMask,
161     ImmTyDim,
162     ImmTyUNorm,
163     ImmTyDA,
164     ImmTyR128A16,
165     ImmTyLWE,
166     ImmTyExpTgt,
167     ImmTyExpCompr,
168     ImmTyExpVM,
169     ImmTyFORMAT,
170     ImmTyHwreg,
171     ImmTyOff,
172     ImmTySendMsg,
173     ImmTyInterpSlot,
174     ImmTyInterpAttr,
175     ImmTyAttrChan,
176     ImmTyOpSel,
177     ImmTyOpSelHi,
178     ImmTyNegLo,
179     ImmTyNegHi,
180     ImmTySwizzle,
181     ImmTyGprIdxMode,
182     ImmTyHigh,
183     ImmTyBLGP,
184     ImmTyCBSZ,
185     ImmTyABID,
186     ImmTyEndpgm,
187   };
188 
189 private:
190   struct TokOp {
191     const char *Data;
192     unsigned Length;
193   };
194 
195   struct ImmOp {
196     int64_t Val;
197     ImmTy Type;
198     bool IsFPImm;
199     Modifiers Mods;
200   };
201 
202   struct RegOp {
203     unsigned RegNo;
204     Modifiers Mods;
205   };
206 
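  // Operand payload; the active member is selected by Kind.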
207   union {
208     TokOp Tok;
209     ImmOp Imm;
210     RegOp Reg;
211     const MCExpr *Expr;
212   };
213 
214 public:
215   bool isToken() const override {
216     if (Kind == Token)
217       return true;
218 
    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
223     return isSymbolRefExpr();
224   }
225 
226   bool isSymbolRefExpr() const {
227     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
228   }
229 
230   bool isImm() const override {
231     return Kind == Immediate;
232   }
233 
234   bool isInlinableImm(MVT type) const;
235   bool isLiteralImm(MVT type) const;
236 
237   bool isRegKind() const {
238     return Kind == Register;
239   }
240 
241   bool isReg() const override {
242     return isRegKind() && !hasModifiers();
243   }
244 
245   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
246     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
247   }
248 
249   bool isRegOrImmWithInt16InputMods() const {
250     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
251   }
252 
253   bool isRegOrImmWithInt32InputMods() const {
254     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
255   }
256 
257   bool isRegOrImmWithInt64InputMods() const {
258     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
259   }
260 
261   bool isRegOrImmWithFP16InputMods() const {
262     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
263   }
264 
265   bool isRegOrImmWithFP32InputMods() const {
266     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
267   }
268 
269   bool isRegOrImmWithFP64InputMods() const {
270     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
271   }
272 
273   bool isVReg() const {
274     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
275            isRegClass(AMDGPU::VReg_64RegClassID) ||
276            isRegClass(AMDGPU::VReg_96RegClassID) ||
277            isRegClass(AMDGPU::VReg_128RegClassID) ||
278            isRegClass(AMDGPU::VReg_160RegClassID) ||
279            isRegClass(AMDGPU::VReg_256RegClassID) ||
280            isRegClass(AMDGPU::VReg_512RegClassID) ||
281            isRegClass(AMDGPU::VReg_1024RegClassID);
282   }
283 
284   bool isVReg32() const {
285     return isRegClass(AMDGPU::VGPR_32RegClassID);
286   }
287 
288   bool isVReg32OrOff() const {
289     return isOff() || isVReg32();
290   }
291 
292   bool isSDWAOperand(MVT type) const;
293   bool isSDWAFP16Operand() const;
294   bool isSDWAFP32Operand() const;
295   bool isSDWAInt16Operand() const;
296   bool isSDWAInt32Operand() const;
297 
298   bool isImmTy(ImmTy ImmT) const {
299     return isImm() && Imm.Type == ImmT;
300   }
301 
302   bool isImmModifier() const {
303     return isImm() && Imm.Type != ImmTyNone;
304   }
305 
306   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
307   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
308   bool isDMask() const { return isImmTy(ImmTyDMask); }
309   bool isDim() const { return isImmTy(ImmTyDim); }
310   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
311   bool isDA() const { return isImmTy(ImmTyDA); }
312   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
313   bool isLWE() const { return isImmTy(ImmTyLWE); }
314   bool isOff() const { return isImmTy(ImmTyOff); }
315   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
316   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
317   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
318   bool isOffen() const { return isImmTy(ImmTyOffen); }
319   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
320   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
321   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
322   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
323   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
324 
325   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
326   bool isGDS() const { return isImmTy(ImmTyGDS); }
327   bool isLDS() const { return isImmTy(ImmTyLDS); }
328   bool isDLC() const { return isImmTy(ImmTyDLC); }
329   bool isGLC() const { return isImmTy(ImmTyGLC); }
330   bool isSLC() const { return isImmTy(ImmTySLC); }
331   bool isTFE() const { return isImmTy(ImmTyTFE); }
332   bool isD16() const { return isImmTy(ImmTyD16); }
333   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
334   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
335   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
336   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
337   bool isFI() const { return isImmTy(ImmTyDppFi); }
338   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
339   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
340   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
341   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
342   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
343   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
344   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
345   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
346   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
347   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
348   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
349   bool isHigh() const { return isImmTy(ImmTyHigh); }
350 
351   bool isMod() const {
352     return isClampSI() || isOModSI();
353   }
354 
355   bool isRegOrImm() const {
356     return isReg() || isImm();
357   }
358 
359   bool isRegClass(unsigned RCID) const;
360 
361   bool isInlineValue() const;
362 
363   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
364     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
365   }
366 
367   bool isSCSrcB16() const {
368     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
369   }
370 
371   bool isSCSrcV2B16() const {
372     return isSCSrcB16();
373   }
374 
375   bool isSCSrcB32() const {
376     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
377   }
378 
379   bool isSCSrcB64() const {
380     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
381   }
382 
383   bool isBoolReg() const;
384 
385   bool isSCSrcF16() const {
386     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
387   }
388 
389   bool isSCSrcV2F16() const {
390     return isSCSrcF16();
391   }
392 
393   bool isSCSrcF32() const {
394     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
395   }
396 
397   bool isSCSrcF64() const {
398     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
399   }
400 
401   bool isSSrcB32() const {
402     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
403   }
404 
405   bool isSSrcB16() const {
406     return isSCSrcB16() || isLiteralImm(MVT::i16);
407   }
408 
409   bool isSSrcV2B16() const {
410     llvm_unreachable("cannot happen");
411     return isSSrcB16();
412   }
413 
414   bool isSSrcB64() const {
415     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrcB64().
417     return isSCSrcB64() || isLiteralImm(MVT::i64);
418   }
419 
420   bool isSSrcF32() const {
421     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
422   }
423 
424   bool isSSrcF64() const {
425     return isSCSrcB64() || isLiteralImm(MVT::f64);
426   }
427 
428   bool isSSrcF16() const {
429     return isSCSrcB16() || isLiteralImm(MVT::f16);
430   }
431 
432   bool isSSrcV2F16() const {
433     llvm_unreachable("cannot happen");
434     return isSSrcF16();
435   }
436 
437   bool isSSrcOrLdsB32() const {
438     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
439            isLiteralImm(MVT::i32) || isExpr();
440   }
441 
442   bool isVCSrcB32() const {
443     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
444   }
445 
446   bool isVCSrcB64() const {
447     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
448   }
449 
450   bool isVCSrcB16() const {
451     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
452   }
453 
454   bool isVCSrcV2B16() const {
455     return isVCSrcB16();
456   }
457 
458   bool isVCSrcF32() const {
459     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
460   }
461 
462   bool isVCSrcF64() const {
463     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
464   }
465 
466   bool isVCSrcF16() const {
467     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
468   }
469 
470   bool isVCSrcV2F16() const {
471     return isVCSrcF16();
472   }
473 
474   bool isVSrcB32() const {
475     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
476   }
477 
478   bool isVSrcB64() const {
479     return isVCSrcF64() || isLiteralImm(MVT::i64);
480   }
481 
482   bool isVSrcB16() const {
483     return isVCSrcF16() || isLiteralImm(MVT::i16);
484   }
485 
486   bool isVSrcV2B16() const {
487     return isVSrcB16() || isLiteralImm(MVT::v2i16);
488   }
489 
490   bool isVSrcF32() const {
491     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
492   }
493 
494   bool isVSrcF64() const {
495     return isVCSrcF64() || isLiteralImm(MVT::f64);
496   }
497 
498   bool isVSrcF16() const {
499     return isVCSrcF16() || isLiteralImm(MVT::f16);
500   }
501 
502   bool isVSrcV2F16() const {
503     return isVSrcF16() || isLiteralImm(MVT::v2f16);
504   }
505 
506   bool isVISrcB32() const {
507     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
508   }
509 
510   bool isVISrcB16() const {
511     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
512   }
513 
514   bool isVISrcV2B16() const {
515     return isVISrcB16();
516   }
517 
518   bool isVISrcF32() const {
519     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
520   }
521 
522   bool isVISrcF16() const {
523     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
524   }
525 
526   bool isVISrcV2F16() const {
527     return isVISrcF16() || isVISrcB32();
528   }
529 
530   bool isAISrcB32() const {
531     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
532   }
533 
534   bool isAISrcB16() const {
535     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
536   }
537 
538   bool isAISrcV2B16() const {
539     return isAISrcB16();
540   }
541 
542   bool isAISrcF32() const {
543     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
544   }
545 
546   bool isAISrcF16() const {
547     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
548   }
549 
550   bool isAISrcV2F16() const {
551     return isAISrcF16() || isAISrcB32();
552   }
553 
554   bool isAISrc_128B32() const {
555     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
556   }
557 
558   bool isAISrc_128B16() const {
559     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
560   }
561 
562   bool isAISrc_128V2B16() const {
563     return isAISrc_128B16();
564   }
565 
566   bool isAISrc_128F32() const {
567     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
568   }
569 
570   bool isAISrc_128F16() const {
571     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
572   }
573 
574   bool isAISrc_128V2F16() const {
575     return isAISrc_128F16() || isAISrc_128B32();
576   }
577 
578   bool isAISrc_512B32() const {
579     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
580   }
581 
582   bool isAISrc_512B16() const {
583     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
584   }
585 
586   bool isAISrc_512V2B16() const {
587     return isAISrc_512B16();
588   }
589 
590   bool isAISrc_512F32() const {
591     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
592   }
593 
594   bool isAISrc_512F16() const {
595     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
596   }
597 
598   bool isAISrc_512V2F16() const {
599     return isAISrc_512F16() || isAISrc_512B32();
600   }
601 
602   bool isAISrc_1024B32() const {
603     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
604   }
605 
606   bool isAISrc_1024B16() const {
607     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
608   }
609 
610   bool isAISrc_1024V2B16() const {
611     return isAISrc_1024B16();
612   }
613 
614   bool isAISrc_1024F32() const {
615     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
616   }
617 
618   bool isAISrc_1024F16() const {
619     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
620   }
621 
622   bool isAISrc_1024V2F16() const {
623     return isAISrc_1024F16() || isAISrc_1024B32();
624   }
625 
626   bool isKImmFP32() const {
627     return isLiteralImm(MVT::f32);
628   }
629 
630   bool isKImmFP16() const {
631     return isLiteralImm(MVT::f16);
632   }
633 
634   bool isMem() const override {
635     return false;
636   }
637 
638   bool isExpr() const {
639     return Kind == Expression;
640   }
641 
642   bool isSoppBrTarget() const {
643     return isExpr() || isImm();
644   }
645 
646   bool isSWaitCnt() const;
647   bool isHwreg() const;
648   bool isSendMsg() const;
649   bool isSwizzle() const;
650   bool isSMRDOffset8() const;
651   bool isSMRDOffset20() const;
652   bool isSMRDLiteralOffset() const;
653   bool isDPP8() const;
654   bool isDPPCtrl() const;
655   bool isBLGP() const;
656   bool isCBSZ() const;
657   bool isABID() const;
658   bool isGPRIdxMode() const;
659   bool isS16Imm() const;
660   bool isU16Imm() const;
661   bool isEndpgm() const;
662 
663   StringRef getExpressionAsToken() const {
664     assert(isExpr());
665     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
666     return S->getSymbol().getName();
667   }
668 
669   StringRef getToken() const {
670     assert(isToken());
671 
672     if (Kind == Expression)
673       return getExpressionAsToken();
674 
675     return StringRef(Tok.Data, Tok.Length);
676   }
677 
678   int64_t getImm() const {
679     assert(isImm());
680     return Imm.Val;
681   }
682 
683   ImmTy getImmTy() const {
684     assert(isImm());
685     return Imm.Type;
686   }
687 
688   unsigned getReg() const override {
689     assert(isRegKind());
690     return Reg.RegNo;
691   }
692 
693   SMLoc getStartLoc() const override {
694     return StartLoc;
695   }
696 
697   SMLoc getEndLoc() const override {
698     return EndLoc;
699   }
700 
701   SMRange getLocRange() const {
702     return SMRange(StartLoc, EndLoc);
703   }
704 
705   Modifiers getModifiers() const {
706     assert(isRegKind() || isImmTy(ImmTyNone));
707     return isRegKind() ? Reg.Mods : Imm.Mods;
708   }
709 
710   void setModifiers(Modifiers Mods) {
711     assert(isRegKind() || isImmTy(ImmTyNone));
712     if (isRegKind())
713       Reg.Mods = Mods;
714     else
715       Imm.Mods = Mods;
716   }
717 
718   bool hasModifiers() const {
719     return getModifiers().hasModifiers();
720   }
721 
722   bool hasFPModifiers() const {
723     return getModifiers().hasFPModifiers();
724   }
725 
726   bool hasIntModifiers() const {
727     return getModifiers().hasIntModifiers();
728   }
729 
730   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
731 
732   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
733 
734   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
735 
736   template <unsigned Bitwidth>
737   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
738 
739   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
740     addKImmFPOperands<16>(Inst, N);
741   }
742 
743   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
744     addKImmFPOperands<32>(Inst, N);
745   }
746 
747   void addRegOperands(MCInst &Inst, unsigned N) const;
748 
749   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
750     addRegOperands(Inst, N);
751   }
752 
753   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
754     if (isRegKind())
755       addRegOperands(Inst, N);
756     else if (isExpr())
757       Inst.addOperand(MCOperand::createExpr(Expr));
758     else
759       addImmOperands(Inst, N);
760   }
761 
762   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
763     Modifiers Mods = getModifiers();
764     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
765     if (isRegKind()) {
766       addRegOperands(Inst, N);
767     } else {
768       addImmOperands(Inst, N, false);
769     }
770   }
771 
772   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
773     assert(!hasIntModifiers());
774     addRegOrImmWithInputModsOperands(Inst, N);
775   }
776 
777   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
778     assert(!hasFPModifiers());
779     addRegOrImmWithInputModsOperands(Inst, N);
780   }
781 
782   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
783     Modifiers Mods = getModifiers();
784     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
785     assert(isRegKind());
786     addRegOperands(Inst, N);
787   }
788 
789   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
790     assert(!hasIntModifiers());
791     addRegWithInputModsOperands(Inst, N);
792   }
793 
794   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
795     assert(!hasFPModifiers());
796     addRegWithInputModsOperands(Inst, N);
797   }
798 
799   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
800     if (isImm())
801       addImmOperands(Inst, N);
802     else {
803       assert(isExpr());
804       Inst.addOperand(MCOperand::createExpr(Expr));
805     }
806   }
807 
808   static void printImmTy(raw_ostream& OS, ImmTy Type) {
809     switch (Type) {
810     case ImmTyNone: OS << "None"; break;
811     case ImmTyGDS: OS << "GDS"; break;
812     case ImmTyLDS: OS << "LDS"; break;
813     case ImmTyOffen: OS << "Offen"; break;
814     case ImmTyIdxen: OS << "Idxen"; break;
815     case ImmTyAddr64: OS << "Addr64"; break;
816     case ImmTyOffset: OS << "Offset"; break;
817     case ImmTyInstOffset: OS << "InstOffset"; break;
818     case ImmTyOffset0: OS << "Offset0"; break;
819     case ImmTyOffset1: OS << "Offset1"; break;
820     case ImmTyDLC: OS << "DLC"; break;
821     case ImmTyGLC: OS << "GLC"; break;
822     case ImmTySLC: OS << "SLC"; break;
823     case ImmTyTFE: OS << "TFE"; break;
824     case ImmTyD16: OS << "D16"; break;
825     case ImmTyFORMAT: OS << "FORMAT"; break;
826     case ImmTyClampSI: OS << "ClampSI"; break;
827     case ImmTyOModSI: OS << "OModSI"; break;
828     case ImmTyDPP8: OS << "DPP8"; break;
829     case ImmTyDppCtrl: OS << "DppCtrl"; break;
830     case ImmTyDppRowMask: OS << "DppRowMask"; break;
831     case ImmTyDppBankMask: OS << "DppBankMask"; break;
832     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
833     case ImmTyDppFi: OS << "FI"; break;
834     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
835     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
836     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
837     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
838     case ImmTyDMask: OS << "DMask"; break;
839     case ImmTyDim: OS << "Dim"; break;
840     case ImmTyUNorm: OS << "UNorm"; break;
841     case ImmTyDA: OS << "DA"; break;
842     case ImmTyR128A16: OS << "R128A16"; break;
843     case ImmTyLWE: OS << "LWE"; break;
844     case ImmTyOff: OS << "Off"; break;
845     case ImmTyExpTgt: OS << "ExpTgt"; break;
846     case ImmTyExpCompr: OS << "ExpCompr"; break;
847     case ImmTyExpVM: OS << "ExpVM"; break;
848     case ImmTyHwreg: OS << "Hwreg"; break;
849     case ImmTySendMsg: OS << "SendMsg"; break;
850     case ImmTyInterpSlot: OS << "InterpSlot"; break;
851     case ImmTyInterpAttr: OS << "InterpAttr"; break;
852     case ImmTyAttrChan: OS << "AttrChan"; break;
853     case ImmTyOpSel: OS << "OpSel"; break;
854     case ImmTyOpSelHi: OS << "OpSelHi"; break;
855     case ImmTyNegLo: OS << "NegLo"; break;
856     case ImmTyNegHi: OS << "NegHi"; break;
857     case ImmTySwizzle: OS << "Swizzle"; break;
858     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
859     case ImmTyHigh: OS << "High"; break;
860     case ImmTyBLGP: OS << "BLGP"; break;
861     case ImmTyCBSZ: OS << "CBSZ"; break;
862     case ImmTyABID: OS << "ABID"; break;
863     case ImmTyEndpgm: OS << "Endpgm"; break;
864     }
865   }
866 
867   void print(raw_ostream &OS) const override {
868     switch (Kind) {
869     case Register:
870       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
871       break;
872     case Immediate:
873       OS << '<' << getImm();
874       if (getImmTy() != ImmTyNone) {
875         OS << " type: "; printImmTy(OS, getImmTy());
876       }
877       OS << " mods: " << Imm.Mods << '>';
878       break;
879     case Token:
880       OS << '\'' << getToken() << '\'';
881       break;
882     case Expression:
883       OS << "<expr " << *Expr << '>';
884       break;
885     }
886   }
887 
888   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
889                                       int64_t Val, SMLoc Loc,
890                                       ImmTy Type = ImmTyNone,
891                                       bool IsFPImm = false) {
892     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
893     Op->Imm.Val = Val;
894     Op->Imm.IsFPImm = IsFPImm;
895     Op->Imm.Type = Type;
896     Op->Imm.Mods = Modifiers();
897     Op->StartLoc = Loc;
898     Op->EndLoc = Loc;
899     return Op;
900   }
901 
902   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
903                                         StringRef Str, SMLoc Loc,
904                                         bool HasExplicitEncodingSize = true) {
905     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
906     Res->Tok.Data = Str.data();
907     Res->Tok.Length = Str.size();
908     Res->StartLoc = Loc;
909     Res->EndLoc = Loc;
910     return Res;
911   }
912 
913   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
914                                       unsigned RegNo, SMLoc S,
915                                       SMLoc E) {
916     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
917     Op->Reg.RegNo = RegNo;
918     Op->Reg.Mods = Modifiers();
919     Op->StartLoc = S;
920     Op->EndLoc = E;
921     return Op;
922   }
923 
924   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
925                                        const class MCExpr *Expr, SMLoc S) {
926     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
927     Op->Expr = Expr;
928     Op->StartLoc = S;
929     Op->EndLoc = S;
930     return Op;
931   }
932 };
933 
934 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
935   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
936   return OS;
937 }
938 
939 //===----------------------------------------------------------------------===//
940 // AsmParser
941 //===----------------------------------------------------------------------===//
942 
// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the next
// .amdgpu_hsa_kernel directive or at EOF.
946 class KernelScopeInfo {
947   int SgprIndexUnusedMin = -1;
948   int VgprIndexUnusedMin = -1;
949   MCContext *Ctx = nullptr;
950 
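  // Record that SGPR index i is in use and keep the .kernel.sgpr_count symbol
  // equal to the highest used SGPR index plus one.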
951   void usesSgprAt(int i) {
952     if (i >= SgprIndexUnusedMin) {
953       SgprIndexUnusedMin = ++i;
954       if (Ctx) {
955         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
956         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
957       }
958     }
959   }
960 
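  // Same as usesSgprAt, but for VGPRs and .kernel.vgpr_count.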
961   void usesVgprAt(int i) {
962     if (i >= VgprIndexUnusedMin) {
963       VgprIndexUnusedMin = ++i;
964       if (Ctx) {
965         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
966         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
967       }
968     }
969   }
970 
971 public:
972   KernelScopeInfo() = default;
973 
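  // Reset the counters and (re)create the count symbols with a value of 0.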
974   void initialize(MCContext &Context) {
975     Ctx = &Context;
976     usesSgprAt(SgprIndexUnusedMin = -1);
977     usesVgprAt(VgprIndexUnusedMin = -1);
978   }
979 
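  // AGPRs are counted together with VGPRs.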
980   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
981     switch (RegKind) {
982       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
983       case IS_AGPR: // fall through
984       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
985       default: break;
986     }
987   }
988 };
989 
990 class AMDGPUAsmParser : public MCTargetAsmParser {
991   MCAsmParser &Parser;
992 
993   // Number of extra operands parsed after the first optional operand.
994   // This may be necessary to skip hardcoded mandatory operands.
995   static const unsigned MAX_OPR_LOOKAHEAD = 8;
996 
997   unsigned ForcedEncodingSize = 0;
998   bool ForcedDPP = false;
999   bool ForcedSDWA = false;
1000   KernelScopeInfo KernelScope;
1001 
1002   /// @name Auto-generated Match Functions
1003   /// {
1004 
1005 #define GET_ASSEMBLER_HEADER
1006 #include "AMDGPUGenAsmMatcher.inc"
1007 
1008   /// }
1009 
1010 private:
1011   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1012   bool OutOfRangeError(SMRange Range);
1013   /// Calculate VGPR/SGPR blocks required for given target, reserved
1014   /// registers, and user-specified NextFreeXGPR values.
1015   ///
1016   /// \param Features [in] Target features, used for bug corrections.
1017   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1018   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1019   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1020   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1021   /// descriptor field, if valid.
1022   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1023   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1024   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1025   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1026   /// \param VGPRBlocks [out] Result VGPR block count.
1027   /// \param SGPRBlocks [out] Result SGPR block count.
1028   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1029                           bool FlatScrUsed, bool XNACKUsed,
1030                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1031                           SMRange VGPRRange, unsigned NextFreeSGPR,
1032                           SMRange SGPRRange, unsigned &VGPRBlocks,
1033                           unsigned &SGPRBlocks);
1034   bool ParseDirectiveAMDGCNTarget();
1035   bool ParseDirectiveAMDHSAKernel();
1036   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1037   bool ParseDirectiveHSACodeObjectVersion();
1038   bool ParseDirectiveHSACodeObjectISA();
1039   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1040   bool ParseDirectiveAMDKernelCodeT();
1041   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1042   bool ParseDirectiveAMDGPUHsaKernel();
1043 
1044   bool ParseDirectiveISAVersion();
1045   bool ParseDirectiveHSAMetadata();
1046   bool ParseDirectivePALMetadataBegin();
1047   bool ParseDirectivePALMetadata();
1048   bool ParseDirectiveAMDGPULDS();
1049 
1050   /// Common code to parse out a block of text (typically YAML) between start and
1051   /// end directives.
1052   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1053                            const char *AssemblerDirectiveEnd,
1054                            std::string &CollectString);
1055 
1056   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1057                              RegisterKind RegKind, unsigned Reg1,
1058                              unsigned RegNum);
1059   bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
1060                            unsigned& RegNum, unsigned& RegWidth,
1061                            unsigned *DwordRegIndex);
1062   bool isRegister();
1063   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1064   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1065   void initializeGprCountSymbol(RegisterKind RegKind);
1066   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1067                              unsigned RegWidth);
1068   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1069                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1070   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1071                  bool IsGdsHardcoded);
1072 
1073 public:
1074   enum AMDGPUMatchResultTy {
1075     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1076   };
1077   enum OperandMode {
1078     OperandMode_Default,
1079     OperandMode_NSA,
1080   };
1081 
1082   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1083 
1084   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1085                const MCInstrInfo &MII,
1086                const MCTargetOptions &Options)
1087       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1088     MCAsmParserExtension::Initialize(Parser);
1089 
1090     if (getFeatureBits().none()) {
1091       // Set default features.
1092       copySTI().ToggleFeature("southern-islands");
1093     }
1094 
1095     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1096 
1097     {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific
      // target.
1102       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1103       MCContext &Ctx = getContext();
1104       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1105         MCSymbol *Sym =
1106             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1107         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1108         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1109         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1110         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1111         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1112       } else {
1113         MCSymbol *Sym =
1114             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1115         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1116         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1117         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1118         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1119         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1120       }
1121       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1122         initializeGprCountSymbol(IS_VGPR);
1123         initializeGprCountSymbol(IS_SGPR);
1124       } else
1125         KernelScope.initialize(getContext());
1126     }
1127   }
1128 
1129   bool hasXNACK() const {
1130     return AMDGPU::hasXNACK(getSTI());
1131   }
1132 
1133   bool hasMIMG_R128() const {
1134     return AMDGPU::hasMIMG_R128(getSTI());
1135   }
1136 
1137   bool hasPackedD16() const {
1138     return AMDGPU::hasPackedD16(getSTI());
1139   }
1140 
1141   bool isSI() const {
1142     return AMDGPU::isSI(getSTI());
1143   }
1144 
1145   bool isCI() const {
1146     return AMDGPU::isCI(getSTI());
1147   }
1148 
1149   bool isVI() const {
1150     return AMDGPU::isVI(getSTI());
1151   }
1152 
1153   bool isGFX9() const {
1154     return AMDGPU::isGFX9(getSTI());
1155   }
1156 
1157   bool isGFX10() const {
1158     return AMDGPU::isGFX10(getSTI());
1159   }
1160 
1161   bool hasInv2PiInlineImm() const {
1162     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1163   }
1164 
1165   bool hasFlatOffsets() const {
1166     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1167   }
1168 
1169   bool hasSGPR102_SGPR103() const {
1170     return !isVI() && !isGFX9();
1171   }
1172 
1173   bool hasSGPR104_SGPR105() const {
1174     return isGFX10();
1175   }
1176 
1177   bool hasIntClamp() const {
1178     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1179   }
1180 
1181   AMDGPUTargetStreamer &getTargetStreamer() {
1182     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1183     return static_cast<AMDGPUTargetStreamer &>(TS);
1184   }
1185 
1186   const MCRegisterInfo *getMRI() const {
1187     // We need this const_cast because for some reason getContext() is not const
1188     // in MCAsmParser.
1189     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1190   }
1191 
1192   const MCInstrInfo *getMII() const {
1193     return &MII;
1194   }
1195 
1196   const FeatureBitset &getFeatureBits() const {
1197     return getSTI().getFeatureBits();
1198   }
1199 
1200   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1201   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1202   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1203 
1204   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1205   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1206   bool isForcedDPP() const { return ForcedDPP; }
1207   bool isForcedSDWA() const { return ForcedSDWA; }
1208   ArrayRef<unsigned> getMatchedVariants() const;
1209 
1210   std::unique_ptr<AMDGPUOperand> parseRegister();
1211   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1212   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1213   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1214                                       unsigned Kind) override;
1215   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1216                                OperandVector &Operands, MCStreamer &Out,
1217                                uint64_t &ErrorInfo,
1218                                bool MatchingInlineAsm) override;
1219   bool ParseDirective(AsmToken DirectiveID) override;
1220   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1221                                     OperandMode Mode = OperandMode_Default);
1222   StringRef parseMnemonicSuffix(StringRef Name);
1223   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1224                         SMLoc NameLoc, OperandVector &Operands) override;
1225   //bool ProcessInstruction(MCInst &Inst);
1226 
1227   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1228 
1229   OperandMatchResultTy
1230   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1231                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1232                      bool (*ConvertResult)(int64_t &) = nullptr);
1233 
1234   OperandMatchResultTy
1235   parseOperandArrayWithPrefix(const char *Prefix,
1236                               OperandVector &Operands,
1237                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1238                               bool (*ConvertResult)(int64_t&) = nullptr);
1239 
1240   OperandMatchResultTy
1241   parseNamedBit(const char *Name, OperandVector &Operands,
1242                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1243   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1244                                              StringRef &Value);
1245 
1246   bool isModifier();
1247   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1248   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1249   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1250   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1251   bool parseSP3NegModifier();
1252   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1253   OperandMatchResultTy parseReg(OperandVector &Operands);
1254   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1255   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1256   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1257   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1258   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1259   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1260   OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1261 
1262   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1263   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1264   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1265   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1266 
1267   bool parseCnt(int64_t &IntVal);
1268   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1269   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1270 
1271 private:
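  // Helper for parsing symbolic operand fields such as sendmsg and hwreg
  // arguments.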
1272   struct OperandInfoTy {
1273     int64_t Id;
1274     bool IsSymbolic = false;
1275     bool IsDefined = false;
1276 
1277     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1278   };
1279 
1280   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1281   bool validateSendMsg(const OperandInfoTy &Msg,
1282                        const OperandInfoTy &Op,
1283                        const OperandInfoTy &Stream,
1284                        const SMLoc Loc);
1285 
1286   bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1287   bool validateHwreg(const OperandInfoTy &HwReg,
1288                      const int64_t Offset,
1289                      const int64_t Width,
1290                      const SMLoc Loc);
1291 
1292   void errorExpTgt();
1293   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1294   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1295 
1296   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1297   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1298   bool validateSOPLiteral(const MCInst &Inst) const;
1299   bool validateConstantBusLimitations(const MCInst &Inst);
1300   bool validateEarlyClobberLimitations(const MCInst &Inst);
1301   bool validateIntClampSupported(const MCInst &Inst);
1302   bool validateMIMGAtomicDMask(const MCInst &Inst);
1303   bool validateMIMGGatherDMask(const MCInst &Inst);
1304   bool validateMIMGDataSize(const MCInst &Inst);
1305   bool validateMIMGAddrSize(const MCInst &Inst);
1306   bool validateMIMGD16(const MCInst &Inst);
1307   bool validateMIMGDim(const MCInst &Inst);
1308   bool validateLdsDirect(const MCInst &Inst);
1309   bool validateOpSel(const MCInst &Inst);
1310   bool validateVccOperand(unsigned Reg) const;
1311   bool validateVOP3Literal(const MCInst &Inst) const;
1312   unsigned getConstantBusLimit(unsigned Opcode) const;
1313   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1314   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1315   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1316 
1317   bool isId(const StringRef Id) const;
1318   bool isId(const AsmToken &Token, const StringRef Id) const;
1319   bool isToken(const AsmToken::TokenKind Kind) const;
1320   bool trySkipId(const StringRef Id);
1321   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1322   bool trySkipToken(const AsmToken::TokenKind Kind);
1323   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1324   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1325   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1326   AsmToken::TokenKind getTokenKind() const;
1327   bool parseExpr(int64_t &Imm);
1328   bool parseExpr(OperandVector &Operands);
1329   StringRef getTokenStr() const;
1330   AsmToken peekToken();
1331   AsmToken getToken() const;
1332   SMLoc getLoc() const;
1333   void lex();
1334 
1335 public:
1336   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1337   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1338 
1339   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1340   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1341   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1342   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1343   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1344   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1345 
1346   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1347                             const unsigned MinVal,
1348                             const unsigned MaxVal,
1349                             const StringRef ErrMsg);
1350   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1351   bool parseSwizzleOffset(int64_t &Imm);
1352   bool parseSwizzleMacro(int64_t &Imm);
1353   bool parseSwizzleQuadPerm(int64_t &Imm);
1354   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1355   bool parseSwizzleBroadcast(int64_t &Imm);
1356   bool parseSwizzleSwap(int64_t &Imm);
1357   bool parseSwizzleReverse(int64_t &Imm);
1358 
1359   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1360   int64_t parseGPRIdxMacro();
1361 
1362   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1363   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1364   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1365   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1366   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1367 
1368   AMDGPUOperand::Ptr defaultDLC() const;
1369   AMDGPUOperand::Ptr defaultGLC() const;
1370   AMDGPUOperand::Ptr defaultSLC() const;
1371 
1372   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1373   AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1374   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1375   AMDGPUOperand::Ptr defaultFlatOffset() const;
1376 
1377   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1378 
1379   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1380                OptionalImmIndexMap &OptionalIdx);
1381   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1382   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1383   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1384 
1385   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1386 
1387   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1388                bool IsAtomic = false);
1389   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1390 
1391   OperandMatchResultTy parseDim(OperandVector &Operands);
1392   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1393   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1394   AMDGPUOperand::Ptr defaultRowMask() const;
1395   AMDGPUOperand::Ptr defaultBankMask() const;
1396   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1397   AMDGPUOperand::Ptr defaultFI() const;
1398   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1399   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1400 
1401   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1402                                     AMDGPUOperand::ImmTy Type);
1403   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1404   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1405   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1406   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1407   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1408   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1409                 uint64_t BasicInstType, bool skipVcc = false);
1410 
1411   AMDGPUOperand::Ptr defaultBLGP() const;
1412   AMDGPUOperand::Ptr defaultCBSZ() const;
1413   AMDGPUOperand::Ptr defaultABID() const;
1414 
1415   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1416   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1417 };
1418 
1419 struct OptionalOperand {
1420   const char *Name;
1421   AMDGPUOperand::ImmTy Type;
1422   bool IsBit;
1423   bool (*ConvertResult)(int64_t&);
1424 };
1425 
1426 } // end anonymous namespace
1427 
// May be called with an integer type of equivalent bitwidth.
1429 static const fltSemantics *getFltSemantics(unsigned Size) {
1430   switch (Size) {
1431   case 4:
1432     return &APFloat::IEEEsingle();
1433   case 8:
1434     return &APFloat::IEEEdouble();
1435   case 2:
1436     return &APFloat::IEEEhalf();
1437   default:
1438     llvm_unreachable("unsupported fp type");
1439   }
1440 }
1441 
1442 static const fltSemantics *getFltSemantics(MVT VT) {
1443   return getFltSemantics(VT.getSizeInBits() / 8);
1444 }
1445 
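// Return the floating-point semantics matching the given AMDGPU operand type.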
1446 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1447   switch (OperandType) {
1448   case AMDGPU::OPERAND_REG_IMM_INT32:
1449   case AMDGPU::OPERAND_REG_IMM_FP32:
1450   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1451   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1452   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1453   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1454     return &APFloat::IEEEsingle();
1455   case AMDGPU::OPERAND_REG_IMM_INT64:
1456   case AMDGPU::OPERAND_REG_IMM_FP64:
1457   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1458   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1459     return &APFloat::IEEEdouble();
1460   case AMDGPU::OPERAND_REG_IMM_INT16:
1461   case AMDGPU::OPERAND_REG_IMM_FP16:
1462   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1463   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1464   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1465   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1466   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1467   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1468   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1469   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1470   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1471   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1472     return &APFloat::IEEEhalf();
1473   default:
1474     llvm_unreachable("unsupported fp type");
1475   }
1476 }
1477 
1478 //===----------------------------------------------------------------------===//
1479 // Operand
1480 //===----------------------------------------------------------------------===//
1481 
1482 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1483   bool Lost;
1484 
  // Convert the literal to the floating-point type expected by the operand.
1486   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1487                                                APFloat::rmNearestTiesToEven,
1488                                                &Lost);
  // We allow precision loss but not overflow or underflow.
1490   if (Status != APFloat::opOK &&
1491       Lost &&
1492       ((Status & APFloat::opOverflow)  != 0 ||
1493        (Status & APFloat::opUnderflow) != 0)) {
1494     return false;
1495   }
1496 
1497   return true;
1498 }
1499 
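// Check that Val fits into Size bits as either an unsigned or a signed value,
// so that truncating it to Size bits loses no information.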
1500 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1501   return isUIntN(Size, Val) || isIntN(Size, Val);
1502 }
1503 
1504 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1505 
  // This is a hack to enable named inline values like shared_base with both
  // 32-bit and 64-bit operands. Note that these values are defined as 32-bit
  // operands only.
1510   if (isInlineValue()) {
1511     return true;
1512   }
1513 
1514   if (!isImmTy(ImmTyNone)) {
1515     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1516     return false;
1517   }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values, which is what a few other places do.
  // We've had bot failures before due to weird NaN support on MIPS hosts.
1521 
1522   APInt Literal(64, Imm.Val);
1523 
1524   if (Imm.IsFPImm) { // We got fp literal token
1525     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1526       return AMDGPU::isInlinableLiteral64(Imm.Val,
1527                                           AsmParser->hasInv2PiInlineImm());
1528     }
1529 
1530     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1531     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1532       return false;
1533 
1534     if (type.getScalarSizeInBits() == 16) {
1535       return AMDGPU::isInlinableLiteral16(
1536         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1537         AsmParser->hasInv2PiInlineImm());
1538     }
1539 
1540     // Check if single precision literal is inlinable
1541     return AMDGPU::isInlinableLiteral32(
1542       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1543       AsmParser->hasInv2PiInlineImm());
1544   }
1545 
1546   // We got int literal token.
1547   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1548     return AMDGPU::isInlinableLiteral64(Imm.Val,
1549                                         AsmParser->hasInv2PiInlineImm());
1550   }
1551 
1552   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1553     return false;
1554   }
1555 
1556   if (type.getScalarSizeInBits() == 16) {
1557     return AMDGPU::isInlinableLiteral16(
1558       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1559       AsmParser->hasInv2PiInlineImm());
1560   }
1561 
1562   return AMDGPU::isInlinableLiteral32(
1563     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1564     AsmParser->hasInv2PiInlineImm());
1565 }
1566 
1567 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1568   // Check that this immediate can be added as literal
1569   if (!isImmTy(ImmTyNone)) {
1570     return false;
1571   }
1572 
1573   if (!Imm.IsFPImm) {
1574     // We got int literal token.
1575 
1576     if (type == MVT::f64 && hasFPModifiers()) {
1577       // FP modifiers cannot be applied to an int literal while preserving the
1578       // same semantics for VOP1/2/C and VOP3 because of integer truncation.
1579       // To avoid ambiguity, disable these cases.
1580       return false;
1581     }
1582 
1583     unsigned Size = type.getSizeInBits();
1584     if (Size == 64)
1585       Size = 32;
1586 
1587     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1588     // types.
1589     return isSafeTruncation(Imm.Val, Size);
1590   }
1591 
1592   // We got fp literal token
1593   if (type == MVT::f64) { // Expected 64-bit fp operand
1594     // The low 32 bits of such a literal are set to zeroes, but we accept it.
1595     return true;
1596   }
1597 
1598   if (type == MVT::i64) { // Expected 64-bit int operand
1599     // We don't allow fp literals in 64-bit integer instructions. It is
1600     // unclear how we should encode them.
1601     return false;
1602   }
1603 
1604   // We allow fp literals with f16x2 operands assuming that the specified
1605   // literal goes into the lower half and the upper half is zero. We also
1606   // require that the literal may be losslessly converted to f16.
1607   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1608                      (type == MVT::v2i16)? MVT::i16 : type;
1609 
1610   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1611   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1612 }
1613 
1614 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1615   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1616 }
1617 
1618 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1619   if (AsmParser->isVI())
1620     return isVReg32();
1621   else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1622     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1623   else
1624     return false;
1625 }
1626 
1627 bool AMDGPUOperand::isSDWAFP16Operand() const {
1628   return isSDWAOperand(MVT::f16);
1629 }
1630 
1631 bool AMDGPUOperand::isSDWAFP32Operand() const {
1632   return isSDWAOperand(MVT::f32);
1633 }
1634 
1635 bool AMDGPUOperand::isSDWAInt16Operand() const {
1636   return isSDWAOperand(MVT::i16);
1637 }
1638 
1639 bool AMDGPUOperand::isSDWAInt32Operand() const {
1640   return isSDWAOperand(MVT::i32);
1641 }
1642 
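// A boolean source must match the wavefront size: a 64-bit scalar condition
// source in wave64 mode, or a 32-bit one in wave32 mode.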
1643 bool AMDGPUOperand::isBoolReg() const {
1644   return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1645          (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1646 }
1647 
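// Apply 'abs' and 'neg' input modifiers directly to the literal bits:
// 'abs' clears the sign bit and 'neg' toggles it. Size is the operand size
// in bytes (2, 4 or 8) and determines the position of the sign bit.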
1648 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1649 {
1650   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1651   assert(Size == 2 || Size == 4 || Size == 8);
1652 
1653   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1654 
1655   if (Imm.Mods.Abs) {
1656     Val &= ~FpSignMask;
1657   }
1658   if (Imm.Mods.Neg) {
1659     Val ^= FpSignMask;
1660   }
1661 
1662   return Val;
1663 }
1664 
1665 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1666   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1667                              Inst.getNumOperands())) {
1668     addLiteralImmOperand(Inst, Imm.Val,
1669                          ApplyModifiers &&
1670                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1671   } else {
1672     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1673     Inst.addOperand(MCOperand::createImm(Imm.Val));
1674   }
1675 }
1676 
1677 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1678   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1679   auto OpNum = Inst.getNumOperands();
1680   // Check that this operand accepts literals
1681   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1682 
1683   if (ApplyModifiers) {
1684     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1685     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1686     Val = applyInputFPModifiers(Val, Size);
1687   }
1688 
1689   APInt Literal(64, Val);
1690   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1691 
1692   if (Imm.IsFPImm) { // We got fp literal token
1693     switch (OpTy) {
1694     case AMDGPU::OPERAND_REG_IMM_INT64:
1695     case AMDGPU::OPERAND_REG_IMM_FP64:
1696     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1697     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1698       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1699                                        AsmParser->hasInv2PiInlineImm())) {
1700         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1701         return;
1702       }
1703 
1704       // Non-inlineable
1705       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1706         // For fp operands we check if low 32 bits are zeros
1707         if (Literal.getLoBits(32) != 0) {
1708           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1709           "Can't encode literal as exact 64-bit floating-point operand. "
1710           "Low 32-bits will be set to zero");
1711         }
1712 
1713         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1714         return;
1715       }
1716 
1717       // We don't allow fp literals in 64-bit integer instructions. It is
1718       // unclear how we should encode them. This case should be checked earlier
1719       // in predicate methods (isLiteralImm())
1720       llvm_unreachable("fp literal in 64-bit integer instruction.");
1721 
1722     case AMDGPU::OPERAND_REG_IMM_INT32:
1723     case AMDGPU::OPERAND_REG_IMM_FP32:
1724     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1725     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1726     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1727     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1728     case AMDGPU::OPERAND_REG_IMM_INT16:
1729     case AMDGPU::OPERAND_REG_IMM_FP16:
1730     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1731     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1732     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1733     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1734     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1735     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1736     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1737     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1738     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1739     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1740       bool lost;
1741       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1742       // Convert the literal to the floating-point format expected by the operand
1743       FPLiteral.convert(*getOpFltSemantics(OpTy),
1744                         APFloat::rmNearestTiesToEven, &lost);
1745       // We allow precision loss but not overflow or underflow. This should be
1746       // checked earlier in isLiteralImm()
1747 
1748       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1749       Inst.addOperand(MCOperand::createImm(ImmVal));
1750       return;
1751     }
1752     default:
1753       llvm_unreachable("invalid operand size");
1754     }
1755 
1756     return;
1757   }
1758 
1759   // We got int literal token.
1760   // Only sign extend inline immediates.
1761   switch (OpTy) {
1762   case AMDGPU::OPERAND_REG_IMM_INT32:
1763   case AMDGPU::OPERAND_REG_IMM_FP32:
1764   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1765   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1766   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1767   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1768   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1769   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1770     if (isSafeTruncation(Val, 32) &&
1771         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1772                                      AsmParser->hasInv2PiInlineImm())) {
1773       Inst.addOperand(MCOperand::createImm(Val));
1774       return;
1775     }
1776 
1777     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1778     return;
1779 
1780   case AMDGPU::OPERAND_REG_IMM_INT64:
1781   case AMDGPU::OPERAND_REG_IMM_FP64:
1782   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1783   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1784     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1785       Inst.addOperand(MCOperand::createImm(Val));
1786       return;
1787     }
1788 
1789     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1790     return;
1791 
1792   case AMDGPU::OPERAND_REG_IMM_INT16:
1793   case AMDGPU::OPERAND_REG_IMM_FP16:
1794   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1795   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1796   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1797   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1798     if (isSafeTruncation(Val, 16) &&
1799         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1800                                      AsmParser->hasInv2PiInlineImm())) {
1801       Inst.addOperand(MCOperand::createImm(Val));
1802       return;
1803     }
1804 
1805     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1806     return;
1807 
1808   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1809   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1810   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1811   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
1812     assert(isSafeTruncation(Val, 16));
1813     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1814                                         AsmParser->hasInv2PiInlineImm()));
1815 
1816     Inst.addOperand(MCOperand::createImm(Val));
1817     return;
1818   }
1819   default:
1820     llvm_unreachable("invalid operand size");
1821   }
1822 }
1823 
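// Add a fixed-width (KImm) floating-point immediate operand. Integer literals
// are encoded as their low Bitwidth bits; fp literals are first converted to
// the floating-point format of that width.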
1824 template <unsigned Bitwidth>
1825 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1826   APInt Literal(64, Imm.Val);
1827 
1828   if (!Imm.IsFPImm) {
1829     // We got int literal token.
1830     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1831     return;
1832   }
1833 
1834   bool Lost;
1835   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1836   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1837                     APFloat::rmNearestTiesToEven, &Lost);
1838   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1839 }
1840 
1841 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1842   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1843 }
1844 
1845 static bool isInlineValue(unsigned Reg) {
1846   switch (Reg) {
1847   case AMDGPU::SRC_SHARED_BASE:
1848   case AMDGPU::SRC_SHARED_LIMIT:
1849   case AMDGPU::SRC_PRIVATE_BASE:
1850   case AMDGPU::SRC_PRIVATE_LIMIT:
1851   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1852     return true;
1853   case AMDGPU::SRC_VCCZ:
1854   case AMDGPU::SRC_EXECZ:
1855   case AMDGPU::SRC_SCC:
1856     return true;
1857   case AMDGPU::SGPR_NULL:
1858     return true;
1859   default:
1860     return false;
1861   }
1862 }
1863 
1864 bool AMDGPUOperand::isInlineValue() const {
1865   return isRegKind() && ::isInlineValue(getReg());
1866 }
1867 
1868 //===----------------------------------------------------------------------===//
1869 // AsmParser
1870 //===----------------------------------------------------------------------===//
1871 
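// Map a register kind and width (in 32-bit registers) to the corresponding
// register class ID, or return -1 if no such class exists.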
1872 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1873   if (Is == IS_VGPR) {
1874     switch (RegWidth) {
1875       default: return -1;
1876       case 1: return AMDGPU::VGPR_32RegClassID;
1877       case 2: return AMDGPU::VReg_64RegClassID;
1878       case 3: return AMDGPU::VReg_96RegClassID;
1879       case 4: return AMDGPU::VReg_128RegClassID;
1880       case 5: return AMDGPU::VReg_160RegClassID;
1881       case 8: return AMDGPU::VReg_256RegClassID;
1882       case 16: return AMDGPU::VReg_512RegClassID;
1883       case 32: return AMDGPU::VReg_1024RegClassID;
1884     }
1885   } else if (Is == IS_TTMP) {
1886     switch (RegWidth) {
1887       default: return -1;
1888       case 1: return AMDGPU::TTMP_32RegClassID;
1889       case 2: return AMDGPU::TTMP_64RegClassID;
1890       case 4: return AMDGPU::TTMP_128RegClassID;
1891       case 8: return AMDGPU::TTMP_256RegClassID;
1892       case 16: return AMDGPU::TTMP_512RegClassID;
1893     }
1894   } else if (Is == IS_SGPR) {
1895     switch (RegWidth) {
1896       default: return -1;
1897       case 1: return AMDGPU::SGPR_32RegClassID;
1898       case 2: return AMDGPU::SGPR_64RegClassID;
1899       case 4: return AMDGPU::SGPR_128RegClassID;
1900       case 8: return AMDGPU::SGPR_256RegClassID;
1901       case 16: return AMDGPU::SGPR_512RegClassID;
1902     }
1903   } else if (Is == IS_AGPR) {
1904     switch (RegWidth) {
1905       default: return -1;
1906       case 1: return AMDGPU::AGPR_32RegClassID;
1907       case 2: return AMDGPU::AReg_64RegClassID;
1908       case 4: return AMDGPU::AReg_128RegClassID;
1909       case 16: return AMDGPU::AReg_512RegClassID;
1910       case 32: return AMDGPU::AReg_1024RegClassID;
1911     }
1912   }
1913   return -1;
1914 }
1915 
1916 static unsigned getSpecialRegForName(StringRef RegName) {
1917   return StringSwitch<unsigned>(RegName)
1918     .Case("exec", AMDGPU::EXEC)
1919     .Case("vcc", AMDGPU::VCC)
1920     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1921     .Case("xnack_mask", AMDGPU::XNACK_MASK)
1922     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
1923     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
1924     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1925     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1926     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
1927     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
1928     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1929     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1930     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1931     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1932     .Case("lds_direct", AMDGPU::LDS_DIRECT)
1933     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1934     .Case("m0", AMDGPU::M0)
1935     .Case("vccz", AMDGPU::SRC_VCCZ)
1936     .Case("src_vccz", AMDGPU::SRC_VCCZ)
1937     .Case("execz", AMDGPU::SRC_EXECZ)
1938     .Case("src_execz", AMDGPU::SRC_EXECZ)
1939     .Case("scc", AMDGPU::SRC_SCC)
1940     .Case("src_scc", AMDGPU::SRC_SCC)
1941     .Case("tba", AMDGPU::TBA)
1942     .Case("tma", AMDGPU::TMA)
1943     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1944     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1945     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1946     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1947     .Case("vcc_lo", AMDGPU::VCC_LO)
1948     .Case("vcc_hi", AMDGPU::VCC_HI)
1949     .Case("exec_lo", AMDGPU::EXEC_LO)
1950     .Case("exec_hi", AMDGPU::EXEC_HI)
1951     .Case("tma_lo", AMDGPU::TMA_LO)
1952     .Case("tma_hi", AMDGPU::TMA_HI)
1953     .Case("tba_lo", AMDGPU::TBA_LO)
1954     .Case("tba_hi", AMDGPU::TBA_HI)
1955     .Case("null", AMDGPU::SGPR_NULL)
1956     .Default(0);
1957 }
1958 
1959 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1960                                     SMLoc &EndLoc) {
1961   auto R = parseRegister();
1962   if (!R) return true;
1963   assert(R->isReg());
1964   RegNo = R->getReg();
1965   StartLoc = R->getStartLoc();
1966   EndLoc = R->getEndLoc();
1967   return false;
1968 }
1969 
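// Try to append Reg1 to the register span currently being parsed. LO/HI pairs
// of special registers are merged into the combined register; VGPR/SGPR/AGPR/
// TTMP registers must be consecutive and extend RegWidth by one.
// Return false if Reg1 cannot be appended.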
1970 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1971                                             RegisterKind RegKind, unsigned Reg1,
1972                                             unsigned RegNum) {
1973   switch (RegKind) {
1974   case IS_SPECIAL:
1975     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1976       Reg = AMDGPU::EXEC;
1977       RegWidth = 2;
1978       return true;
1979     }
1980     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1981       Reg = AMDGPU::FLAT_SCR;
1982       RegWidth = 2;
1983       return true;
1984     }
1985     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
1986       Reg = AMDGPU::XNACK_MASK;
1987       RegWidth = 2;
1988       return true;
1989     }
1990     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1991       Reg = AMDGPU::VCC;
1992       RegWidth = 2;
1993       return true;
1994     }
1995     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1996       Reg = AMDGPU::TBA;
1997       RegWidth = 2;
1998       return true;
1999     }
2000     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2001       Reg = AMDGPU::TMA;
2002       RegWidth = 2;
2003       return true;
2004     }
2005     return false;
2006   case IS_VGPR:
2007   case IS_SGPR:
2008   case IS_AGPR:
2009   case IS_TTMP:
2010     if (Reg1 != Reg + RegWidth) {
2011       return false;
2012     }
2013     RegWidth++;
2014     return true;
2015   default:
2016     llvm_unreachable("unexpected register kind");
2017   }
2018 }
2019 
2020 static constexpr StringLiteral Registers[] = {"v", "s", "ttmp", "acc", "a"};
2021 
2022 bool
2023 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2024                             const AsmToken &NextToken) const {
2025 
2026   // A list of consecutive registers: [s0,s1,s2,s3]
2027   if (Token.is(AsmToken::LBrac))
2028     return true;
2029 
2030   if (!Token.is(AsmToken::Identifier))
2031     return false;
2032 
2033   // A single register like s0 or a range of registers like s[0:1]
2034 
2035   StringRef RegName = Token.getString();
2036 
2037   for (StringRef Reg : Registers) {
2038     if (RegName.startswith(Reg)) {
2039       if (Reg.size() < RegName.size()) {
2040         unsigned RegNum;
2041         // A single register with an index: rXX
2042         if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum))
2043           return true;
2044       } else {
2045         // A range of registers: r[XX:YY].
2046         if (NextToken.is(AsmToken::LBrac))
2047           return true;
2048       }
2049     }
2050   }
2051 
2052   return getSpecialRegForName(RegName);
2053 }
2054 
2055 bool
2056 AMDGPUAsmParser::isRegister()
2057 {
2058   return isRegister(getToken(), peekToken());
2059 }
2060 
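// Parse a register reference: a special register name, a single register such
// as v0, a range such as s[0:3], or a list of consecutive registers such as
// [s0,s1,s2,s3]. On success, fill in the register kind, the MC register and
// the width in 32-bit registers; DwordRegIndex, if provided, receives the
// index of the first 32-bit register.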
2061 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2062                                           unsigned &RegNum, unsigned &RegWidth,
2063                                           unsigned *DwordRegIndex) {
2064   if (DwordRegIndex) { *DwordRegIndex = 0; }
2065   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2066   if (getLexer().is(AsmToken::Identifier)) {
2067     StringRef RegName = Parser.getTok().getString();
2068     if ((Reg = getSpecialRegForName(RegName))) {
2069       Parser.Lex();
2070       RegKind = IS_SPECIAL;
2071     } else {
2072       unsigned RegNumIndex = 0;
2073       if (RegName[0] == 'v') {
2074         RegNumIndex = 1;
2075         RegKind = IS_VGPR;
2076       } else if (RegName[0] == 's') {
2077         RegNumIndex = 1;
2078         RegKind = IS_SGPR;
2079       } else if (RegName[0] == 'a') {
2080         RegNumIndex = RegName.startswith("acc") ? 3 : 1;
2081         RegKind = IS_AGPR;
2082       } else if (RegName.startswith("ttmp")) {
2083         RegNumIndex = strlen("ttmp");
2084         RegKind = IS_TTMP;
2085       } else {
2086         return false;
2087       }
2088       if (RegName.size() > RegNumIndex) {
2089         // Single 32-bit register: vXX.
2090         if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
2091           return false;
2092         Parser.Lex();
2093         RegWidth = 1;
2094       } else {
2095         // Range of registers: v[XX:YY]. ":YY" is optional.
2096         Parser.Lex();
2097         int64_t RegLo, RegHi;
2098         if (getLexer().isNot(AsmToken::LBrac))
2099           return false;
2100         Parser.Lex();
2101 
2102         if (getParser().parseAbsoluteExpression(RegLo))
2103           return false;
2104 
2105         const bool isRBrace = getLexer().is(AsmToken::RBrac);
2106         if (!isRBrace && getLexer().isNot(AsmToken::Colon))
2107           return false;
2108         Parser.Lex();
2109 
2110         if (isRBrace) {
2111           RegHi = RegLo;
2112         } else {
2113           if (getParser().parseAbsoluteExpression(RegHi))
2114             return false;
2115 
2116           if (getLexer().isNot(AsmToken::RBrac))
2117             return false;
2118           Parser.Lex();
2119         }
2120         RegNum = (unsigned) RegLo;
2121         RegWidth = (RegHi - RegLo) + 1;
2122       }
2123     }
2124   } else if (getLexer().is(AsmToken::LBrac)) {
2125     // List of consecutive registers: [s0,s1,s2,s3]
2126     Parser.Lex();
2127     if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
2128       return false;
2129     if (RegWidth != 1)
2130       return false;
2131     RegisterKind RegKind1;
2132     unsigned Reg1, RegNum1, RegWidth1;
2133     do {
2134       if (getLexer().is(AsmToken::Comma)) {
2135         Parser.Lex();
2136       } else if (getLexer().is(AsmToken::RBrac)) {
2137         Parser.Lex();
2138         break;
2139       } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
2140         if (RegWidth1 != 1) {
2141           return false;
2142         }
2143         if (RegKind1 != RegKind) {
2144           return false;
2145         }
2146         if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
2147           return false;
2148         }
2149       } else {
2150         return false;
2151       }
2152     } while (true);
2153   } else {
2154     return false;
2155   }
2156   switch (RegKind) {
2157   case IS_SPECIAL:
2158     RegNum = 0;
2159     RegWidth = 1;
2160     break;
2161   case IS_VGPR:
2162   case IS_SGPR:
2163   case IS_AGPR:
2164   case IS_TTMP:
2165   {
2166     unsigned Size = 1;
2167     if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2168       // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
2169       Size = std::min(RegWidth, 4u);
2170     }
2171     if (RegNum % Size != 0)
2172       return false;
2173     if (DwordRegIndex) { *DwordRegIndex = RegNum; }
2174     RegNum = RegNum / Size;
2175     int RCID = getRegClass(RegKind, RegWidth);
2176     if (RCID == -1)
2177       return false;
2178     const MCRegisterClass RC = TRI->getRegClass(RCID);
2179     if (RegNum >= RC.getNumRegs())
2180       return false;
2181     Reg = RC.getRegister(RegNum);
2182     break;
2183   }
2184 
2185   default:
2186     llvm_unreachable("unexpected register kind");
2187   }
2188 
2189   if (!subtargetHasRegister(*TRI, Reg))
2190     return false;
2191   return true;
2192 }
2193 
2194 Optional<StringRef>
2195 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2196   switch (RegKind) {
2197   case IS_VGPR:
2198     return StringRef(".amdgcn.next_free_vgpr");
2199   case IS_SGPR:
2200     return StringRef(".amdgcn.next_free_sgpr");
2201   default:
2202     return None;
2203   }
2204 }
2205 
2206 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2207   auto SymbolName = getGprCountSymbolName(RegKind);
2208   assert(SymbolName && "initializing invalid register kind");
2209   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2210   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2211 }
2212 
2213 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2214                                             unsigned DwordRegIndex,
2215                                             unsigned RegWidth) {
2216   // Symbols are only defined for GCN targets
2217   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2218     return true;
2219 
2220   auto SymbolName = getGprCountSymbolName(RegKind);
2221   if (!SymbolName)
2222     return true;
2223   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2224 
2225   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2226   int64_t OldCount;
2227 
2228   if (!Sym->isVariable())
2229     return !Error(getParser().getTok().getLoc(),
2230                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2231   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2232     return !Error(
2233         getParser().getTok().getLoc(),
2234         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2235 
2236   if (OldCount <= NewMax)
2237     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2238 
2239   return true;
2240 }
2241 
2242 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
2243   const auto &Tok = Parser.getTok();
2244   SMLoc StartLoc = Tok.getLoc();
2245   SMLoc EndLoc = Tok.getEndLoc();
2246   RegisterKind RegKind;
2247   unsigned Reg, RegNum, RegWidth, DwordRegIndex;
2248 
2249   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
2250     // FIXME: improve error messages (bug 41303).
2251     Error(StartLoc, "not a valid operand.");
2252     return nullptr;
2253   }
2254   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2255     if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
2256       return nullptr;
2257   } else
2258     KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
2259   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2260 }
2261 
2262 OperandMatchResultTy
2263 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2264   // TODO: add syntactic sugar for 1/(2*PI)
2265 
2266   assert(!isRegister());
2267   assert(!isModifier());
2268 
2269   const auto& Tok = getToken();
2270   const auto& NextTok = peekToken();
2271   bool IsReal = Tok.is(AsmToken::Real);
2272   SMLoc S = getLoc();
2273   bool Negate = false;
2274 
2275   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2276     lex();
2277     IsReal = true;
2278     Negate = true;
2279   }
2280 
2281   if (IsReal) {
2282     // Floating-point expressions are not supported.
2283     // Only floating-point literals with an
2284     // optional sign are allowed.
2285 
2286     StringRef Num = getTokenStr();
2287     lex();
2288 
2289     APFloat RealVal(APFloat::IEEEdouble());
2290     auto roundMode = APFloat::rmNearestTiesToEven;
2291     if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) {
2292       return MatchOperand_ParseFail;
2293     }
2294     if (Negate)
2295       RealVal.changeSign();
2296 
2297     Operands.push_back(
2298       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2299                                AMDGPUOperand::ImmTyNone, true));
2300 
2301     return MatchOperand_Success;
2302 
2303   } else {
2304     int64_t IntVal;
2305     const MCExpr *Expr;
2306     SMLoc S = getLoc();
2307 
2308     if (HasSP3AbsModifier) {
2309       // This is a workaround for handling expressions
2310       // as arguments of SP3 'abs' modifier, for example:
2311       //     |1.0|
2312       //     |-1|
2313       //     |1+x|
2314       // This syntax is not compatible with syntax of standard
2315       // MC expressions (due to the trailing '|').
2316       SMLoc EndLoc;
2317       if (getParser().parsePrimaryExpr(Expr, EndLoc))
2318         return MatchOperand_ParseFail;
2319     } else {
2320       if (Parser.parseExpression(Expr))
2321         return MatchOperand_ParseFail;
2322     }
2323 
2324     if (Expr->evaluateAsAbsolute(IntVal)) {
2325       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2326     } else {
2327       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2328     }
2329 
2330     return MatchOperand_Success;
2331   }
2332 
2333   return MatchOperand_NoMatch;
2334 }
2335 
2336 OperandMatchResultTy
2337 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2338   if (!isRegister())
2339     return MatchOperand_NoMatch;
2340 
2341   if (auto R = parseRegister()) {
2342     assert(R->isReg());
2343     Operands.push_back(std::move(R));
2344     return MatchOperand_Success;
2345   }
2346   return MatchOperand_ParseFail;
2347 }
2348 
2349 OperandMatchResultTy
2350 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2351   auto res = parseReg(Operands);
2352   if (res != MatchOperand_NoMatch) {
2353     return res;
2354   } else if (isModifier()) {
2355     return MatchOperand_NoMatch;
2356   } else {
2357     return parseImm(Operands, HasSP3AbsMod);
2358   }
2359 }
2360 
2361 bool
2362 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2363   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2364     const auto &str = Token.getString();
2365     return str == "abs" || str == "neg" || str == "sext";
2366   }
2367   return false;
2368 }
2369 
2370 bool
2371 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2372   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2373 }
2374 
2375 bool
2376 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2377   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2378 }
2379 
2380 bool
2381 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2382   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2383 }
2384 
2385 // Check if this is an operand modifier or an opcode modifier
2386 // which may look like an expression but is not. We should
2387 // avoid parsing these modifiers as expressions. Currently
2388 // recognized sequences are:
2389 //   |...|
2390 //   abs(...)
2391 //   neg(...)
2392 //   sext(...)
2393 //   -reg
2394 //   -|...|
2395 //   -abs(...)
2396 //   name:...
2397 // Note that simple opcode modifiers like 'gds' may be parsed as
2398 // expressions; this is a special case. See getExpressionAsToken.
2399 //
2400 bool
2401 AMDGPUAsmParser::isModifier() {
2402 
2403   AsmToken Tok = getToken();
2404   AsmToken NextToken[2];
2405   peekTokens(NextToken);
2406 
2407   return isOperandModifier(Tok, NextToken[0]) ||
2408          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2409          isOpcodeModifierWithVal(Tok, NextToken[0]);
2410 }
2411 
2412 // Check if the current token is an SP3 'neg' modifier.
2413 // Currently this modifier is allowed in the following context:
2414 //
2415 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2416 // 2. Before an 'abs' modifier: -abs(...)
2417 // 3. Before an SP3 'abs' modifier: -|...|
2418 //
2419 // In all other cases "-" is handled as a part
2420 // of an expression that follows the sign.
2421 //
2422 // Note: When "-" is followed by an integer literal,
2423 // this is interpreted as integer negation rather
2424 // than a floating-point NEG modifier applied to the literal.
2425 // Besides being counter-intuitive, such use of a floating-point
2426 // NEG modifier would result in different meanings
2427 // of integer literals used with VOP1/2/C and VOP3,
2428 // for example:
2429 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2430 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2431 // Negative fp literals with a preceding "-" are
2432 // handled likewise for uniformity.
2433 //
2434 bool
2435 AMDGPUAsmParser::parseSP3NegModifier() {
2436 
2437   AsmToken NextToken[2];
2438   peekTokens(NextToken);
2439 
2440   if (isToken(AsmToken::Minus) &&
2441       (isRegister(NextToken[0], NextToken[1]) ||
2442        NextToken[0].is(AsmToken::Pipe) ||
2443        isId(NextToken[0], "abs"))) {
2444     lex();
2445     return true;
2446   }
2447 
2448   return false;
2449 }
2450 
2451 OperandMatchResultTy
2452 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2453                                               bool AllowImm) {
2454   bool Neg, SP3Neg;
2455   bool Abs, SP3Abs;
2456   SMLoc Loc;
2457 
2458   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2459   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2460     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2461     return MatchOperand_ParseFail;
2462   }
2463 
2464   SP3Neg = parseSP3NegModifier();
2465 
2466   Loc = getLoc();
2467   Neg = trySkipId("neg");
2468   if (Neg && SP3Neg) {
2469     Error(Loc, "expected register or immediate");
2470     return MatchOperand_ParseFail;
2471   }
2472   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2473     return MatchOperand_ParseFail;
2474 
2475   Abs = trySkipId("abs");
2476   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2477     return MatchOperand_ParseFail;
2478 
2479   Loc = getLoc();
2480   SP3Abs = trySkipToken(AsmToken::Pipe);
2481   if (Abs && SP3Abs) {
2482     Error(Loc, "expected register or immediate");
2483     return MatchOperand_ParseFail;
2484   }
2485 
2486   OperandMatchResultTy Res;
2487   if (AllowImm) {
2488     Res = parseRegOrImm(Operands, SP3Abs);
2489   } else {
2490     Res = parseReg(Operands);
2491   }
2492   if (Res != MatchOperand_Success) {
2493     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2494   }
2495 
2496   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2497     return MatchOperand_ParseFail;
2498   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2499     return MatchOperand_ParseFail;
2500   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2501     return MatchOperand_ParseFail;
2502 
2503   AMDGPUOperand::Modifiers Mods;
2504   Mods.Abs = Abs || SP3Abs;
2505   Mods.Neg = Neg || SP3Neg;
2506 
2507   if (Mods.hasFPModifiers()) {
2508     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2509     if (Op.isExpr()) {
2510       Error(Op.getStartLoc(), "expected an absolute expression");
2511       return MatchOperand_ParseFail;
2512     }
2513     Op.setModifiers(Mods);
2514   }
2515   return MatchOperand_Success;
2516 }
2517 
2518 OperandMatchResultTy
2519 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2520                                                bool AllowImm) {
2521   bool Sext = trySkipId("sext");
2522   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2523     return MatchOperand_ParseFail;
2524 
2525   OperandMatchResultTy Res;
2526   if (AllowImm) {
2527     Res = parseRegOrImm(Operands);
2528   } else {
2529     Res = parseReg(Operands);
2530   }
2531   if (Res != MatchOperand_Success) {
2532     return Sext? MatchOperand_ParseFail : Res;
2533   }
2534 
2535   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2536     return MatchOperand_ParseFail;
2537 
2538   AMDGPUOperand::Modifiers Mods;
2539   Mods.Sext = Sext;
2540 
2541   if (Mods.hasIntModifiers()) {
2542     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2543     if (Op.isExpr()) {
2544       Error(Op.getStartLoc(), "expected an absolute expression");
2545       return MatchOperand_ParseFail;
2546     }
2547     Op.setModifiers(Mods);
2548   }
2549 
2550   return MatchOperand_Success;
2551 }
2552 
2553 OperandMatchResultTy
2554 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2555   return parseRegOrImmWithFPInputMods(Operands, false);
2556 }
2557 
2558 OperandMatchResultTy
2559 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2560   return parseRegOrImmWithIntInputMods(Operands, false);
2561 }
2562 
2563 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2564   auto Loc = getLoc();
2565   if (trySkipId("off")) {
2566     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2567                                                 AMDGPUOperand::ImmTyOff, false));
2568     return MatchOperand_Success;
2569   }
2570 
2571   if (!isRegister())
2572     return MatchOperand_NoMatch;
2573 
2574   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2575   if (Reg) {
2576     Operands.push_back(std::move(Reg));
2577     return MatchOperand_Success;
2578   }
2579 
2580   return MatchOperand_ParseFail;
2581 
2582 }
2583 
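// Additional target-specific checks on a matched instruction: reject forms
// that contradict an explicitly forced encoding (e32/e64, DPP or SDWA) and
// validate special cases such as dst_sel for SDWA v_mac.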
2584 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2585   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2586 
2587   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2588       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2589       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2590       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2591     return Match_InvalidOperand;
2592 
2593   if ((TSFlags & SIInstrFlags::VOP3) &&
2594       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2595       getForcedEncodingSize() != 64)
2596     return Match_PreferE32;
2597 
2598   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2599       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2600     // v_mac_f32/16 allow only dst_sel == DWORD;
2601     auto OpNum =
2602         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2603     const auto &Op = Inst.getOperand(OpNum);
2604     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2605       return Match_InvalidOperand;
2606     }
2607   }
2608 
2609   return Match_Success;
2610 }
2611 
2612 // What asm variants we should check
2613 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2614   if (getForcedEncodingSize() == 32) {
2615     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2616     return makeArrayRef(Variants);
2617   }
2618 
2619   if (isForcedVOP3()) {
2620     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2621     return makeArrayRef(Variants);
2622   }
2623 
2624   if (isForcedSDWA()) {
2625     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2626                                         AMDGPUAsmVariants::SDWA9};
2627     return makeArrayRef(Variants);
2628   }
2629 
2630   if (isForcedDPP()) {
2631     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2632     return makeArrayRef(Variants);
2633   }
2634 
2635   static const unsigned Variants[] = {
2636     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2637     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2638   };
2639 
2640   return makeArrayRef(Variants);
2641 }
2642 
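// Return the first implicit use of this instruction that is VCC, VCC_LO/HI,
// FLAT_SCR or M0, or NoRegister if there is none.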
2643 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2644   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2645   const unsigned Num = Desc.getNumImplicitUses();
2646   for (unsigned i = 0; i < Num; ++i) {
2647     unsigned Reg = Desc.ImplicitUses[i];
2648     switch (Reg) {
2649     case AMDGPU::FLAT_SCR:
2650     case AMDGPU::VCC:
2651     case AMDGPU::VCC_LO:
2652     case AMDGPU::VCC_HI:
2653     case AMDGPU::M0:
2654       return Reg;
2655     default:
2656       break;
2657     }
2658   }
2659   return AMDGPU::NoRegister;
2660 }
2661 
2662 // NB: This code is correct only when used to check constant
2663 // bus limitations because GFX7 supports no f16 inline constants.
2664 // Note that there are no cases when a GFX7 opcode violates
2665 // constant bus limitations due to the use of an f16 constant.
2666 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2667                                        unsigned OpIdx) const {
2668   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2669 
2670   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2671     return false;
2672   }
2673 
2674   const MCOperand &MO = Inst.getOperand(OpIdx);
2675 
2676   int64_t Val = MO.getImm();
2677   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2678 
2679   switch (OpSize) { // expected operand size
2680   case 8:
2681     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2682   case 4:
2683     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2684   case 2: {
2685     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2686     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2687         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2688         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2689         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2690         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
2691         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
2692       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2693     } else {
2694       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2695     }
2696   }
2697   default:
2698     llvm_unreachable("invalid operand size");
2699   }
2700 }
2701 
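// Return how many scalar values (SGPRs and/or literal constants) an
// instruction may read via the constant bus: one on targets other than GFX10;
// on GFX10, one for 64-bit shifts and two for everything else.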
2702 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
2703   if (!isGFX10())
2704     return 1;
2705 
2706   switch (Opcode) {
2707   // 64-bit shift instructions can use only one scalar value input
2708   case AMDGPU::V_LSHLREV_B64:
2709   case AMDGPU::V_LSHLREV_B64_gfx10:
2710   case AMDGPU::V_LSHL_B64:
2711   case AMDGPU::V_LSHRREV_B64:
2712   case AMDGPU::V_LSHRREV_B64_gfx10:
2713   case AMDGPU::V_LSHR_B64:
2714   case AMDGPU::V_ASHRREV_I64:
2715   case AMDGPU::V_ASHRREV_I64_gfx10:
2716   case AMDGPU::V_ASHR_I64:
2717     return 1;
2718   default:
2719     return 2;
2720   }
2721 }
2722 
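// An operand occupies the constant bus if it is a non-inlinable immediate,
// an SGPR other than null, or an expression.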
2723 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2724   const MCOperand &MO = Inst.getOperand(OpIdx);
2725   if (MO.isImm()) {
2726     return !isInlineConstant(Inst, OpIdx);
2727   } else if (MO.isReg()) {
2728     auto Reg = MO.getReg();
2729     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2730     return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL;
2731   } else {
2732     return true;
2733   }
2734 }
2735 
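// Count the scalar values (distinct SGPRs plus literal constants) read by a
// VALU instruction and check that the total does not exceed the constant bus
// limit for this opcode.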
2736 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2737   const unsigned Opcode = Inst.getOpcode();
2738   const MCInstrDesc &Desc = MII.get(Opcode);
2739   unsigned ConstantBusUseCount = 0;
2740   unsigned NumLiterals = 0;
2741   unsigned LiteralSize;
2742 
2743   if (Desc.TSFlags &
2744       (SIInstrFlags::VOPC |
2745        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2746        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2747        SIInstrFlags::SDWA)) {
2748     // Check special imm operands (used by madmk, etc)
2749     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2750       ++ConstantBusUseCount;
2751     }
2752 
2753     SmallDenseSet<unsigned> SGPRsUsed;
2754     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2755     if (SGPRUsed != AMDGPU::NoRegister) {
2756       SGPRsUsed.insert(SGPRUsed);
2757       ++ConstantBusUseCount;
2758     }
2759 
2760     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2761     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2762     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2763 
2764     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2765 
2766     for (int OpIdx : OpIndices) {
2767       if (OpIdx == -1) break;
2768 
2769       const MCOperand &MO = Inst.getOperand(OpIdx);
2770       if (usesConstantBus(Inst, OpIdx)) {
2771         if (MO.isReg()) {
2772           const unsigned Reg = mc2PseudoReg(MO.getReg());
2773           // Pairs of registers with partial intersections like these
2774           //   s0, s[0:1]
2775           //   flat_scratch_lo, flat_scratch
2776           //   flat_scratch_lo, flat_scratch_hi
2777           // are theoretically valid but they are disabled anyway.
2778           // Note that this code mimics SIInstrInfo::verifyInstruction
2779           if (!SGPRsUsed.count(Reg)) {
2780             SGPRsUsed.insert(Reg);
2781             ++ConstantBusUseCount;
2782           }
2783         } else { // Expression or a literal
2784 
2785           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
2786             continue; // special operand like VINTERP attr_chan
2787 
2788           // An instruction may use only one literal.
2789           // This has been validated on the previous step.
2790           // See validateVOP3Literal.
2791           // This literal may be used as more than one operand.
2792           // If all these operands are of the same size,
2793           // this literal counts as one scalar value.
2794           // Otherwise it counts as 2 scalar values.
2795           // See "GFX10 Shader Programming", section 3.6.2.3.
2796 
2797           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
2798           if (Size < 4) Size = 4;
2799 
2800           if (NumLiterals == 0) {
2801             NumLiterals = 1;
2802             LiteralSize = Size;
2803           } else if (LiteralSize != Size) {
2804             NumLiterals = 2;
2805           }
2806         }
2807       }
2808     }
2809   }
2810   ConstantBusUseCount += NumLiterals;
2811 
2812   return ConstantBusUseCount <= getConstantBusLimit(Opcode);
2813 }
2814 
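// If the destination (vdst) of this instruction is marked early-clobber,
// check that it does not overlap any of the source registers.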
2815 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2816   const unsigned Opcode = Inst.getOpcode();
2817   const MCInstrDesc &Desc = MII.get(Opcode);
2818 
2819   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2820   if (DstIdx == -1 ||
2821       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2822     return true;
2823   }
2824 
2825   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2826 
2827   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2828   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2829   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2830 
2831   assert(DstIdx != -1);
2832   const MCOperand &Dst = Inst.getOperand(DstIdx);
2833   assert(Dst.isReg());
2834   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2835 
2836   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2837 
2838   for (int SrcIdx : SrcIndices) {
2839     if (SrcIdx == -1) break;
2840     const MCOperand &Src = Inst.getOperand(SrcIdx);
2841     if (Src.isReg()) {
2842       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2843       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2844         return false;
2845       }
2846     }
2847   }
2848 
2849   return true;
2850 }
2851 
2852 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2853 
2854   const unsigned Opc = Inst.getOpcode();
2855   const MCInstrDesc &Desc = MII.get(Opc);
2856 
2857   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2858     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2859     assert(ClampIdx != -1);
2860     return Inst.getOperand(ClampIdx).getImm() == 0;
2861   }
2862 
2863   return true;
2864 }
2865 
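// Check that the size of the vdata register of a MIMG instruction matches
// the number of enabled dmask channels (always four for gather4), adjusted
// for d16 packing and for the extra dword required by tfe.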
2866 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2867 
2868   const unsigned Opc = Inst.getOpcode();
2869   const MCInstrDesc &Desc = MII.get(Opc);
2870 
2871   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2872     return true;
2873 
2874   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2875   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2876   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2877 
2878   assert(VDataIdx != -1);
2879   assert(DMaskIdx != -1);
2880   assert(TFEIdx != -1);
2881 
2882   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2883   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2884   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2885   if (DMask == 0)
2886     DMask = 1;
2887 
2888   unsigned DataSize =
2889     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2890   if (hasPackedD16()) {
2891     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2892     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2893       DataSize = (DataSize + 1) / 2;
2894   }
2895 
2896   return (VDataSize / 4) == DataSize + TFESize;
2897 }
2898 
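// On GFX10, check that the number of address registers supplied to a MIMG
// instruction (a single VGPR tuple, or one register per address in NSA form)
// matches the count implied by the dim, gradient, coordinate and
// lod/clamp/mip operands.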
2899 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
2900   const unsigned Opc = Inst.getOpcode();
2901   const MCInstrDesc &Desc = MII.get(Opc);
2902 
2903   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
2904     return true;
2905 
2906   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
2907   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
2908       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
2909   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
2910   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
2911   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2912 
2913   assert(VAddr0Idx != -1);
2914   assert(SrsrcIdx != -1);
2915   assert(DimIdx != -1);
2916   assert(SrsrcIdx > VAddr0Idx);
2917 
2918   unsigned Dim = Inst.getOperand(DimIdx).getImm();
2919   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
2920   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
2921   unsigned VAddrSize =
2922       IsNSA ? SrsrcIdx - VAddr0Idx
2923             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
2924 
2925   unsigned AddrSize = BaseOpcode->NumExtraArgs +
2926                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
2927                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
2928                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
2929   if (!IsNSA) {
2930     if (AddrSize > 8)
2931       AddrSize = 16;
2932     else if (AddrSize > 4)
2933       AddrSize = 8;
2934   }
2935 
2936   return VAddrSize == AddrSize;
2937 }
2938 
2939 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
2940 
2941   const unsigned Opc = Inst.getOpcode();
2942   const MCInstrDesc &Desc = MII.get(Opc);
2943 
2944   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2945     return true;
2946   if (!Desc.mayLoad() || !Desc.mayStore())
2947     return true; // Not atomic
2948 
2949   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2950   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2951 
2952   // This is an incomplete check because image_atomic_cmpswap
2953   // may only use 0x3 and 0xf while other atomic operations
2954   // may use 0x1 and 0x3. However, these limitations are
2955   // verified when we check that dmask matches dst size.
2956   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
2957 }
2958 
2959 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
2960 
2961   const unsigned Opc = Inst.getOpcode();
2962   const MCInstrDesc &Desc = MII.get(Opc);
2963 
2964   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
2965     return true;
2966 
2967   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2968   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2969 
2970   // GATHER4 instructions use dmask in a different fashion compared to
2971   // other MIMG instructions. The only useful DMASK values are
2972   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
2973   // (red,red,red,red) etc.) The ISA document doesn't mention
2974   // this.
2975   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
2976 }
2977 
2978 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
2979 
2980   const unsigned Opc = Inst.getOpcode();
2981   const MCInstrDesc &Desc = MII.get(Opc);
2982 
2983   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2984     return true;
2985 
2986   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2987   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
2988     if (isCI() || isSI())
2989       return false;
2990   }
2991 
2992   return true;
2993 }
2994 
2995 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
2996   const unsigned Opc = Inst.getOpcode();
2997   const MCInstrDesc &Desc = MII.get(Opc);
2998 
2999   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3000     return true;
3001 
3002   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3003   if (DimIdx < 0)
3004     return true;
3005 
3006   long Imm = Inst.getOperand(DimIdx).getImm();
3007   if (Imm < 0 || Imm >= 8)
3008     return false;
3009 
3010   return true;
3011 }
3012 
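// Return true for *rev* opcodes, i.e. those that take their first two source
// operands in reversed order.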
3013 static bool IsRevOpcode(const unsigned Opcode)
3014 {
3015   switch (Opcode) {
3016   case AMDGPU::V_SUBREV_F32_e32:
3017   case AMDGPU::V_SUBREV_F32_e64:
3018   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3019   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3020   case AMDGPU::V_SUBREV_F32_e32_vi:
3021   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3022   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3023   case AMDGPU::V_SUBREV_F32_e64_vi:
3024 
3025   case AMDGPU::V_SUBREV_I32_e32:
3026   case AMDGPU::V_SUBREV_I32_e64:
3027   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3028   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3029 
3030   case AMDGPU::V_SUBBREV_U32_e32:
3031   case AMDGPU::V_SUBBREV_U32_e64:
3032   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3033   case AMDGPU::V_SUBBREV_U32_e32_vi:
3034   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3035   case AMDGPU::V_SUBBREV_U32_e64_vi:
3036 
3037   case AMDGPU::V_SUBREV_U32_e32:
3038   case AMDGPU::V_SUBREV_U32_e64:
3039   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3040   case AMDGPU::V_SUBREV_U32_e32_vi:
3041   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3042   case AMDGPU::V_SUBREV_U32_e64_vi:
3043 
3044   case AMDGPU::V_SUBREV_F16_e32:
3045   case AMDGPU::V_SUBREV_F16_e64:
3046   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3047   case AMDGPU::V_SUBREV_F16_e32_vi:
3048   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3049   case AMDGPU::V_SUBREV_F16_e64_vi:
3050 
3051   case AMDGPU::V_SUBREV_U16_e32:
3052   case AMDGPU::V_SUBREV_U16_e64:
3053   case AMDGPU::V_SUBREV_U16_e32_vi:
3054   case AMDGPU::V_SUBREV_U16_e64_vi:
3055 
3056   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3057   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3058   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3059 
3060   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3061   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3062 
3063   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3064   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3065 
3066   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3067   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3068 
3069   case AMDGPU::V_LSHRREV_B32_e32:
3070   case AMDGPU::V_LSHRREV_B32_e64:
3071   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3072   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3073   case AMDGPU::V_LSHRREV_B32_e32_vi:
3074   case AMDGPU::V_LSHRREV_B32_e64_vi:
3075   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3076   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3077 
3078   case AMDGPU::V_ASHRREV_I32_e32:
3079   case AMDGPU::V_ASHRREV_I32_e64:
3080   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3081   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3082   case AMDGPU::V_ASHRREV_I32_e32_vi:
3083   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3084   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3085   case AMDGPU::V_ASHRREV_I32_e64_vi:
3086 
3087   case AMDGPU::V_LSHLREV_B32_e32:
3088   case AMDGPU::V_LSHLREV_B32_e64:
3089   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3090   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3091   case AMDGPU::V_LSHLREV_B32_e32_vi:
3092   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3093   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3094   case AMDGPU::V_LSHLREV_B32_e64_vi:
3095 
3096   case AMDGPU::V_LSHLREV_B16_e32:
3097   case AMDGPU::V_LSHLREV_B16_e64:
3098   case AMDGPU::V_LSHLREV_B16_e32_vi:
3099   case AMDGPU::V_LSHLREV_B16_e64_vi:
3100   case AMDGPU::V_LSHLREV_B16_gfx10:
3101 
3102   case AMDGPU::V_LSHRREV_B16_e32:
3103   case AMDGPU::V_LSHRREV_B16_e64:
3104   case AMDGPU::V_LSHRREV_B16_e32_vi:
3105   case AMDGPU::V_LSHRREV_B16_e64_vi:
3106   case AMDGPU::V_LSHRREV_B16_gfx10:
3107 
3108   case AMDGPU::V_ASHRREV_I16_e32:
3109   case AMDGPU::V_ASHRREV_I16_e64:
3110   case AMDGPU::V_ASHRREV_I16_e32_vi:
3111   case AMDGPU::V_ASHRREV_I16_e64_vi:
3112   case AMDGPU::V_ASHRREV_I16_gfx10:
3113 
3114   case AMDGPU::V_LSHLREV_B64:
3115   case AMDGPU::V_LSHLREV_B64_gfx10:
3116   case AMDGPU::V_LSHLREV_B64_vi:
3117 
3118   case AMDGPU::V_LSHRREV_B64:
3119   case AMDGPU::V_LSHRREV_B64_gfx10:
3120   case AMDGPU::V_LSHRREV_B64_vi:
3121 
3122   case AMDGPU::V_ASHRREV_I64:
3123   case AMDGPU::V_ASHRREV_I64_gfx10:
3124   case AMDGPU::V_ASHRREV_I64_vi:
3125 
3126   case AMDGPU::V_PK_LSHLREV_B16:
3127   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3128   case AMDGPU::V_PK_LSHLREV_B16_vi:
3129 
3130   case AMDGPU::V_PK_LSHRREV_B16:
3131   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3132   case AMDGPU::V_PK_LSHRREV_B16_vi:
3133   case AMDGPU::V_PK_ASHRREV_I16:
3134   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3135   case AMDGPU::V_PK_ASHRREV_I16_vi:
3136     return true;
3137   default:
3138     return false;
3139   }
3140 }
3141 
3142 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3143 
3144   using namespace SIInstrFlags;
3145   const unsigned Opcode = Inst.getOpcode();
3146   const MCInstrDesc &Desc = MII.get(Opcode);
3147 
3148   // The lds_direct register can only be used with 9-bit source operands.
3149   // Ignore encodings which do not accept such operands.
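  // For example (illustrative), "v_add_f32 v0, v1, lds_direct" uses lds_direct
  // as src1 and is rejected below, as is lds_direct used as src0 of an SDWA or
  // *rev instruction.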
3150   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3151     return true;
3152 
3153   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3154   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3155   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3156 
3157   const int SrcIndices[] = { Src1Idx, Src2Idx };
3158 
3159   // lds_direct cannot be specified as either src1 or src2.
3160   for (int SrcIdx : SrcIndices) {
3161     if (SrcIdx == -1) break;
3162     const MCOperand &Src = Inst.getOperand(SrcIdx);
3163     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3164       return false;
3165     }
3166   }
3167 
3168   if (Src0Idx == -1)
3169     return true;
3170 
3171   const MCOperand &Src = Inst.getOperand(Src0Idx);
3172   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3173     return true;
3174 
3175   // lds_direct is specified as src0. Check additional limitations.
3176   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3177 }
3178 
3179 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3180   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3181     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3182     if (Op.isFlatOffset())
3183       return Op.getStartLoc();
3184   }
3185   return getLoc();
3186 }
3187 
3188 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3189                                          const OperandVector &Operands) {
3190   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3191   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3192     return true;
3193 
3194   auto Opcode = Inst.getOpcode();
3195   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3196   assert(OpNum != -1);
3197 
3198   const auto &Op = Inst.getOperand(OpNum);
3199   if (!hasFlatOffsets() && Op.getImm() != 0) {
3200     Error(getFlatOffsetLoc(Operands),
3201           "flat offset modifier is not supported on this GPU");
3202     return false;
3203   }
3204 
3205   // Address offset is 13-bit signed for GFX9 and 12-bit signed for GFX10.
3206   // For the FLAT segment the offset must be non-negative;
3207   // the MSB is ignored and forced to zero.
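  // Concretely, the checks below allow [-4096, 4095] for non-FLAT segments and
  // [0, 4095] for the FLAT segment on GFX9; on GFX10 the ranges are
  // [-2048, 2047] and [0, 2047] respectively.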
3208   unsigned OffsetSize = isGFX9() ? 13 : 12;
3209   if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
3210     if (!isIntN(OffsetSize, Op.getImm())) {
3211       Error(getFlatOffsetLoc(Operands),
3212             isGFX9() ? "expected a 13-bit signed offset" :
3213                        "expected a 12-bit signed offset");
3214       return false;
3215     }
3216   } else {
3217     if (!isUIntN(OffsetSize - 1, Op.getImm())) {
3218       Error(getFlatOffsetLoc(Operands),
3219             isGFX9() ? "expected a 12-bit unsigned offset" :
3220                        "expected an 11-bit unsigned offset");
3221       return false;
3222     }
3223   }
3224 
3225   return true;
3226 }
3227 
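// SOP2/SOPC instructions can encode at most one 32-bit literal constant.
// For example (illustrative), "s_add_u32 s0, 0x11111111, 0x22222222" uses two
// distinct literals and is rejected, while using the same literal value in
// both sources counts as a single literal and is accepted.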
3228 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3229   unsigned Opcode = Inst.getOpcode();
3230   const MCInstrDesc &Desc = MII.get(Opcode);
3231   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3232     return true;
3233 
3234   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3235   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3236 
3237   const int OpIndices[] = { Src0Idx, Src1Idx };
3238 
3239   unsigned NumLiterals = 0;
3240   uint32_t LiteralValue;
3241 
3242   for (int OpIdx : OpIndices) {
3243     if (OpIdx == -1) break;
3244 
3245     const MCOperand &MO = Inst.getOperand(OpIdx);
3246     if (MO.isImm() &&
3247         // Exclude special imm operands (like the one used by s_set_gpr_idx_on)
3248         AMDGPU::isSISrcOperand(Desc, OpIdx) &&
3249         !isInlineConstant(Inst, OpIdx)) {
3250       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3251       if (NumLiterals == 0 || LiteralValue != Value) {
3252         LiteralValue = Value;
3253         ++NumLiterals;
3254       }
3255     }
3256   }
3257 
3258   return NumLiterals <= 1;
3259 }
3260 
3261 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3262   const unsigned Opc = Inst.getOpcode();
3263   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3264       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3265     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3266     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3267 
3268     if (OpSel & ~3)
3269       return false;
3270   }
3271   return true;
3272 }
3273 
3274 // Check if VCC register matches wavefront size
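// Wave64 code must use the full 64-bit VCC register pair, while wave32 code
// must refer to vcc_lo instead.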
3275 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3276   auto FB = getFeatureBits();
3277   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3278     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3279 }
3280 
3281 // VOP3 literal is only allowed in GFX10+ and only one can be used
3282 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3283   unsigned Opcode = Inst.getOpcode();
3284   const MCInstrDesc &Desc = MII.get(Opcode);
3285   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3286     return true;
3287 
3288   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3289   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3290   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3291 
3292   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3293 
3294   unsigned NumLiterals = 0;
3295   uint32_t LiteralValue;
3296 
3297   for (int OpIdx : OpIndices) {
3298     if (OpIdx == -1) break;
3299 
3300     const MCOperand &MO = Inst.getOperand(OpIdx);
3301     if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx))
3302       continue;
3303 
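    // On subtargets with the MFMA inline literal bug, src2 of an MAI
    // instruction may not be an immediate at all, not even an inline constant.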
3304     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3305         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug])
3306       return false;
3307 
3308     if (!isInlineConstant(Inst, OpIdx)) {
3309       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3310       if (NumLiterals == 0 || LiteralValue != Value) {
3311         LiteralValue = Value;
3312         ++NumLiterals;
3313       }
3314     }
3315   }
3316 
3317   return !NumLiterals ||
3318          (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3319 }
3320 
3321 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3322                                           const SMLoc &IDLoc,
3323                                           const OperandVector &Operands) {
3324   if (!validateLdsDirect(Inst)) {
3325     Error(IDLoc,
3326       "invalid use of lds_direct");
3327     return false;
3328   }
3329   if (!validateSOPLiteral(Inst)) {
3330     Error(IDLoc,
3331       "only one literal operand is allowed");
3332     return false;
3333   }
3334   if (!validateVOP3Literal(Inst)) {
3335     Error(IDLoc,
3336       "invalid literal operand");
3337     return false;
3338   }
3339   if (!validateConstantBusLimitations(Inst)) {
3340     Error(IDLoc,
3341       "invalid operand (violates constant bus restrictions)");
3342     return false;
3343   }
3344   if (!validateEarlyClobberLimitations(Inst)) {
3345     Error(IDLoc,
3346       "destination must be different than all sources");
3347     return false;
3348   }
3349   if (!validateIntClampSupported(Inst)) {
3350     Error(IDLoc,
3351       "integer clamping is not supported on this GPU");
3352     return false;
3353   }
3354   if (!validateOpSel(Inst)) {
3355     Error(IDLoc,
3356       "invalid op_sel operand");
3357     return false;
3358   }
3359   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
3360   if (!validateMIMGD16(Inst)) {
3361     Error(IDLoc,
3362       "d16 modifier is not supported on this GPU");
3363     return false;
3364   }
3365   if (!validateMIMGDim(Inst)) {
3366     Error(IDLoc, "dim modifier is required on this GPU");
3367     return false;
3368   }
3369   if (!validateMIMGDataSize(Inst)) {
3370     Error(IDLoc,
3371       "image data size does not match dmask and tfe");
3372     return false;
3373   }
3374   if (!validateMIMGAddrSize(Inst)) {
3375     Error(IDLoc,
3376       "image address size does not match dim and a16");
3377     return false;
3378   }
3379   if (!validateMIMGAtomicDMask(Inst)) {
3380     Error(IDLoc,
3381       "invalid atomic image dmask");
3382     return false;
3383   }
3384   if (!validateMIMGGatherDMask(Inst)) {
3385     Error(IDLoc,
3386       "invalid image_gather dmask: only one bit must be set");
3387     return false;
3388   }
3389   if (!validateFlatOffset(Inst, Operands)) {
3390     return false;
3391   }
3392 
3393   return true;
3394 }
3395 
3396 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3397                                             const FeatureBitset &FBS,
3398                                             unsigned VariantID = 0);
3399 
3400 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3401                                               OperandVector &Operands,
3402                                               MCStreamer &Out,
3403                                               uint64_t &ErrorInfo,
3404                                               bool MatchingInlineAsm) {
3405   MCInst Inst;
3406   unsigned Result = Match_Success;
3407   for (auto Variant : getMatchedVariants()) {
3408     uint64_t EI;
3409     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3410                                   Variant);
3411     // Match statuses are ordered from least to most specific; keep the most
3412     // specific status seen so far as the result:
3413     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3414     if ((R == Match_Success) ||
3415         (R == Match_PreferE32) ||
3416         (R == Match_MissingFeature && Result != Match_PreferE32) ||
3417         (R == Match_InvalidOperand && Result != Match_MissingFeature
3418                                    && Result != Match_PreferE32) ||
3419         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
3420                                    && Result != Match_MissingFeature
3421                                    && Result != Match_PreferE32)) {
3422       Result = R;
3423       ErrorInfo = EI;
3424     }
3425     if (R == Match_Success)
3426       break;
3427   }
3428 
3429   switch (Result) {
3430   default: break;
3431   case Match_Success:
3432     if (!validateInstruction(Inst, IDLoc, Operands)) {
3433       return true;
3434     }
3435     Inst.setLoc(IDLoc);
3436     Out.EmitInstruction(Inst, getSTI());
3437     return false;
3438 
3439   case Match_MissingFeature:
3440     return Error(IDLoc, "instruction not supported on this GPU");
3441 
3442   case Match_MnemonicFail: {
3443     FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3444     std::string Suggestion = AMDGPUMnemonicSpellCheck(
3445         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
3446     return Error(IDLoc, "invalid instruction" + Suggestion,
3447                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
3448   }
3449 
3450   case Match_InvalidOperand: {
3451     SMLoc ErrorLoc = IDLoc;
3452     if (ErrorInfo != ~0ULL) {
3453       if (ErrorInfo >= Operands.size()) {
3454         return Error(IDLoc, "too few operands for instruction");
3455       }
3456       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3457       if (ErrorLoc == SMLoc())
3458         ErrorLoc = IDLoc;
3459     }
3460     return Error(ErrorLoc, "invalid operand for instruction");
3461   }
3462 
3463   case Match_PreferE32:
3464     return Error(IDLoc, "internal error: instruction without _e64 suffix "
3465                         "should be encoded as e32");
3466   }
3467   llvm_unreachable("Implement any new match types added!");
3468 }
3469 
3470 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3471   int64_t Tmp = -1;
3472   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3473     return true;
3474   }
3475   if (getParser().parseAbsoluteExpression(Tmp)) {
3476     return true;
3477   }
3478   Ret = static_cast<uint32_t>(Tmp);
3479   return false;
3480 }
3481 
3482 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3483                                                uint32_t &Minor) {
3484   if (ParseAsAbsoluteExpression(Major))
3485     return TokError("invalid major version");
3486 
3487   if (getLexer().isNot(AsmToken::Comma))
3488     return TokError("minor version number required, comma expected");
3489   Lex();
3490 
3491   if (ParseAsAbsoluteExpression(Minor))
3492     return TokError("invalid minor version");
3493 
3494   return false;
3495 }
3496 
3497 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3498   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3499     return TokError("directive only supported for amdgcn architecture");
3500 
3501   std::string Target;
3502 
3503   SMLoc TargetStart = getTok().getLoc();
3504   if (getParser().parseEscapedString(Target))
3505     return true;
3506   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3507 
3508   std::string ExpectedTarget;
3509   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3510   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3511 
3512   if (Target != ExpectedTargetOS.str())
3513     return getParser().Error(TargetRange.Start, "target must match options",
3514                              TargetRange);
3515 
3516   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3517   return false;
3518 }
3519 
3520 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3521   return getParser().Error(Range.Start, "value out of range", Range);
3522 }
3523 
3524 bool AMDGPUAsmParser::calculateGPRBlocks(
3525     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3526     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
3527     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
3528     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
3529   // TODO(scott.linder): These calculations are duplicated from
3530   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3531   IsaVersion Version = getIsaVersion(getSTI().getCPU());
3532 
3533   unsigned NumVGPRs = NextFreeVGPR;
3534   unsigned NumSGPRs = NextFreeSGPR;
3535 
3536   if (Version.Major >= 10)
3537     NumSGPRs = 0;
3538   else {
3539     unsigned MaxAddressableNumSGPRs =
3540         IsaInfo::getAddressableNumSGPRs(&getSTI());
3541 
3542     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3543         NumSGPRs > MaxAddressableNumSGPRs)
3544       return OutOfRangeError(SGPRRange);
3545 
3546     NumSGPRs +=
3547         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3548 
3549     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3550         NumSGPRs > MaxAddressableNumSGPRs)
3551       return OutOfRangeError(SGPRRange);
3552 
3553     if (Features.test(FeatureSGPRInitBug))
3554       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3555   }
3556 
3557   VGPRBlocks =
3558       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
3559   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3560 
3561   return false;
3562 }
3563 
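// Parses an .amdhsa_kernel ... .end_amdhsa_kernel block. A minimal example
// (kernel name and register counts are illustrative):
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel
// Directives may not be repeated; .amdhsa_next_free_vgpr and
// .amdhsa_next_free_sgpr are mandatory, all others are optional.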
3564 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3565   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3566     return TokError("directive only supported for amdgcn architecture");
3567 
3568   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3569     return TokError("directive only supported for amdhsa OS");
3570 
3571   StringRef KernelName;
3572   if (getParser().parseIdentifier(KernelName))
3573     return true;
3574 
3575   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3576 
3577   StringSet<> Seen;
3578 
3579   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3580 
3581   SMRange VGPRRange;
3582   uint64_t NextFreeVGPR = 0;
3583   SMRange SGPRRange;
3584   uint64_t NextFreeSGPR = 0;
3585   unsigned UserSGPRCount = 0;
3586   bool ReserveVCC = true;
3587   bool ReserveFlatScr = true;
3588   bool ReserveXNACK = hasXNACK();
3589   Optional<bool> EnableWavefrontSize32;
3590 
3591   while (true) {
3592     while (getLexer().is(AsmToken::EndOfStatement))
3593       Lex();
3594 
3595     if (getLexer().isNot(AsmToken::Identifier))
3596       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3597 
3598     StringRef ID = getTok().getIdentifier();
3599     SMRange IDRange = getTok().getLocRange();
3600     Lex();
3601 
3602     if (ID == ".end_amdhsa_kernel")
3603       break;
3604 
3605     if (Seen.find(ID) != Seen.end())
3606       return TokError(".amdhsa_ directives cannot be repeated");
3607     Seen.insert(ID);
3608 
3609     SMLoc ValStart = getTok().getLoc();
3610     int64_t IVal;
3611     if (getParser().parseAbsoluteExpression(IVal))
3612       return true;
3613     SMLoc ValEnd = getTok().getLoc();
3614     SMRange ValRange = SMRange(ValStart, ValEnd);
3615 
3616     if (IVal < 0)
3617       return OutOfRangeError(ValRange);
3618 
3619     uint64_t Val = IVal;
3620 
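// Range-checks VALUE against the bit width of ENTRY and, if it fits, sets the
// ENTRY bits of FIELD to VALUE.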
3621 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
3622   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
3623     return OutOfRangeError(RANGE);                                             \
3624   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3625 
3626     if (ID == ".amdhsa_group_segment_fixed_size") {
3627       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3628         return OutOfRangeError(ValRange);
3629       KD.group_segment_fixed_size = Val;
3630     } else if (ID == ".amdhsa_private_segment_fixed_size") {
3631       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3632         return OutOfRangeError(ValRange);
3633       KD.private_segment_fixed_size = Val;
3634     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3635       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3636                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3637                        Val, ValRange);
3638       if (Val)
3639         UserSGPRCount += 4;
3640     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3641       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3642                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3643                        ValRange);
3644       if (Val)
3645         UserSGPRCount += 2;
3646     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3647       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3648                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3649                        ValRange);
3650       if (Val)
3651         UserSGPRCount += 2;
3652     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3653       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3654                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3655                        Val, ValRange);
3656       if (Val)
3657         UserSGPRCount += 2;
3658     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3659       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3660                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3661                        ValRange);
3662       if (Val)
3663         UserSGPRCount += 2;
3664     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3665       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3666                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3667                        ValRange);
3668       if (Val)
3669         UserSGPRCount += 2;
3670     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3671       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3672                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3673                        Val, ValRange);
3674       if (Val)
3675         UserSGPRCount += 1;
3676     } else if (ID == ".amdhsa_wavefront_size32") {
3677       if (IVersion.Major < 10)
3678         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3679                                  IDRange);
3680       EnableWavefrontSize32 = Val;
3681       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3682                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
3683                        Val, ValRange);
3684     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3685       PARSE_BITS_ENTRY(
3686           KD.compute_pgm_rsrc2,
3687           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3688           ValRange);
3689     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3690       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3691                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3692                        ValRange);
3693     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3694       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3695                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3696                        ValRange);
3697     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3698       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3699                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3700                        ValRange);
3701     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3702       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3703                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3704                        ValRange);
3705     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3706       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3707                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3708                        ValRange);
3709     } else if (ID == ".amdhsa_next_free_vgpr") {
3710       VGPRRange = ValRange;
3711       NextFreeVGPR = Val;
3712     } else if (ID == ".amdhsa_next_free_sgpr") {
3713       SGPRRange = ValRange;
3714       NextFreeSGPR = Val;
3715     } else if (ID == ".amdhsa_reserve_vcc") {
3716       if (!isUInt<1>(Val))
3717         return OutOfRangeError(ValRange);
3718       ReserveVCC = Val;
3719     } else if (ID == ".amdhsa_reserve_flat_scratch") {
3720       if (IVersion.Major < 7)
3721         return getParser().Error(IDRange.Start, "directive requires gfx7+",
3722                                  IDRange);
3723       if (!isUInt<1>(Val))
3724         return OutOfRangeError(ValRange);
3725       ReserveFlatScr = Val;
3726     } else if (ID == ".amdhsa_reserve_xnack_mask") {
3727       if (IVersion.Major < 8)
3728         return getParser().Error(IDRange.Start, "directive requires gfx8+",
3729                                  IDRange);
3730       if (!isUInt<1>(Val))
3731         return OutOfRangeError(ValRange);
3732       ReserveXNACK = Val;
3733     } else if (ID == ".amdhsa_float_round_mode_32") {
3734       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3735                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
3736     } else if (ID == ".amdhsa_float_round_mode_16_64") {
3737       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3738                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
3739     } else if (ID == ".amdhsa_float_denorm_mode_32") {
3740       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3741                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
3742     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
3743       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3744                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
3745                        ValRange);
3746     } else if (ID == ".amdhsa_dx10_clamp") {
3747       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3748                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
3749     } else if (ID == ".amdhsa_ieee_mode") {
3750       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
3751                        Val, ValRange);
3752     } else if (ID == ".amdhsa_fp16_overflow") {
3753       if (IVersion.Major < 9)
3754         return getParser().Error(IDRange.Start, "directive requires gfx9+",
3755                                  IDRange);
3756       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
3757                        ValRange);
3758     } else if (ID == ".amdhsa_workgroup_processor_mode") {
3759       if (IVersion.Major < 10)
3760         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3761                                  IDRange);
3762       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
3763                        ValRange);
3764     } else if (ID == ".amdhsa_memory_ordered") {
3765       if (IVersion.Major < 10)
3766         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3767                                  IDRange);
3768       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
3769                        ValRange);
3770     } else if (ID == ".amdhsa_forward_progress") {
3771       if (IVersion.Major < 10)
3772         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3773                                  IDRange);
3774       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
3775                        ValRange);
3776     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
3777       PARSE_BITS_ENTRY(
3778           KD.compute_pgm_rsrc2,
3779           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
3780           ValRange);
3781     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
3782       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3783                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
3784                        Val, ValRange);
3785     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
3786       PARSE_BITS_ENTRY(
3787           KD.compute_pgm_rsrc2,
3788           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
3789           ValRange);
3790     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
3791       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3792                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
3793                        Val, ValRange);
3794     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
3795       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3796                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
3797                        Val, ValRange);
3798     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
3799       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3800                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
3801                        Val, ValRange);
3802     } else if (ID == ".amdhsa_exception_int_div_zero") {
3803       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3804                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
3805                        Val, ValRange);
3806     } else {
3807       return getParser().Error(IDRange.Start,
3808                                "unknown .amdhsa_kernel directive", IDRange);
3809     }
3810 
3811 #undef PARSE_BITS_ENTRY
3812   }
3813 
3814   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
3815     return TokError(".amdhsa_next_free_vgpr directive is required");
3816 
3817   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
3818     return TokError(".amdhsa_next_free_sgpr directive is required");
3819 
3820   unsigned VGPRBlocks;
3821   unsigned SGPRBlocks;
3822   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
3823                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
3824                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
3825                          SGPRBlocks))
3826     return true;
3827 
3828   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
3829           VGPRBlocks))
3830     return OutOfRangeError(VGPRRange);
3831   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3832                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
3833 
3834   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
3835           SGPRBlocks))
3836     return OutOfRangeError(SGPRRange);
3837   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3838                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
3839                   SGPRBlocks);
3840 
3841   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
3842     return TokError("too many user SGPRs enabled");
3843   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
3844                   UserSGPRCount);
3845 
3846   getTargetStreamer().EmitAmdhsaKernelDescriptor(
3847       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
3848       ReserveFlatScr, ReserveXNACK);
3849   return false;
3850 }
3851 
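// Handles, e.g. (version numbers are illustrative):
//   .hsa_code_object_version 2,1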
3852 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
3853   uint32_t Major;
3854   uint32_t Minor;
3855 
3856   if (ParseDirectiveMajorMinor(Major, Minor))
3857     return true;
3858 
3859   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
3860   return false;
3861 }
3862 
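// Handles, e.g. (values are illustrative):
//   .hsa_code_object_isa 9,0,0,"AMD","AMDGPU"
// With no arguments, the ISA version of the targeted GPU is emitted instead.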
3863 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
3864   uint32_t Major;
3865   uint32_t Minor;
3866   uint32_t Stepping;
3867   StringRef VendorName;
3868   StringRef ArchName;
3869 
3870   // If this directive has no arguments, then use the ISA version for the
3871   // targeted GPU.
3872   if (getLexer().is(AsmToken::EndOfStatement)) {
3873     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3874     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
3875                                                       ISA.Stepping,
3876                                                       "AMD", "AMDGPU");
3877     return false;
3878   }
3879 
3880   if (ParseDirectiveMajorMinor(Major, Minor))
3881     return true;
3882 
3883   if (getLexer().isNot(AsmToken::Comma))
3884     return TokError("stepping version number required, comma expected");
3885   Lex();
3886 
3887   if (ParseAsAbsoluteExpression(Stepping))
3888     return TokError("invalid stepping version");
3889 
3890   if (getLexer().isNot(AsmToken::Comma))
3891     return TokError("vendor name required, comma expected");
3892   Lex();
3893 
3894   if (getLexer().isNot(AsmToken::String))
3895     return TokError("invalid vendor name");
3896 
3897   VendorName = getLexer().getTok().getStringContents();
3898   Lex();
3899 
3900   if (getLexer().isNot(AsmToken::Comma))
3901     return TokError("arch name required, comma expected");
3902   Lex();
3903 
3904   if (getLexer().isNot(AsmToken::String))
3905     return TokError("invalid arch name");
3906 
3907   ArchName = getLexer().getTok().getStringContents();
3908   Lex();
3909 
3910   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
3911                                                     VendorName, ArchName);
3912   return false;
3913 }
3914 
3915 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
3916                                                amd_kernel_code_t &Header) {
3917   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
3918   // assembly for backwards compatibility.
3919   if (ID == "max_scratch_backing_memory_byte_size") {
3920     Parser.eatToEndOfStatement();
3921     return false;
3922   }
3923 
3924   SmallString<40> ErrStr;
3925   raw_svector_ostream Err(ErrStr);
3926   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
3927     return TokError(Err.str());
3928   }
3929   Lex();
3930 
3931   if (ID == "enable_wavefront_size32") {
3932     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
3933       if (!isGFX10())
3934         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
3935       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
3936         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
3937     } else {
3938       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
3939         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
3940     }
3941   }
3942 
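  // wavefront_size is log2 of the wave size, so 5 selects wave32 and
  // 6 selects wave64.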
3943   if (ID == "wavefront_size") {
3944     if (Header.wavefront_size == 5) {
3945       if (!isGFX10())
3946         return TokError("wavefront_size=5 is only allowed on GFX10+");
3947       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
3948         return TokError("wavefront_size=5 requires +WavefrontSize32");
3949     } else if (Header.wavefront_size == 6) {
3950       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
3951         return TokError("wavefront_size=6 requires +WavefrontSize64");
3952     }
3953   }
3954 
3955   if (ID == "enable_wgp_mode") {
3956     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
3957       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
3958   }
3959 
3960   if (ID == "enable_mem_ordered") {
3961     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
3962       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
3963   }
3964 
3965   if (ID == "enable_fwd_progress") {
3966     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
3967       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
3968   }
3969 
3970   return false;
3971 }
3972 
3973 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
3974   amd_kernel_code_t Header;
3975   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
3976 
3977   while (true) {
3978     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
3979     // will set the current token to EndOfStatement.
3980     while(getLexer().is(AsmToken::EndOfStatement))
3981       Lex();
3982 
3983     if (getLexer().isNot(AsmToken::Identifier))
3984       return TokError("expected value identifier or .end_amd_kernel_code_t");
3985 
3986     StringRef ID = getLexer().getTok().getIdentifier();
3987     Lex();
3988 
3989     if (ID == ".end_amd_kernel_code_t")
3990       break;
3991 
3992     if (ParseAMDKernelCodeTValue(ID, Header))
3993       return true;
3994   }
3995 
3996   getTargetStreamer().EmitAMDKernelCodeT(Header);
3997 
3998   return false;
3999 }
4000 
4001 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4002   if (getLexer().isNot(AsmToken::Identifier))
4003     return TokError("expected symbol name");
4004 
4005   StringRef KernelName = Parser.getTok().getString();
4006 
4007   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4008                                            ELF::STT_AMDGPU_HSA_KERNEL);
4009   Lex();
4010   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
4011     KernelScope.initialize(getContext());
4012   return false;
4013 }
4014 
4015 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4016   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4017     return Error(getParser().getTok().getLoc(),
4018                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4019                  "architectures");
4020   }
4021 
4022   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
4023 
4024   std::string ISAVersionStringFromSTI;
4025   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4026   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4027 
4028   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4029     return Error(getParser().getTok().getLoc(),
4030                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4031                  "arguments specified through the command line");
4032   }
4033 
4034   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4035   Lex();
4036 
4037   return false;
4038 }
4039 
4040 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4041   const char *AssemblerDirectiveBegin;
4042   const char *AssemblerDirectiveEnd;
4043   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4044       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
4045           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4046                             HSAMD::V3::AssemblerDirectiveEnd)
4047           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4048                             HSAMD::AssemblerDirectiveEnd);
4049 
4050   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4051     return Error(getParser().getTok().getLoc(),
4052                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4053                  "not available on non-amdhsa OSes")).str());
4054   }
4055 
4056   std::string HSAMetadataString;
4057   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4058                           HSAMetadataString))
4059     return true;
4060 
4061   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
4062     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4063       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4064   } else {
4065     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4066       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4067   }
4068 
4069   return false;
4070 }
4071 
4072 /// Common code to parse out a block of text (typically YAML) between start and
4073 /// end directives.
4074 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4075                                           const char *AssemblerDirectiveEnd,
4076                                           std::string &CollectString) {
4077 
4078   raw_string_ostream CollectStream(CollectString);
4079 
4080   getLexer().setSkipSpace(false);
4081 
4082   bool FoundEnd = false;
4083   while (!getLexer().is(AsmToken::Eof)) {
4084     while (getLexer().is(AsmToken::Space)) {
4085       CollectStream << getLexer().getTok().getString();
4086       Lex();
4087     }
4088 
4089     if (getLexer().is(AsmToken::Identifier)) {
4090       StringRef ID = getLexer().getTok().getIdentifier();
4091       if (ID == AssemblerDirectiveEnd) {
4092         Lex();
4093         FoundEnd = true;
4094         break;
4095       }
4096     }
4097 
4098     CollectStream << Parser.parseStringToEndOfStatement()
4099                   << getContext().getAsmInfo()->getSeparatorString();
4100 
4101     Parser.eatToEndOfStatement();
4102   }
4103 
4104   getLexer().setSkipSpace(true);
4105 
4106   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
4107     return TokError(Twine("expected directive ") +
4108                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4109   }
4110 
4111   CollectStream.flush();
4112   return false;
4113 }
4114 
4115 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4116 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4117   std::string String;
4118   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4119                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4120     return true;
4121 
4122   auto PALMetadata = getTargetStreamer().getPALMetadata();
4123   if (!PALMetadata->setFromString(String))
4124     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4125   return false;
4126 }
4127 
4128 /// Parse the assembler directive for old linear-format PAL metadata.
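/// The payload is an even-length, comma-separated list of key,value pairs;
/// each pair sets one PAL metadata register, e.g. (illustrative values)
/// "0x1234,0x1, 0x5678,0x0".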
4129 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4130   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4131     return Error(getParser().getTok().getLoc(),
4132                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4133                  "not available on non-amdpal OSes")).str());
4134   }
4135 
4136   auto PALMetadata = getTargetStreamer().getPALMetadata();
4137   PALMetadata->setLegacy();
4138   for (;;) {
4139     uint32_t Key, Value;
4140     if (ParseAsAbsoluteExpression(Key)) {
4141       return TokError(Twine("invalid value in ") +
4142                       Twine(PALMD::AssemblerDirective));
4143     }
4144     if (getLexer().isNot(AsmToken::Comma)) {
4145       return TokError(Twine("expected an even number of values in ") +
4146                       Twine(PALMD::AssemblerDirective));
4147     }
4148     Lex();
4149     if (ParseAsAbsoluteExpression(Value)) {
4150       return TokError(Twine("invalid value in ") +
4151                       Twine(PALMD::AssemblerDirective));
4152     }
4153     PALMetadata->setRegister(Key, Value);
4154     if (getLexer().isNot(AsmToken::Comma))
4155       break;
4156     Lex();
4157   }
4158   return false;
4159 }
4160 
4161 /// ParseDirectiveAMDGPULDS
4162 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
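///  e.g. (illustrative): .amdgpu_lds my_lds_var, 4096, 16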
4163 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4164   if (getParser().checkForValidSection())
4165     return true;
4166 
4167   StringRef Name;
4168   SMLoc NameLoc = getLexer().getLoc();
4169   if (getParser().parseIdentifier(Name))
4170     return TokError("expected identifier in directive");
4171 
4172   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4173   if (parseToken(AsmToken::Comma, "expected ','"))
4174     return true;
4175 
4176   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4177 
4178   int64_t Size;
4179   SMLoc SizeLoc = getLexer().getLoc();
4180   if (getParser().parseAbsoluteExpression(Size))
4181     return true;
4182   if (Size < 0)
4183     return Error(SizeLoc, "size must be non-negative");
4184   if (Size > LocalMemorySize)
4185     return Error(SizeLoc, "size is too large");
4186 
4187   int64_t Align = 4;
4188   if (getLexer().is(AsmToken::Comma)) {
4189     Lex();
4190     SMLoc AlignLoc = getLexer().getLoc();
4191     if (getParser().parseAbsoluteExpression(Align))
4192       return true;
4193     if (Align < 0 || !isPowerOf2_64(Align))
4194       return Error(AlignLoc, "alignment must be a power of two");
4195 
4196     // Alignment larger than the size of LDS is possible in theory, as long
4197     // as the linker manages to place the symbol at address 0, but we do want
4198     // to make sure the alignment fits nicely into a 32-bit integer.
4199     if (Align >= 1u << 31)
4200       return Error(AlignLoc, "alignment is too large");
4201   }
4202 
4203   if (parseToken(AsmToken::EndOfStatement,
4204                  "unexpected token in '.amdgpu_lds' directive"))
4205     return true;
4206 
4207   Symbol->redefineIfPossible();
4208   if (!Symbol->isUndefined())
4209     return Error(NameLoc, "invalid symbol redefinition");
4210 
4211   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align);
4212   return false;
4213 }
4214 
4215 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4216   StringRef IDVal = DirectiveID.getString();
4217 
4218   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
4219     if (IDVal == ".amdgcn_target")
4220       return ParseDirectiveAMDGCNTarget();
4221 
4222     if (IDVal == ".amdhsa_kernel")
4223       return ParseDirectiveAMDHSAKernel();
4224 
4225     // TODO: Restructure/combine with PAL metadata directive.
4226     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4227       return ParseDirectiveHSAMetadata();
4228   } else {
4229     if (IDVal == ".hsa_code_object_version")
4230       return ParseDirectiveHSACodeObjectVersion();
4231 
4232     if (IDVal == ".hsa_code_object_isa")
4233       return ParseDirectiveHSACodeObjectISA();
4234 
4235     if (IDVal == ".amd_kernel_code_t")
4236       return ParseDirectiveAMDKernelCodeT();
4237 
4238     if (IDVal == ".amdgpu_hsa_kernel")
4239       return ParseDirectiveAMDGPUHsaKernel();
4240 
4241     if (IDVal == ".amd_amdgpu_isa")
4242       return ParseDirectiveISAVersion();
4243 
4244     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4245       return ParseDirectiveHSAMetadata();
4246   }
4247 
4248   if (IDVal == ".amdgpu_lds")
4249     return ParseDirectiveAMDGPULDS();
4250 
4251   if (IDVal == PALMD::AssemblerDirectiveBegin)
4252     return ParseDirectivePALMetadataBegin();
4253 
4254   if (IDVal == PALMD::AssemblerDirective)
4255     return ParseDirectivePALMetadata();
4256 
4257   return true;
4258 }
4259 
4260 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4261                                            unsigned RegNo) const {
4262 
4263   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4264        R.isValid(); ++R) {
4265     if (*R == RegNo)
4266       return isGFX9() || isGFX10();
4267   }
4268 
4269   // GFX10 has 2 more SGPRs 104 and 105.
4270   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4271        R.isValid(); ++R) {
4272     if (*R == RegNo)
4273       return hasSGPR104_SGPR105();
4274   }
4275 
4276   switch (RegNo) {
4277   case AMDGPU::SRC_SHARED_BASE:
4278   case AMDGPU::SRC_SHARED_LIMIT:
4279   case AMDGPU::SRC_PRIVATE_BASE:
4280   case AMDGPU::SRC_PRIVATE_LIMIT:
4281   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4282     return !isCI() && !isSI() && !isVI();
4283   case AMDGPU::TBA:
4284   case AMDGPU::TBA_LO:
4285   case AMDGPU::TBA_HI:
4286   case AMDGPU::TMA:
4287   case AMDGPU::TMA_LO:
4288   case AMDGPU::TMA_HI:
4289     return !isGFX9() && !isGFX10();
4290   case AMDGPU::XNACK_MASK:
4291   case AMDGPU::XNACK_MASK_LO:
4292   case AMDGPU::XNACK_MASK_HI:
4293     return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4294   case AMDGPU::SGPR_NULL:
4295     return isGFX10();
4296   default:
4297     break;
4298   }
4299 
4300   if (isCI())
4301     return true;
4302 
4303   if (isSI() || isGFX10()) {
4304     // No flat_scr on SI.
4305     // On GFX10 flat scratch is not a valid register operand and can only be
4306     // accessed with s_setreg/s_getreg.
4307     switch (RegNo) {
4308     case AMDGPU::FLAT_SCR:
4309     case AMDGPU::FLAT_SCR_LO:
4310     case AMDGPU::FLAT_SCR_HI:
4311       return false;
4312     default:
4313       return true;
4314     }
4315   }
4316 
4317   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4318   // SI/CI have.
4319   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4320        R.isValid(); ++R) {
4321     if (*R == RegNo)
4322       return hasSGPR102_SGPR103();
4323   }
4324 
4325   return true;
4326 }
4327 
4328 OperandMatchResultTy
4329 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4330                               OperandMode Mode) {
4331   // Try to parse with a custom parser
4332   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4333 
4334   // If we successfully parsed the operand or if there was an error parsing,
4335   // we are done.
4336   //
4337   // If we are parsing after we reach EndOfStatement then this means we
4338   // are appending default values to the Operands list.  This is only done
4339   // by custom parser, so we shouldn't continue on to the generic parsing.
4340   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4341       getLexer().is(AsmToken::EndOfStatement))
4342     return ResTy;
4343 
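  // GFX10 MIMG instructions may use the NSA (non-sequential address) form, in
  // which the address is written as a bracketed list of individual VGPRs, e.g.
  // (illustrative) [v4, v6, v7]. Parse the register list here and, if it holds
  // more than one register, surround it with '[' and ']' tokens for the
  // matcher.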
4344   if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4345     unsigned Prefix = Operands.size();
4346     SMLoc LBraceLoc = getTok().getLoc();
4347     Parser.Lex(); // eat the '['
4348 
4349     for (;;) {
4350       ResTy = parseReg(Operands);
4351       if (ResTy != MatchOperand_Success)
4352         return ResTy;
4353 
4354       if (getLexer().is(AsmToken::RBrac))
4355         break;
4356 
4357       if (getLexer().isNot(AsmToken::Comma))
4358         return MatchOperand_ParseFail;
4359       Parser.Lex();
4360     }
4361 
4362     if (Operands.size() - Prefix > 1) {
4363       Operands.insert(Operands.begin() + Prefix,
4364                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4365       Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4366                                                     getTok().getLoc()));
4367     }
4368 
4369     Parser.Lex(); // eat the ']'
4370     return MatchOperand_Success;
4371   }
4372 
4373   return parseRegOrImm(Operands);
4374 }
4375 
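// Strips an optional encoding-forcing suffix from the mnemonic and records it.
// For example (illustrative), "v_add_f32_e64" forces the 64-bit (VOP3)
// encoding and is matched as "v_add_f32"; _e32, _dpp and _sdwa behave
// analogously for their encodings.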
4376 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4377   // Clear any forced encodings from the previous instruction.
4378   setForcedEncodingSize(0);
4379   setForcedDPP(false);
4380   setForcedSDWA(false);
4381 
4382   if (Name.endswith("_e64")) {
4383     setForcedEncodingSize(64);
4384     return Name.substr(0, Name.size() - 4);
4385   } else if (Name.endswith("_e32")) {
4386     setForcedEncodingSize(32);
4387     return Name.substr(0, Name.size() - 4);
4388   } else if (Name.endswith("_dpp")) {
4389     setForcedDPP(true);
4390     return Name.substr(0, Name.size() - 4);
4391   } else if (Name.endswith("_sdwa")) {
4392     setForcedSDWA(true);
4393     return Name.substr(0, Name.size() - 5);
4394   }
4395   return Name;
4396 }
4397 
4398 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4399                                        StringRef Name,
4400                                        SMLoc NameLoc, OperandVector &Operands) {
4401   // Add the instruction mnemonic
4402   Name = parseMnemonicSuffix(Name);
4403   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4404 
4405   bool IsMIMG = Name.startswith("image_");
4406 
4407   while (!getLexer().is(AsmToken::EndOfStatement)) {
4408     OperandMode Mode = OperandMode_Default;
4409     if (IsMIMG && isGFX10() && Operands.size() == 2)
4410       Mode = OperandMode_NSA;
4411     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4412 
4413     // Eat the comma or space if there is one.
4414     if (getLexer().is(AsmToken::Comma))
4415       Parser.Lex();
4416 
4417     switch (Res) {
4418       case MatchOperand_Success: break;
4419       case MatchOperand_ParseFail:
4420         // FIXME: use real operand location rather than the current location.
4421         Error(getLexer().getLoc(), "failed parsing operand.");
4422         while (!getLexer().is(AsmToken::EndOfStatement)) {
4423           Parser.Lex();
4424         }
4425         return true;
4426       case MatchOperand_NoMatch:
4427         // FIXME: use real operand location rather than the current location.
4428         Error(getLexer().getLoc(), "not a valid operand.");
4429         while (!getLexer().is(AsmToken::EndOfStatement)) {
4430           Parser.Lex();
4431         }
4432         return true;
4433     }
4434   }
4435 
4436   return false;
4437 }
4438 
4439 //===----------------------------------------------------------------------===//
4440 // Utility functions
4441 //===----------------------------------------------------------------------===//
4442 
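// Parses an integer value attached to a named prefix, e.g. (illustrative)
// "offset:16" when Prefix is "offset".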
4443 OperandMatchResultTy
4444 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4445 
4446   if (!trySkipId(Prefix, AsmToken::Colon))
4447     return MatchOperand_NoMatch;
4448 
4449   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4450 }
4451 
4452 OperandMatchResultTy
4453 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4454                                     AMDGPUOperand::ImmTy ImmTy,
4455                                     bool (*ConvertResult)(int64_t&)) {
4456   SMLoc S = getLoc();
4457   int64_t Value = 0;
4458 
4459   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4460   if (Res != MatchOperand_Success)
4461     return Res;
4462 
4463   if (ConvertResult && !ConvertResult(Value)) {
4464     Error(S, "invalid " + StringRef(Prefix) + " value.");
4465   }
4466 
4467   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4468   return MatchOperand_Success;
4469 }
4470 
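// Parses a short array of 0/1 flags attached to a named prefix, e.g.
// (illustrative) "op_sel:[0,1,1,0]"; element I is packed into bit I of the
// resulting immediate.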
4471 OperandMatchResultTy
4472 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4473                                              OperandVector &Operands,
4474                                              AMDGPUOperand::ImmTy ImmTy,
4475                                              bool (*ConvertResult)(int64_t&)) {
4476   SMLoc S = getLoc();
4477   if (!trySkipId(Prefix, AsmToken::Colon))
4478     return MatchOperand_NoMatch;
4479 
4480   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4481     return MatchOperand_ParseFail;
4482 
4483   unsigned Val = 0;
4484   const unsigned MaxSize = 4;
4485 
4486   // FIXME: How to verify the number of elements matches the number of src
4487   // operands?
4488   for (int I = 0; ; ++I) {
4489     int64_t Op;
4490     SMLoc Loc = getLoc();
4491     if (!parseExpr(Op))
4492       return MatchOperand_ParseFail;
4493 
4494     if (Op != 0 && Op != 1) {
4495       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4496       return MatchOperand_ParseFail;
4497     }
4498 
4499     Val |= (Op << I);
4500 
4501     if (trySkipToken(AsmToken::RBrac))
4502       break;
4503 
4504     if (I + 1 == MaxSize) {
4505       Error(getLoc(), "expected a closing square bracket");
4506       return MatchOperand_ParseFail;
4507     }
4508 
4509     if (!skipToken(AsmToken::Comma, "expected a comma"))
4510       return MatchOperand_ParseFail;
4511   }
4512 
4513   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4514   return MatchOperand_Success;
4515 }
4516 
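// Parses a boolean modifier given by its bare name; e.g. (illustrative) "glc"
// sets the bit and "noglc" clears it. If the modifier is omitted, the default
// value 0 is used.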
4517 OperandMatchResultTy
4518 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4519                                AMDGPUOperand::ImmTy ImmTy) {
4520   int64_t Bit = 0;
4521   SMLoc S = Parser.getTok().getLoc();
4522 
4523   // If we are at the end of the statement, this is a default argument, so
4524   // use the default value.
4525   if (getLexer().isNot(AsmToken::EndOfStatement)) {
4526     switch(getLexer().getKind()) {
4527       case AsmToken::Identifier: {
4528         StringRef Tok = Parser.getTok().getString();
4529         if (Tok == Name) {
4530           if (Tok == "r128" && isGFX9())
4531             Error(S, "r128 modifier is not supported on this GPU");
4532           if (Tok == "a16" && !isGFX9() && !isGFX10())
4533             Error(S, "a16 modifier is not supported on this GPU");
4534           Bit = 1;
4535           Parser.Lex();
4536         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4537           Bit = 0;
4538           Parser.Lex();
4539         } else {
4540           return MatchOperand_NoMatch;
4541         }
4542         break;
4543       }
4544       default:
4545         return MatchOperand_NoMatch;
4546     }
4547   }
4548 
4549   if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4550     return MatchOperand_ParseFail;
4551 
4552   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4553   return MatchOperand_Success;
4554 }
4555 
4556 static void addOptionalImmOperand(
4557   MCInst& Inst, const OperandVector& Operands,
4558   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4559   AMDGPUOperand::ImmTy ImmT,
4560   int64_t Default = 0) {
4561   auto i = OptionalIdx.find(ImmT);
4562   if (i != OptionalIdx.end()) {
4563     unsigned Idx = i->second;
4564     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4565   } else {
4566     Inst.addOperand(MCOperand::createImm(Default));
4567   }
4568 }
4569 
4570 OperandMatchResultTy
4571 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4572   if (getLexer().isNot(AsmToken::Identifier)) {
4573     return MatchOperand_NoMatch;
4574   }
4575   StringRef Tok = Parser.getTok().getString();
4576   if (Tok != Prefix) {
4577     return MatchOperand_NoMatch;
4578   }
4579 
4580   Parser.Lex();
4581   if (getLexer().isNot(AsmToken::Colon)) {
4582     return MatchOperand_ParseFail;
4583   }
4584 
4585   Parser.Lex();
4586   if (getLexer().isNot(AsmToken::Identifier)) {
4587     return MatchOperand_ParseFail;
4588   }
4589 
4590   Value = Parser.getTok().getString();
4591   return MatchOperand_Success;
4592 }
4593 
4594 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
4595 // values to live in a joint format operand in the MCInst encoding.
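// Accepted syntax (illustrative): "dfmt:15, nfmt:2"; either field may be
// omitted and the two may appear in either order.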
4596 OperandMatchResultTy
4597 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
4598   SMLoc S = Parser.getTok().getLoc();
4599   int64_t Dfmt = 0, Nfmt = 0;
4600   // dfmt and nfmt can appear in either order, and each is optional.
4601   bool GotDfmt = false, GotNfmt = false;
4602   while (!GotDfmt || !GotNfmt) {
4603     if (!GotDfmt) {
4604       auto Res = parseIntWithPrefix("dfmt", Dfmt);
4605       if (Res != MatchOperand_NoMatch) {
4606         if (Res != MatchOperand_Success)
4607           return Res;
4608         if (Dfmt >= 16) {
4609           Error(Parser.getTok().getLoc(), "out of range dfmt");
4610           return MatchOperand_ParseFail;
4611         }
4612         GotDfmt = true;
4613         Parser.Lex();
4614         continue;
4615       }
4616     }
4617     if (!GotNfmt) {
4618       auto Res = parseIntWithPrefix("nfmt", Nfmt);
4619       if (Res != MatchOperand_NoMatch) {
4620         if (Res != MatchOperand_Success)
4621           return Res;
4622         if (Nfmt >= 8) {
4623           Error(Parser.getTok().getLoc(), "out of range nfmt");
4624           return MatchOperand_ParseFail;
4625         }
4626         GotNfmt = true;
4627         Parser.Lex();
4628         continue;
4629       }
4630     }
4631     break;
4632   }
4633   if (!GotDfmt && !GotNfmt)
4634     return MatchOperand_NoMatch;
4635   auto Format = Dfmt | Nfmt << 4;
4636   Operands.push_back(
4637       AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
4638   return MatchOperand_Success;
4639 }
4640 
4641 //===----------------------------------------------------------------------===//
4642 // ds
4643 //===----------------------------------------------------------------------===//
4644 
4645 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
4646                                     const OperandVector &Operands) {
4647   OptionalImmIndexMap OptionalIdx;
4648 
4649   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4650     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4651 
4652     // Add the register arguments
4653     if (Op.isReg()) {
4654       Op.addRegOperands(Inst, 1);
4655       continue;
4656     }
4657 
4658     // Handle optional arguments
4659     OptionalIdx[Op.getImmTy()] = i;
4660   }
4661 
4662   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
4663   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
4664   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4665 
4666   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4667 }
4668 
4669 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
4670                                 bool IsGdsHardcoded) {
4671   OptionalImmIndexMap OptionalIdx;
4672 
4673   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4674     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4675 
4676     // Add the register arguments
4677     if (Op.isReg()) {
4678       Op.addRegOperands(Inst, 1);
4679       continue;
4680     }
4681 
4682     if (Op.isToken() && Op.getToken() == "gds") {
4683       IsGdsHardcoded = true;
4684       continue;
4685     }
4686 
4687     // Handle optional arguments
4688     OptionalIdx[Op.getImmTy()] = i;
4689   }
4690 
4691   AMDGPUOperand::ImmTy OffsetType =
4692     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
4693      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
4694      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
4695                                                       AMDGPUOperand::ImmTyOffset;
4696 
4697   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
4698 
4699   if (!IsGdsHardcoded) {
4700     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4701   }
4702   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4703 }
4704 
4705 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
4706   OptionalImmIndexMap OptionalIdx;
4707 
4708   unsigned OperandIdx[4];
4709   unsigned EnMask = 0;
4710   int SrcIdx = 0;
4711 
4712   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4713     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4714 
4715     // Add the register arguments
4716     if (Op.isReg()) {
4717       assert(SrcIdx < 4);
4718       OperandIdx[SrcIdx] = Inst.size();
4719       Op.addRegOperands(Inst, 1);
4720       ++SrcIdx;
4721       continue;
4722     }
4723 
4724     if (Op.isOff()) {
4725       assert(SrcIdx < 4);
4726       OperandIdx[SrcIdx] = Inst.size();
4727       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
4728       ++SrcIdx;
4729       continue;
4730     }
4731 
4732     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
4733       Op.addImmOperands(Inst, 1);
4734       continue;
4735     }
4736 
4737     if (Op.isToken() && Op.getToken() == "done")
4738       continue;
4739 
4740     // Handle optional arguments
4741     OptionalIdx[Op.getImmTy()] = i;
4742   }
4743 
4744   assert(SrcIdx == 4);
4745 
4746   bool Compr = false;
4747   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
4748     Compr = true;
4749     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
4750     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
4751     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
4752   }
4753 
4754   for (auto i = 0; i < SrcIdx; ++i) {
4755     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
4756       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
4757     }
4758   }
4759 
4760   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
4761   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
4762 
4763   Inst.addOperand(MCOperand::createImm(EnMask));
4764 }
4765 
4766 //===----------------------------------------------------------------------===//
4767 // s_waitcnt
4768 //===----------------------------------------------------------------------===//
4769 
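// Encode CntVal into IntVal using the provided encode/decode callbacks. If the
// value does not survive the encode/decode round trip, either saturate it
// (when Saturate is set) or report failure by returning true.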
4770 static bool
4771 encodeCnt(
4772   const AMDGPU::IsaVersion ISA,
4773   int64_t &IntVal,
4774   int64_t CntVal,
4775   bool Saturate,
4776   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
4777   unsigned (*decode)(const IsaVersion &Version, unsigned))
4778 {
4779   bool Failed = false;
4780 
4781   IntVal = encode(ISA, IntVal, CntVal);
4782   if (CntVal != decode(ISA, IntVal)) {
4783     if (Saturate) {
4784       IntVal = encode(ISA, IntVal, -1);
4785     } else {
4786       Failed = true;
4787     }
4788   }
4789   return Failed;
4790 }
4791 
4792 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
4793 
4794   SMLoc CntLoc = getLoc();
4795   StringRef CntName = getTokenStr();
4796 
4797   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
4798       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
4799     return false;
4800 
4801   int64_t CntVal;
4802   SMLoc ValLoc = getLoc();
4803   if (!parseExpr(CntVal))
4804     return false;
4805 
4806   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4807 
4808   bool Failed = true;
4809   bool Sat = CntName.endswith("_sat");
4810 
4811   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
4812     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
4813   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
4814     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
4815   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
4816     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
4817   } else {
4818     Error(CntLoc, "invalid counter name " + CntName);
4819     return false;
4820   }
4821 
4822   if (Failed) {
4823     Error(ValLoc, "too large value for " + CntName);
4824     return false;
4825   }
4826 
4827   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
4828     return false;
4829 
4830   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
4831     if (isToken(AsmToken::EndOfStatement)) {
4832       Error(getLoc(), "expected a counter name");
4833       return false;
4834     }
4835   }
4836 
4837   return true;
4838 }
4839 
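// Parse an s_waitcnt operand: either a list of named counters such as
// "vmcnt(0) & expcnt(0) & lgkmcnt(0)" (optionally separated by '&' or ','),
// where a "_sat" suffix clamps an out-of-range value, or a plain absolute
// expression.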
4840 OperandMatchResultTy
4841 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
4842   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4843   int64_t Waitcnt = getWaitcntBitMask(ISA);
4844   SMLoc S = getLoc();
4845 
4846   // If parsing failed, do not return an error code
4847   // to avoid excessive error messages.
4848   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
4849     while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
4850   } else {
4851     parseExpr(Waitcnt);
4852   }
4853 
4854   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
4855   return MatchOperand_Success;
4856 }
4857 
4858 bool
4859 AMDGPUOperand::isSWaitCnt() const {
4860   return isImm();
4861 }
4862 
4863 //===----------------------------------------------------------------------===//
4864 // hwreg
4865 //===----------------------------------------------------------------------===//
4866 
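// Parse the body of a "hwreg(...)" operand. The register may be given by its
// symbolic name or by a numeric code, optionally followed by a bit offset and
// a bitfield width, e.g. (illustrative) "hwreg(HW_REG_TRAPSTS, 0, 32)".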
4867 bool
4868 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
4869                                 int64_t &Offset,
4870                                 int64_t &Width) {
4871   using namespace llvm::AMDGPU::Hwreg;
4872 
4873   // The register may be specified by name or using a numeric code
4874   if (isToken(AsmToken::Identifier) &&
4875       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
4876     HwReg.IsSymbolic = true;
4877     lex(); // skip register name
4878   } else if (!parseExpr(HwReg.Id)) {
4879     return false;
4880   }
4881 
4882   if (trySkipToken(AsmToken::RParen))
4883     return true;
4884 
4885   // parse optional params
4886   return
4887     skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
4888     parseExpr(Offset) &&
4889     skipToken(AsmToken::Comma, "expected a comma") &&
4890     parseExpr(Width) &&
4891     skipToken(AsmToken::RParen, "expected a closing parenthesis");
4892 }
4893 
4894 bool
4895 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
4896                                const int64_t Offset,
4897                                const int64_t Width,
4898                                const SMLoc Loc) {
4899 
4900   using namespace llvm::AMDGPU::Hwreg;
4901 
4902   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
4903     Error(Loc, "specified hardware register is not supported on this GPU");
4904     return false;
4905   } else if (!isValidHwreg(HwReg.Id)) {
4906     Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
4907     return false;
4908   } else if (!isValidHwregOffset(Offset)) {
4909     Error(Loc, "invalid bit offset: only 5-bit values are legal");
4910     return false;
4911   } else if (!isValidHwregWidth(Width)) {
4912     Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
4913     return false;
4914   }
4915   return true;
4916 }
4917 
4918 OperandMatchResultTy
4919 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
4920   using namespace llvm::AMDGPU::Hwreg;
4921 
4922   int64_t ImmVal = 0;
4923   SMLoc Loc = getLoc();
4924 
4925   // If parsing failed, do not return an error code
4926   // to avoid excessive error messages.
4927   if (trySkipId("hwreg", AsmToken::LParen)) {
4928     OperandInfoTy HwReg(ID_UNKNOWN_);
4929     int64_t Offset = OFFSET_DEFAULT_;
4930     int64_t Width = WIDTH_DEFAULT_;
4931     if (parseHwregBody(HwReg, Offset, Width) &&
4932         validateHwreg(HwReg, Offset, Width, Loc)) {
4933       ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
4934     }
4935   } else if (parseExpr(ImmVal)) {
4936     if (ImmVal < 0 || !isUInt<16>(ImmVal))
4937       Error(Loc, "invalid immediate: only 16-bit values are legal");
4938   }
4939 
4940   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
4941   return MatchOperand_Success;
4942 }
4943 
4944 bool AMDGPUOperand::isHwreg() const {
4945   return isImmTy(ImmTyHwreg);
4946 }
4947 
4948 //===----------------------------------------------------------------------===//
4949 // sendmsg
4950 //===----------------------------------------------------------------------===//
4951 
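// Parse the body of a "sendmsg(...)" operand: a message id (symbolic name or
// expression), an optional operation id and an optional stream id,
// e.g. (illustrative) "sendmsg(MSG_GS, GS_OP_EMIT, 0)".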
4952 bool
4953 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
4954                                   OperandInfoTy &Op,
4955                                   OperandInfoTy &Stream) {
4956   using namespace llvm::AMDGPU::SendMsg;
4957 
4958   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
4959     Msg.IsSymbolic = true;
4960     lex(); // skip message name
4961   } else if (!parseExpr(Msg.Id)) {
4962     return false;
4963   }
4964 
4965   if (trySkipToken(AsmToken::Comma)) {
4966     Op.IsDefined = true;
4967     if (isToken(AsmToken::Identifier) &&
4968         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
4969       lex(); // skip operation name
4970     } else if (!parseExpr(Op.Id)) {
4971       return false;
4972     }
4973 
4974     if (trySkipToken(AsmToken::Comma)) {
4975       Stream.IsDefined = true;
4976       if (!parseExpr(Stream.Id))
4977         return false;
4978     }
4979   }
4980 
4981   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
4982 }
4983 
4984 bool
4985 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
4986                                  const OperandInfoTy &Op,
4987                                  const OperandInfoTy &Stream,
4988                                  const SMLoc S) {
4989   using namespace llvm::AMDGPU::SendMsg;
4990 
4991   // Validation strictness depends on whether the message is specified
4992   // in a symbolic or in a numeric form. In the latter case,
4993   // only the encoding possibility is checked.
4994   bool Strict = Msg.IsSymbolic;
4995 
4996   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
4997     Error(S, "invalid message id");
4998     return false;
4999   } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
5000     Error(S, Op.IsDefined ?
5001              "message does not support operations" :
5002              "missing message operation");
5003     return false;
5004   } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
5005     Error(S, "invalid operation id");
5006     return false;
5007   } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
5008     Error(S, "message operation does not support streams");
5009     return false;
5010   } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
5011     Error(S, "invalid message stream id");
5012     return false;
5013   }
5014   return true;
5015 }
5016 
5017 OperandMatchResultTy
5018 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
5019   using namespace llvm::AMDGPU::SendMsg;
5020 
5021   int64_t ImmVal = 0;
5022   SMLoc Loc = getLoc();
5023 
5024   // If parsing failed, do not return an error code
5025   // to avoid excessive error messages.
5026   if (trySkipId("sendmsg", AsmToken::LParen)) {
5027     OperandInfoTy Msg(ID_UNKNOWN_);
5028     OperandInfoTy Op(OP_NONE_);
5029     OperandInfoTy Stream(STREAM_ID_NONE_);
5030     if (parseSendMsgBody(Msg, Op, Stream) &&
5031         validateSendMsg(Msg, Op, Stream, Loc)) {
5032       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5033     }
5034   } else if (parseExpr(ImmVal)) {
5035     if (ImmVal < 0 || !isUInt<16>(ImmVal))
5036       Error(Loc, "invalid immediate: only 16-bit values are legal");
5037   }
5038 
5039   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5040   return MatchOperand_Success;
5041 }
5042 
5043 bool AMDGPUOperand::isSendMsg() const {
5044   return isImmTy(ImmTySendMsg);
5045 }
5046 
5047 //===----------------------------------------------------------------------===//
5048 // v_interp
5049 //===----------------------------------------------------------------------===//
5050 
5051 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5052   if (getLexer().getKind() != AsmToken::Identifier)
5053     return MatchOperand_NoMatch;
5054 
5055   StringRef Str = Parser.getTok().getString();
5056   int Slot = StringSwitch<int>(Str)
5057     .Case("p10", 0)
5058     .Case("p20", 1)
5059     .Case("p0", 2)
5060     .Default(-1);
5061 
5062   SMLoc S = Parser.getTok().getLoc();
5063   if (Slot == -1)
5064     return MatchOperand_ParseFail;
5065 
5066   Parser.Lex();
5067   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5068                                               AMDGPUOperand::ImmTyInterpSlot));
5069   return MatchOperand_Success;
5070 }
5071 
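// Parse an interpolation attribute of the form "attr<N>.<chan>", where <chan>
// is one of x, y, z or w and <N> must not exceed 63 (e.g. "attr0.x"). This
// produces two immediates: the attribute number and the channel.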
5072 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5073   if (getLexer().getKind() != AsmToken::Identifier)
5074     return MatchOperand_NoMatch;
5075 
5076   StringRef Str = Parser.getTok().getString();
5077   if (!Str.startswith("attr"))
5078     return MatchOperand_NoMatch;
5079 
5080   StringRef Chan = Str.take_back(2);
5081   int AttrChan = StringSwitch<int>(Chan)
5082     .Case(".x", 0)
5083     .Case(".y", 1)
5084     .Case(".z", 2)
5085     .Case(".w", 3)
5086     .Default(-1);
5087   if (AttrChan == -1)
5088     return MatchOperand_ParseFail;
5089 
5090   Str = Str.drop_back(2).drop_front(4);
5091 
5092   uint8_t Attr;
5093   if (Str.getAsInteger(10, Attr))
5094     return MatchOperand_ParseFail;
5095 
5096   SMLoc S = Parser.getTok().getLoc();
5097   Parser.Lex();
5098   if (Attr > 63) {
5099     Error(S, "out of bounds attr");
5100     return MatchOperand_Success;
5101   }
5102 
5103   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5104 
5105   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5106                                               AMDGPUOperand::ImmTyInterpAttr));
5107   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5108                                               AMDGPUOperand::ImmTyAttrChan));
5109   return MatchOperand_Success;
5110 }
5111 
5112 //===----------------------------------------------------------------------===//
5113 // exp
5114 //===----------------------------------------------------------------------===//
5115 
5116 void AMDGPUAsmParser::errorExpTgt() {
5117   Error(Parser.getTok().getLoc(), "invalid exp target");
5118 }
5119 
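// Map a symbolic export target to its encoding: mrt0..mrt7 -> 0..7, mrtz -> 8,
// null -> 9, pos0..pos3 -> 12..15 (plus pos4 on GFX10), prim -> 20 (GFX10
// only), param0..param31 -> 32..63. Out-of-range targets report an error.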
5120 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5121                                                       uint8_t &Val) {
5122   if (Str == "null") {
5123     Val = 9;
5124     return MatchOperand_Success;
5125   }
5126 
5127   if (Str.startswith("mrt")) {
5128     Str = Str.drop_front(3);
5129     if (Str == "z") { // == mrtz
5130       Val = 8;
5131       return MatchOperand_Success;
5132     }
5133 
5134     if (Str.getAsInteger(10, Val))
5135       return MatchOperand_ParseFail;
5136 
5137     if (Val > 7)
5138       errorExpTgt();
5139 
5140     return MatchOperand_Success;
5141   }
5142 
5143   if (Str.startswith("pos")) {
5144     Str = Str.drop_front(3);
5145     if (Str.getAsInteger(10, Val))
5146       return MatchOperand_ParseFail;
5147 
5148     if (Val > 4 || (Val == 4 && !isGFX10()))
5149       errorExpTgt();
5150 
5151     Val += 12;
5152     return MatchOperand_Success;
5153   }
5154 
5155   if (isGFX10() && Str == "prim") {
5156     Val = 20;
5157     return MatchOperand_Success;
5158   }
5159 
5160   if (Str.startswith("param")) {
5161     Str = Str.drop_front(5);
5162     if (Str.getAsInteger(10, Val))
5163       return MatchOperand_ParseFail;
5164 
5165     if (Val >= 32)
5166       errorExpTgt();
5167 
5168     Val += 32;
5169     return MatchOperand_Success;
5170   }
5171 
5172   if (Str.startswith("invalid_target_")) {
5173     Str = Str.drop_front(15);
5174     if (Str.getAsInteger(10, Val))
5175       return MatchOperand_ParseFail;
5176 
5177     errorExpTgt();
5178     return MatchOperand_Success;
5179   }
5180 
5181   return MatchOperand_NoMatch;
5182 }
5183 
5184 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5185   uint8_t Val;
5186   StringRef Str = Parser.getTok().getString();
5187 
5188   auto Res = parseExpTgtImpl(Str, Val);
5189   if (Res != MatchOperand_Success)
5190     return Res;
5191 
5192   SMLoc S = Parser.getTok().getLoc();
5193   Parser.Lex();
5194 
5195   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5196                                               AMDGPUOperand::ImmTyExpTgt));
5197   return MatchOperand_Success;
5198 }
5199 
5200 //===----------------------------------------------------------------------===//
5201 // parser helpers
5202 //===----------------------------------------------------------------------===//
5203 
5204 bool
5205 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5206   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5207 }
5208 
5209 bool
5210 AMDGPUAsmParser::isId(const StringRef Id) const {
5211   return isId(getToken(), Id);
5212 }
5213 
5214 bool
5215 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5216   return getTokenKind() == Kind;
5217 }
5218 
5219 bool
5220 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5221   if (isId(Id)) {
5222     lex();
5223     return true;
5224   }
5225   return false;
5226 }
5227 
5228 bool
5229 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5230   if (isId(Id) && peekToken().is(Kind)) {
5231     lex();
5232     lex();
5233     return true;
5234   }
5235   return false;
5236 }
5237 
5238 bool
5239 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5240   if (isToken(Kind)) {
5241     lex();
5242     return true;
5243   }
5244   return false;
5245 }
5246 
5247 bool
5248 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5249                            const StringRef ErrMsg) {
5250   if (!trySkipToken(Kind)) {
5251     Error(getLoc(), ErrMsg);
5252     return false;
5253   }
5254   return true;
5255 }
5256 
5257 bool
5258 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5259   return !getParser().parseAbsoluteExpression(Imm);
5260 }
5261 
5262 bool
5263 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
5264   SMLoc S = getLoc();
5265 
5266   const MCExpr *Expr;
5267   if (Parser.parseExpression(Expr))
5268     return false;
5269 
5270   int64_t IntVal;
5271   if (Expr->evaluateAsAbsolute(IntVal)) {
5272     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
5273   } else {
5274     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
5275   }
5276   return true;
5277 }
5278 
5279 bool
5280 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5281   if (isToken(AsmToken::String)) {
5282     Val = getToken().getStringContents();
5283     lex();
5284     return true;
5285   } else {
5286     Error(getLoc(), ErrMsg);
5287     return false;
5288   }
5289 }
5290 
5291 AsmToken
5292 AMDGPUAsmParser::getToken() const {
5293   return Parser.getTok();
5294 }
5295 
5296 AsmToken
5297 AMDGPUAsmParser::peekToken() {
5298   return getLexer().peekTok();
5299 }
5300 
5301 void
5302 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
5303   auto TokCount = getLexer().peekTokens(Tokens);
5304 
5305   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
5306     Tokens[Idx] = AsmToken(AsmToken::Error, "");
5307 }
5308 
5309 AsmToken::TokenKind
5310 AMDGPUAsmParser::getTokenKind() const {
5311   return getLexer().getKind();
5312 }
5313 
5314 SMLoc
5315 AMDGPUAsmParser::getLoc() const {
5316   return getToken().getLoc();
5317 }
5318 
5319 StringRef
5320 AMDGPUAsmParser::getTokenStr() const {
5321   return getToken().getString();
5322 }
5323 
5324 void
5325 AMDGPUAsmParser::lex() {
5326   Parser.Lex();
5327 }
5328 
5329 //===----------------------------------------------------------------------===//
5330 // swizzle
5331 //===----------------------------------------------------------------------===//
5332 
5333 LLVM_READNONE
5334 static unsigned
5335 encodeBitmaskPerm(const unsigned AndMask,
5336                   const unsigned OrMask,
5337                   const unsigned XorMask) {
5338   using namespace llvm::AMDGPU::Swizzle;
5339 
5340   return BITMASK_PERM_ENC |
5341          (AndMask << BITMASK_AND_SHIFT) |
5342          (OrMask  << BITMASK_OR_SHIFT)  |
5343          (XorMask << BITMASK_XOR_SHIFT);
5344 }
5345 
5346 bool
5347 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5348                                       const unsigned MinVal,
5349                                       const unsigned MaxVal,
5350                                       const StringRef ErrMsg) {
5351   for (unsigned i = 0; i < OpNum; ++i) {
5352     if (!skipToken(AsmToken::Comma, "expected a comma")){
5353       return false;
5354     }
5355     SMLoc ExprLoc = Parser.getTok().getLoc();
5356     if (!parseExpr(Op[i])) {
5357       return false;
5358     }
5359     if (Op[i] < MinVal || Op[i] > MaxVal) {
5360       Error(ExprLoc, ErrMsg);
5361       return false;
5362     }
5363   }
5364 
5365   return true;
5366 }
5367 
5368 bool
5369 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5370   using namespace llvm::AMDGPU::Swizzle;
5371 
5372   int64_t Lane[LANE_NUM];
5373   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5374                            "expected a 2-bit lane id")) {
5375     Imm = QUAD_PERM_ENC;
5376     for (unsigned I = 0; I < LANE_NUM; ++I) {
5377       Imm |= Lane[I] << (LANE_SHIFT * I);
5378     }
5379     return true;
5380   }
5381   return false;
5382 }
5383 
5384 bool
5385 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5386   using namespace llvm::AMDGPU::Swizzle;
5387 
5388   SMLoc S = Parser.getTok().getLoc();
5389   int64_t GroupSize;
5390   int64_t LaneIdx;
5391 
5392   if (!parseSwizzleOperands(1, &GroupSize,
5393                             2, 32,
5394                             "group size must be in the interval [2,32]")) {
5395     return false;
5396   }
5397   if (!isPowerOf2_64(GroupSize)) {
5398     Error(S, "group size must be a power of two");
5399     return false;
5400   }
5401   if (parseSwizzleOperands(1, &LaneIdx,
5402                            0, GroupSize - 1,
5403                            "lane id must be in the interval [0,group size - 1]")) {
5404     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5405     return true;
5406   }
5407   return false;
5408 }
5409 
5410 bool
5411 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
5412   using namespace llvm::AMDGPU::Swizzle;
5413 
5414   SMLoc S = Parser.getTok().getLoc();
5415   int64_t GroupSize;
5416 
5417   if (!parseSwizzleOperands(1, &GroupSize,
5418       2, 32, "group size must be in the interval [2,32]")) {
5419     return false;
5420   }
5421   if (!isPowerOf2_64(GroupSize)) {
5422     Error(S, "group size must be a power of two");
5423     return false;
5424   }
5425 
5426   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
5427   return true;
5428 }
5429 
5430 bool
5431 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5432   using namespace llvm::AMDGPU::Swizzle;
5433 
5434   SMLoc S = Parser.getTok().getLoc();
5435   int64_t GroupSize;
5436 
5437   if (!parseSwizzleOperands(1, &GroupSize,
5438       1, 16, "group size must be in the interval [1,16]")) {
5439     return false;
5440   }
5441   if (!isPowerOf2_64(GroupSize)) {
5442     Error(S, "group size must be a power of two");
5443     return false;
5444   }
5445 
5446   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5447   return true;
5448 }
5449 
5450 bool
5451 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5452   using namespace llvm::AMDGPU::Swizzle;
5453 
5454   if (!skipToken(AsmToken::Comma, "expected a comma")) {
5455     return false;
5456   }
5457 
5458   StringRef Ctl;
5459   SMLoc StrLoc = Parser.getTok().getLoc();
5460   if (!parseString(Ctl)) {
5461     return false;
5462   }
5463   if (Ctl.size() != BITMASK_WIDTH) {
5464     Error(StrLoc, "expected a 5-character mask");
5465     return false;
5466   }
5467 
5468   unsigned AndMask = 0;
5469   unsigned OrMask = 0;
5470   unsigned XorMask = 0;
5471 
5472   for (size_t i = 0; i < Ctl.size(); ++i) {
5473     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5474     switch(Ctl[i]) {
5475     default:
5476       Error(StrLoc, "invalid mask");
5477       return false;
5478     case '0':
5479       break;
5480     case '1':
5481       OrMask |= Mask;
5482       break;
5483     case 'p':
5484       AndMask |= Mask;
5485       break;
5486     case 'i':
5487       AndMask |= Mask;
5488       XorMask |= Mask;
5489       break;
5490     }
5491   }
5492 
5493   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
5494   return true;
5495 }
5496 
5497 bool
5498 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5499 
5500   SMLoc OffsetLoc = Parser.getTok().getLoc();
5501 
5502   if (!parseExpr(Imm)) {
5503     return false;
5504   }
5505   if (!isUInt<16>(Imm)) {
5506     Error(OffsetLoc, "expected a 16-bit offset");
5507     return false;
5508   }
5509   return true;
5510 }
5511 
5512 bool
5513 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
5514   using namespace llvm::AMDGPU::Swizzle;
5515 
5516   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
5517 
5518     SMLoc ModeLoc = Parser.getTok().getLoc();
5519     bool Ok = false;
5520 
5521     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
5522       Ok = parseSwizzleQuadPerm(Imm);
5523     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
5524       Ok = parseSwizzleBitmaskPerm(Imm);
5525     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
5526       Ok = parseSwizzleBroadcast(Imm);
5527     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
5528       Ok = parseSwizzleSwap(Imm);
5529     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
5530       Ok = parseSwizzleReverse(Imm);
5531     } else {
5532       Error(ModeLoc, "expected a swizzle mode");
5533     }
5534 
5535     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
5536   }
5537 
5538   return false;
5539 }
5540 
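// Parse the ds_swizzle offset operand: either a raw 16-bit value,
// "offset:<imm>", or a symbolic macro such as (illustrative)
// "offset:swizzle(QUAD_PERM, 0, 1, 2, 3)" or
// "offset:swizzle(BITMASK_PERM, "01pi0")".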
5541 OperandMatchResultTy
5542 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
5543   SMLoc S = Parser.getTok().getLoc();
5544   int64_t Imm = 0;
5545 
5546   if (trySkipId("offset")) {
5547 
5548     bool Ok = false;
5549     if (skipToken(AsmToken::Colon, "expected a colon")) {
5550       if (trySkipId("swizzle")) {
5551         Ok = parseSwizzleMacro(Imm);
5552       } else {
5553         Ok = parseSwizzleOffset(Imm);
5554       }
5555     }
5556 
5557     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
5558 
5559     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
5560   } else {
5561     // Swizzle "offset" operand is optional.
5562     // If it is omitted, try parsing other optional operands.
5563     return parseOptionalOpr(Operands);
5564   }
5565 }
5566 
5567 bool
5568 AMDGPUOperand::isSwizzle() const {
5569   return isImmTy(ImmTySwizzle);
5570 }
5571 
5572 //===----------------------------------------------------------------------===//
5573 // VGPR Index Mode
5574 //===----------------------------------------------------------------------===//
5575 
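// Parse the body of a "gpr_idx(...)" macro: a comma-separated list of VGPR
// index modes, e.g. (illustrative) "gpr_idx(SRC0, DST)", OR'ed into a single
// immediate; an empty list yields OFF.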
5576 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
5577 
5578   using namespace llvm::AMDGPU::VGPRIndexMode;
5579 
5580   if (trySkipToken(AsmToken::RParen)) {
5581     return OFF;
5582   }
5583 
5584   int64_t Imm = 0;
5585 
5586   while (true) {
5587     unsigned Mode = 0;
5588     SMLoc S = Parser.getTok().getLoc();
5589 
5590     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
5591       if (trySkipId(IdSymbolic[ModeId])) {
5592         Mode = 1 << ModeId;
5593         break;
5594       }
5595     }
5596 
5597     if (Mode == 0) {
5598       Error(S, (Imm == 0)?
5599                "expected a VGPR index mode or a closing parenthesis" :
5600                "expected a VGPR index mode");
5601       break;
5602     }
5603 
5604     if (Imm & Mode) {
5605       Error(S, "duplicate VGPR index mode");
5606       break;
5607     }
5608     Imm |= Mode;
5609 
5610     if (trySkipToken(AsmToken::RParen))
5611       break;
5612     if (!skipToken(AsmToken::Comma,
5613                    "expected a comma or a closing parenthesis"))
5614       break;
5615   }
5616 
5617   return Imm;
5618 }
5619 
5620 OperandMatchResultTy
5621 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
5622 
5623   int64_t Imm = 0;
5624   SMLoc S = Parser.getTok().getLoc();
5625 
5626   if (getLexer().getKind() == AsmToken::Identifier &&
5627       Parser.getTok().getString() == "gpr_idx" &&
5628       getLexer().peekTok().is(AsmToken::LParen)) {
5629 
5630     Parser.Lex();
5631     Parser.Lex();
5632 
5633     // If parsing failed, trigger an error but do not return an error code
5634     // to avoid excessive error messages.
5635     Imm = parseGPRIdxMacro();
5636 
5637   } else {
5638     if (getParser().parseAbsoluteExpression(Imm))
5639       return MatchOperand_NoMatch;
5640     if (Imm < 0 || !isUInt<4>(Imm)) {
5641       Error(S, "invalid immediate: only 4-bit values are legal");
5642     }
5643   }
5644 
5645   Operands.push_back(
5646       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
5647   return MatchOperand_Success;
5648 }
5649 
5650 bool AMDGPUOperand::isGPRIdxMode() const {
5651   return isImmTy(ImmTyGprIdxMode);
5652 }
5653 
5654 //===----------------------------------------------------------------------===//
5655 // sopp branch targets
5656 //===----------------------------------------------------------------------===//
5657 
5658 OperandMatchResultTy
5659 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
5660 
5661   // Make sure we are not parsing something
5662   // that looks like a label or an expression but is not.
5663   // This will improve error messages.
5664   if (isRegister() || isModifier())
5665     return MatchOperand_NoMatch;
5666 
5667   if (parseExpr(Operands)) {
5668 
5669     AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
5670     assert(Opr.isImm() || Opr.isExpr());
5671     SMLoc Loc = Opr.getStartLoc();
5672 
5673     // Currently we do not support arbitrary expressions as branch targets.
5674     // Only labels and absolute expressions are accepted.
5675     if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
5676       Error(Loc, "expected an absolute expression or a label");
5677     } else if (Opr.isImm() && !Opr.isS16Imm()) {
5678       Error(Loc, "expected a 16-bit signed jump offset");
5679     }
5680   }
5681 
5682   return MatchOperand_Success; // avoid excessive error messages
5683 }
5684 
5685 //===----------------------------------------------------------------------===//
5686 // Boolean holding registers
5687 //===----------------------------------------------------------------------===//
5688 
5689 OperandMatchResultTy
5690 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
5691   return parseReg(Operands);
5692 }
5693 
5694 //===----------------------------------------------------------------------===//
5695 // mubuf
5696 //===----------------------------------------------------------------------===//
5697 
5698 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
5699   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
5700 }
5701 
5702 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
5703   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
5704 }
5705 
5706 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
5707   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
5708 }
5709 
5710 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
5711                                const OperandVector &Operands,
5712                                bool IsAtomic,
5713                                bool IsAtomicReturn,
5714                                bool IsLds) {
5715   bool IsLdsOpcode = IsLds;
5716   bool HasLdsModifier = false;
5717   OptionalImmIndexMap OptionalIdx;
5718   assert(IsAtomicReturn ? IsAtomic : true);
5719   unsigned FirstOperandIdx = 1;
5720 
5721   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
5722     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5723 
5724     // Add the register arguments
5725     if (Op.isReg()) {
5726       Op.addRegOperands(Inst, 1);
5727       // Insert a tied src for atomic return dst.
5728       // This cannot be postponed as subsequent calls to
5729       // addImmOperands rely on correct number of MC operands.
5730       if (IsAtomicReturn && i == FirstOperandIdx)
5731         Op.addRegOperands(Inst, 1);
5732       continue;
5733     }
5734 
5735     // Handle the case where soffset is an immediate
5736     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5737       Op.addImmOperands(Inst, 1);
5738       continue;
5739     }
5740 
5741     HasLdsModifier |= Op.isLDS();
5742 
5743     // Handle tokens like 'offen' which are sometimes hard-coded into the
5744     // asm string.  There are no MCInst operands for these.
5745     if (Op.isToken()) {
5746       continue;
5747     }
5748     assert(Op.isImm());
5749 
5750     // Handle optional arguments
5751     OptionalIdx[Op.getImmTy()] = i;
5752   }
5753 
5754   // This is a workaround for an LLVM quirk which may result in an
5755   // incorrect instruction selection. Lds and non-lds versions of
5756   // MUBUF instructions are identical except that lds versions
5757   // have a mandatory 'lds' modifier. However, this modifier follows
5758   // optional modifiers, and the LLVM asm matcher regards this 'lds'
5759   // modifier as an optional one. As a result, an lds version
5760   // of an opcode may be selected even if it has no 'lds' modifier.
5761   if (IsLdsOpcode && !HasLdsModifier) {
5762     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
5763     if (NoLdsOpcode != -1) { // Got lds version - correct it.
5764       Inst.setOpcode(NoLdsOpcode);
5765       IsLdsOpcode = false;
5766     }
5767   }
5768 
5769   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
5770   if (!IsAtomic) { // glc is hard-coded.
5771     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5772   }
5773   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5774 
5775   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
5776     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5777   }
5778 
5779   if (isGFX10())
5780     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5781 }
5782 
5783 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
5784   OptionalImmIndexMap OptionalIdx;
5785 
5786   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5787     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5788 
5789     // Add the register arguments
5790     if (Op.isReg()) {
5791       Op.addRegOperands(Inst, 1);
5792       continue;
5793     }
5794 
5795     // Handle the case where soffset is an immediate
5796     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5797       Op.addImmOperands(Inst, 1);
5798       continue;
5799     }
5800 
5801     // Handle tokens like 'offen' which are sometimes hard-coded into the
5802     // asm string.  There are no MCInst operands for these.
5803     if (Op.isToken()) {
5804       continue;
5805     }
5806     assert(Op.isImm());
5807 
5808     // Handle optional arguments
5809     OptionalIdx[Op.getImmTy()] = i;
5810   }
5811 
5812   addOptionalImmOperand(Inst, Operands, OptionalIdx,
5813                         AMDGPUOperand::ImmTyOffset);
5814   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
5815   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5816   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5817   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5818 
5819   if (isGFX10())
5820     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5821 }
5822 
5823 //===----------------------------------------------------------------------===//
5824 // mimg
5825 //===----------------------------------------------------------------------===//
5826 
5827 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
5828                               bool IsAtomic) {
5829   unsigned I = 1;
5830   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5831   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5832     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5833   }
5834 
5835   if (IsAtomic) {
5836     // Add src, same as dst
5837     assert(Desc.getNumDefs() == 1);
5838     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
5839   }
5840 
5841   OptionalImmIndexMap OptionalIdx;
5842 
5843   for (unsigned E = Operands.size(); I != E; ++I) {
5844     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5845 
5846     // Add the register arguments
5847     if (Op.isReg()) {
5848       Op.addRegOperands(Inst, 1);
5849     } else if (Op.isImmModifier()) {
5850       OptionalIdx[Op.getImmTy()] = I;
5851     } else if (!Op.isToken()) {
5852       llvm_unreachable("unexpected operand type");
5853     }
5854   }
5855 
5856   bool IsGFX10 = isGFX10();
5857 
5858   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
5859   if (IsGFX10)
5860     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
5861   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
5862   if (IsGFX10)
5863     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5864   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5865   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5866   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
5867   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5868   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
5869   if (!IsGFX10)
5870     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
5871   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
5872 }
5873 
5874 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
5875   cvtMIMG(Inst, Operands, true);
5876 }
5877 
5878 //===----------------------------------------------------------------------===//
5879 // smrd
5880 //===----------------------------------------------------------------------===//
5881 
5882 bool AMDGPUOperand::isSMRDOffset8() const {
5883   return isImm() && isUInt<8>(getImm());
5884 }
5885 
5886 bool AMDGPUOperand::isSMRDOffset20() const {
5887   return isImm() && isUInt<20>(getImm());
5888 }
5889 
5890 bool AMDGPUOperand::isSMRDLiteralOffset() const {
5891   // 32-bit literals are only supported on CI and we only want to use them
5892   // when the offset is > 8 bits.
5893   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
5894 }
5895 
5896 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
5897   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5898 }
5899 
5900 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
5901   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5902 }
5903 
5904 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
5905   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5906 }
5907 
5908 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
5909   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5910 }
5911 
5912 //===----------------------------------------------------------------------===//
5913 // vop3
5914 //===----------------------------------------------------------------------===//
5915 
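// Convert the omod modifier syntax to its encoding: "mul:2" -> 1, "mul:4" -> 2
// and "div:2" -> 3 ("mul:1" and "div:1" both encode as 0).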
5916 static bool ConvertOmodMul(int64_t &Mul) {
5917   if (Mul != 1 && Mul != 2 && Mul != 4)
5918     return false;
5919 
5920   Mul >>= 1;
5921   return true;
5922 }
5923 
5924 static bool ConvertOmodDiv(int64_t &Div) {
5925   if (Div == 1) {
5926     Div = 0;
5927     return true;
5928   }
5929 
5930   if (Div == 2) {
5931     Div = 3;
5932     return true;
5933   }
5934 
5935   return false;
5936 }
5937 
5938 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
5939   if (BoundCtrl == 0) {
5940     BoundCtrl = 1;
5941     return true;
5942   }
5943 
5944   if (BoundCtrl == -1) {
5945     BoundCtrl = 0;
5946     return true;
5947   }
5948 
5949   return false;
5950 }
5951 
5952 // Note: the order in this table matches the order of operands in AsmString.
5953 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
5954   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
5955   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
5956   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
5957   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
5958   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
5959   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
5960   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
5961   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
5962   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
5963   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
5964   {"format",  AMDGPUOperand::ImmTyFORMAT, false, nullptr},
5965   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
5966   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
5967   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
5968   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
5969   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
5970   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
5971   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
5972   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
5973   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
5974   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
5975   {"a16",     AMDGPUOperand::ImmTyR128A16,  true, nullptr},
5976   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
5977   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
5978   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
5979   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
5980   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
5981   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
5982   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
5983   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
5984   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
5985   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
5986   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
5987   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
5988   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
5989   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
5990   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
5991   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
5992   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
5993   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
5994   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
5995   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
5996   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
5997 };
5998 
5999 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
6000   unsigned size = Operands.size();
6001   assert(size > 0);
6002 
6003   OperandMatchResultTy res = parseOptionalOpr(Operands);
6004 
6005   // This is a hack to enable hardcoded mandatory operands which follow
6006   // optional operands.
6007   //
6008   // The current design assumes that all operands after the first optional
6009   // operand are also optional. However, the implementation of some instructions
6010   // violates this rule (e.g. flat/global atomics have hardcoded 'glc' operands).
6011   //
6012   // To alleviate this problem, we have to (implicitly) parse extra operands
6013   // to make sure the autogenerated parser of custom operands never hits
6014   // hardcoded mandatory operands.
6015 
6016   if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
6017 
6018     // We have parsed the first optional operand.
6019     // Parse as many operands as necessary to skip all mandatory operands.
6020 
6021     for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
6022       if (res != MatchOperand_Success ||
6023           getLexer().is(AsmToken::EndOfStatement)) break;
6024       if (getLexer().is(AsmToken::Comma)) Parser.Lex();
6025       res = parseOptionalOpr(Operands);
6026     }
6027   }
6028 
6029   return res;
6030 }
6031 
6032 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6033   OperandMatchResultTy res;
6034   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6035     // try to parse any optional operand here
6036     if (Op.IsBit) {
6037       res = parseNamedBit(Op.Name, Operands, Op.Type);
6038     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6039       res = parseOModOperand(Operands);
6040     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6041                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6042                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6043       res = parseSDWASel(Operands, Op.Name, Op.Type);
6044     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6045       res = parseSDWADstUnused(Operands);
6046     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6047                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6048                Op.Type == AMDGPUOperand::ImmTyNegLo ||
6049                Op.Type == AMDGPUOperand::ImmTyNegHi) {
6050       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6051                                         Op.ConvertResult);
6052     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6053       res = parseDim(Operands);
6054     } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
6055       res = parseDfmtNfmt(Operands);
6056     } else {
6057       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6058     }
6059     if (res != MatchOperand_NoMatch) {
6060       return res;
6061     }
6062   }
6063   return MatchOperand_NoMatch;
6064 }
6065 
6066 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6067   StringRef Name = Parser.getTok().getString();
6068   if (Name == "mul") {
6069     return parseIntWithPrefix("mul", Operands,
6070                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6071   }
6072 
6073   if (Name == "div") {
6074     return parseIntWithPrefix("div", Operands,
6075                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6076   }
6077 
6078   return MatchOperand_NoMatch;
6079 }
6080 
6081 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6082   cvtVOP3P(Inst, Operands);
6083 
6084   int Opc = Inst.getOpcode();
6085 
6086   int SrcNum;
6087   const int Ops[] = { AMDGPU::OpName::src0,
6088                       AMDGPU::OpName::src1,
6089                       AMDGPU::OpName::src2 };
6090   for (SrcNum = 0;
6091        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6092        ++SrcNum);
6093   assert(SrcNum > 0);
6094 
6095   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6096   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6097 
6098   if ((OpSel & (1 << SrcNum)) != 0) {
6099     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6100     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6101     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6102   }
6103 }
6104 
6105 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
6106       // 1. This operand is an input modifiers operand
6107   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
6108       // 2. This is not the last operand
6109       && Desc.NumOperands > (OpNum + 1)
6110       // 3. The next operand is a register class
6111       && Desc.OpInfo[OpNum + 1].RegClass != -1
6112       // 4. The next register is not tied to any other operand
6113       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6114 }
6115 
6116 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6117 {
6118   OptionalImmIndexMap OptionalIdx;
6119   unsigned Opc = Inst.getOpcode();
6120 
6121   unsigned I = 1;
6122   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6123   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6124     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6125   }
6126 
6127   for (unsigned E = Operands.size(); I != E; ++I) {
6128     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6129     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6130       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6131     } else if (Op.isInterpSlot() ||
6132                Op.isInterpAttr() ||
6133                Op.isAttrChan()) {
6134       Inst.addOperand(MCOperand::createImm(Op.getImm()));
6135     } else if (Op.isImmModifier()) {
6136       OptionalIdx[Op.getImmTy()] = I;
6137     } else {
6138       llvm_unreachable("unhandled operand type");
6139     }
6140   }
6141 
6142   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6143     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6144   }
6145 
6146   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6147     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6148   }
6149 
6150   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6151     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6152   }
6153 }
6154 
6155 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
6156                               OptionalImmIndexMap &OptionalIdx) {
6157   unsigned Opc = Inst.getOpcode();
6158 
6159   unsigned I = 1;
6160   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6161   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6162     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6163   }
6164 
6165   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
6166     // This instruction has src modifiers
6167     for (unsigned E = Operands.size(); I != E; ++I) {
6168       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6169       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6170         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6171       } else if (Op.isImmModifier()) {
6172         OptionalIdx[Op.getImmTy()] = I;
6173       } else if (Op.isRegOrImm()) {
6174         Op.addRegOrImmOperands(Inst, 1);
6175       } else {
6176         llvm_unreachable("unhandled operand type");
6177       }
6178     }
6179   } else {
6180     // No src modifiers
6181     for (unsigned E = Operands.size(); I != E; ++I) {
6182       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6183       if (Op.isMod()) {
6184         OptionalIdx[Op.getImmTy()] = I;
6185       } else {
6186         Op.addRegOrImmOperands(Inst, 1);
6187       }
6188     }
6189   }
6190 
6191   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6192     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6193   }
6194 
6195   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6196     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6197   }
6198 
  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // they have a src2 register operand that is tied to the dst operand.
  // We don't allow modifiers for this operand in the assembler, so
  // src2_modifiers should be 0.
6203   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
6204       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
6205       Opc == AMDGPU::V_MAC_F32_e64_vi ||
6206       Opc == AMDGPU::V_MAC_F16_e64_vi ||
6207       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
6208       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
6209       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
6210     auto it = Inst.begin();
6211     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
6212     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
6213     ++it;
6214     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6215   }
6216 }
6217 
6218 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
6219   OptionalImmIndexMap OptionalIdx;
6220   cvtVOP3(Inst, Operands, OptionalIdx);
6221 }
6222 
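// Convert the parsed operands of a packed (VOP3P) instruction. The operands
// are first converted as a regular VOP3 instruction; the op_sel, op_sel_hi,
// neg_lo and neg_hi masks, which are parsed as separate immediates, are then
// folded into the per-source src*_modifiers operands.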
6223 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
6224                                const OperandVector &Operands) {
6225   OptionalImmIndexMap OptIdx;
6226   const int Opc = Inst.getOpcode();
6227   const MCInstrDesc &Desc = MII.get(Opc);
6228 
6229   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
6230 
6231   cvtVOP3(Inst, Operands, OptIdx);
6232 
6233   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
6234     assert(!IsPacked);
6235     Inst.addOperand(Inst.getOperand(0));
6236   }
6237 
  // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
  // instruction, and then figure out where to actually put the modifiers.
6240 
6241   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
6242 
6243   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
6244   if (OpSelHiIdx != -1) {
6245     int DefaultVal = IsPacked ? -1 : 0;
6246     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
6247                           DefaultVal);
6248   }
6249 
6250   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
6251   if (NegLoIdx != -1) {
6252     assert(IsPacked);
6253     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
6254     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
6255   }
6256 
6257   const int Ops[] = { AMDGPU::OpName::src0,
6258                       AMDGPU::OpName::src1,
6259                       AMDGPU::OpName::src2 };
6260   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
6261                          AMDGPU::OpName::src1_modifiers,
6262                          AMDGPU::OpName::src2_modifiers };
6263 
6264   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6265 
6266   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6267   unsigned OpSelHi = 0;
6268   unsigned NegLo = 0;
6269   unsigned NegHi = 0;
6270 
6271   if (OpSelHiIdx != -1) {
6272     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
6273   }
6274 
6275   if (NegLoIdx != -1) {
6276     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
6277     NegLo = Inst.getOperand(NegLoIdx).getImm();
6278     NegHi = Inst.getOperand(NegHiIdx).getImm();
6279   }
6280 
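  // Fold bit J of each mask into srcJ_modifiers: op_sel -> OP_SEL_0,
  // op_sel_hi -> OP_SEL_1, neg_lo -> NEG, neg_hi -> NEG_HI.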
6281   for (int J = 0; J < 3; ++J) {
6282     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
6283     if (OpIdx == -1)
6284       break;
6285 
6286     uint32_t ModVal = 0;
6287 
6288     if ((OpSel & (1 << J)) != 0)
6289       ModVal |= SISrcMods::OP_SEL_0;
6290 
6291     if ((OpSelHi & (1 << J)) != 0)
6292       ModVal |= SISrcMods::OP_SEL_1;
6293 
6294     if ((NegLo & (1 << J)) != 0)
6295       ModVal |= SISrcMods::NEG;
6296 
6297     if ((NegHi & (1 << J)) != 0)
6298       ModVal |= SISrcMods::NEG_HI;
6299 
6300     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
6301 
6302     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
6303   }
6304 }
6305 
6306 //===----------------------------------------------------------------------===//
6307 // dpp
6308 //===----------------------------------------------------------------------===//
6309 
6310 bool AMDGPUOperand::isDPP8() const {
6311   return isImmTy(ImmTyDPP8);
6312 }
6313 
6314 bool AMDGPUOperand::isDPPCtrl() const {
6315   using namespace AMDGPU::DPP;
6316 
6317   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
6318   if (result) {
6319     int64_t Imm = getImm();
6320     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
6321            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
6322            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
6323            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
6324            (Imm == DppCtrl::WAVE_SHL1) ||
6325            (Imm == DppCtrl::WAVE_ROL1) ||
6326            (Imm == DppCtrl::WAVE_SHR1) ||
6327            (Imm == DppCtrl::WAVE_ROR1) ||
6328            (Imm == DppCtrl::ROW_MIRROR) ||
6329            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
6330            (Imm == DppCtrl::BCAST15) ||
6331            (Imm == DppCtrl::BCAST31) ||
6332            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
6333            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
6334   }
6335   return false;
6336 }
6337 
6338 //===----------------------------------------------------------------------===//
6339 // mAI
6340 //===----------------------------------------------------------------------===//
6341 
6342 bool AMDGPUOperand::isBLGP() const {
6343   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
6344 }
6345 
6346 bool AMDGPUOperand::isCBSZ() const {
6347   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
6348 }
6349 
6350 bool AMDGPUOperand::isABID() const {
6351   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
6352 }
6353 
6354 bool AMDGPUOperand::isS16Imm() const {
6355   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
6356 }
6357 
6358 bool AMDGPUOperand::isU16Imm() const {
6359   return isImm() && isUInt<16>(getImm());
6360 }
6361 
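// Parse the MIMG "dim:" operand (gfx10), e.g. "dim:SQ_RSRC_IMG_2D" or the
// short form "dim:2D"; the resolved dim encoding is added as an immediate
// operand.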
6362 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
6363   if (!isGFX10())
6364     return MatchOperand_NoMatch;
6365 
6366   SMLoc S = Parser.getTok().getLoc();
6367 
6368   if (getLexer().isNot(AsmToken::Identifier))
6369     return MatchOperand_NoMatch;
6370   if (getLexer().getTok().getString() != "dim")
6371     return MatchOperand_NoMatch;
6372 
6373   Parser.Lex();
6374   if (getLexer().isNot(AsmToken::Colon))
6375     return MatchOperand_ParseFail;
6376 
6377   Parser.Lex();
6378 
6379   // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
6380   // integer.
6381   std::string Token;
6382   if (getLexer().is(AsmToken::Integer)) {
6383     SMLoc Loc = getLexer().getTok().getEndLoc();
6384     Token = getLexer().getTok().getString();
6385     Parser.Lex();
6386     if (getLexer().getTok().getLoc() != Loc)
6387       return MatchOperand_ParseFail;
6388   }
6389   if (getLexer().isNot(AsmToken::Identifier))
6390     return MatchOperand_ParseFail;
6391   Token += getLexer().getTok().getString();
6392 
6393   StringRef DimId = Token;
6394   if (DimId.startswith("SQ_RSRC_IMG_"))
6395     DimId = DimId.substr(12);
6396 
6397   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
6398   if (!DimInfo)
6399     return MatchOperand_ParseFail;
6400 
6401   Parser.Lex();
6402 
6403   Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
6404                                               AMDGPUOperand::ImmTyDim));
6405   return MatchOperand_Success;
6406 }
6407 
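// Parse the "dpp8:[...]" operand (gfx10). Any other identifier prefix is
// forwarded to parseDPPCtrl. Each of the eight lane selectors must be in the
// range [0, 7].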
6408 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
6409   SMLoc S = Parser.getTok().getLoc();
6410   StringRef Prefix;
6411 
6412   if (getLexer().getKind() == AsmToken::Identifier) {
6413     Prefix = Parser.getTok().getString();
6414   } else {
6415     return MatchOperand_NoMatch;
6416   }
6417 
6418   if (Prefix != "dpp8")
6419     return parseDPPCtrl(Operands);
6420   if (!isGFX10())
6421     return MatchOperand_NoMatch;
6422 
6423   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
6424 
6425   int64_t Sels[8];
6426 
6427   Parser.Lex();
6428   if (getLexer().isNot(AsmToken::Colon))
6429     return MatchOperand_ParseFail;
6430 
6431   Parser.Lex();
6432   if (getLexer().isNot(AsmToken::LBrac))
6433     return MatchOperand_ParseFail;
6434 
6435   Parser.Lex();
6436   if (getParser().parseAbsoluteExpression(Sels[0]))
6437     return MatchOperand_ParseFail;
6438   if (0 > Sels[0] || 7 < Sels[0])
6439     return MatchOperand_ParseFail;
6440 
6441   for (size_t i = 1; i < 8; ++i) {
6442     if (getLexer().isNot(AsmToken::Comma))
6443       return MatchOperand_ParseFail;
6444 
6445     Parser.Lex();
6446     if (getParser().parseAbsoluteExpression(Sels[i]))
6447       return MatchOperand_ParseFail;
6448     if (0 > Sels[i] || 7 < Sels[i])
6449       return MatchOperand_ParseFail;
6450   }
6451 
6452   if (getLexer().isNot(AsmToken::RBrac))
6453     return MatchOperand_ParseFail;
6454   Parser.Lex();
6455 
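  // Pack the eight 3-bit lane selectors into a single immediate; lane i
  // occupies bits [3*i+2 : 3*i].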
6456   unsigned DPP8 = 0;
6457   for (size_t i = 0; i < 8; ++i)
6458     DPP8 |= (Sels[i] << (i * 3));
6459 
6460   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
6461   return MatchOperand_Success;
6462 }
6463 
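// Parse a classic dpp_ctrl operand, e.g. "row_mirror", "quad_perm:[0,1,2,3]"
// or "row_shl:1", and encode it as a single DppCtrl immediate. Prefixes that
// are not supported on the current subtarget produce NoMatch.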
6464 OperandMatchResultTy
6465 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
6466   using namespace AMDGPU::DPP;
6467 
6468   SMLoc S = Parser.getTok().getLoc();
6469   StringRef Prefix;
6470   int64_t Int;
6471 
6472   if (getLexer().getKind() == AsmToken::Identifier) {
6473     Prefix = Parser.getTok().getString();
6474   } else {
6475     return MatchOperand_NoMatch;
6476   }
6477 
6478   if (Prefix == "row_mirror") {
6479     Int = DppCtrl::ROW_MIRROR;
6480     Parser.Lex();
6481   } else if (Prefix == "row_half_mirror") {
6482     Int = DppCtrl::ROW_HALF_MIRROR;
6483     Parser.Lex();
6484   } else {
    // Check to prevent parseDPPCtrl from eating invalid tokens
6486     if (Prefix != "quad_perm"
6487         && Prefix != "row_shl"
6488         && Prefix != "row_shr"
6489         && Prefix != "row_ror"
6490         && Prefix != "wave_shl"
6491         && Prefix != "wave_rol"
6492         && Prefix != "wave_shr"
6493         && Prefix != "wave_ror"
6494         && Prefix != "row_bcast"
6495         && Prefix != "row_share"
6496         && Prefix != "row_xmask") {
6497       return MatchOperand_NoMatch;
6498     }
6499 
6500     if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
6501       return MatchOperand_NoMatch;
6502 
6503     if (!isVI() && !isGFX9() &&
6504         (Prefix == "wave_shl" || Prefix == "wave_shr" ||
6505          Prefix == "wave_rol" || Prefix == "wave_ror" ||
6506          Prefix == "row_bcast"))
6507       return MatchOperand_NoMatch;
6508 
6509     Parser.Lex();
6510     if (getLexer().isNot(AsmToken::Colon))
6511       return MatchOperand_ParseFail;
6512 
6513     if (Prefix == "quad_perm") {
6514       // quad_perm:[%d,%d,%d,%d]
6515       Parser.Lex();
6516       if (getLexer().isNot(AsmToken::LBrac))
6517         return MatchOperand_ParseFail;
6518       Parser.Lex();
6519 
      if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <= 3))
6521         return MatchOperand_ParseFail;
6522 
6523       for (int i = 0; i < 3; ++i) {
6524         if (getLexer().isNot(AsmToken::Comma))
6525           return MatchOperand_ParseFail;
6526         Parser.Lex();
6527 
6528         int64_t Temp;
        if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <= 3))
6530           return MatchOperand_ParseFail;
6531         const int shift = i*2 + 2;
6532         Int += (Temp << shift);
6533       }
6534 
6535       if (getLexer().isNot(AsmToken::RBrac))
6536         return MatchOperand_ParseFail;
6537       Parser.Lex();
6538     } else {
6539       // sel:%d
6540       Parser.Lex();
6541       if (getParser().parseAbsoluteExpression(Int))
6542         return MatchOperand_ParseFail;
6543 
6544       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
6545         Int |= DppCtrl::ROW_SHL0;
6546       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
6547         Int |= DppCtrl::ROW_SHR0;
6548       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
6549         Int |= DppCtrl::ROW_ROR0;
6550       } else if (Prefix == "wave_shl" && 1 == Int) {
6551         Int = DppCtrl::WAVE_SHL1;
6552       } else if (Prefix == "wave_rol" && 1 == Int) {
6553         Int = DppCtrl::WAVE_ROL1;
6554       } else if (Prefix == "wave_shr" && 1 == Int) {
6555         Int = DppCtrl::WAVE_SHR1;
6556       } else if (Prefix == "wave_ror" && 1 == Int) {
6557         Int = DppCtrl::WAVE_ROR1;
6558       } else if (Prefix == "row_bcast") {
6559         if (Int == 15) {
6560           Int = DppCtrl::BCAST15;
6561         } else if (Int == 31) {
6562           Int = DppCtrl::BCAST31;
6563         } else {
6564           return MatchOperand_ParseFail;
6565         }
6566       } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
6567         Int |= DppCtrl::ROW_SHARE_FIRST;
6568       } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
6569         Int |= DppCtrl::ROW_XMASK_FIRST;
6570       } else {
6571         return MatchOperand_ParseFail;
6572       }
6573     }
6574   }
6575 
6576   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
6577   return MatchOperand_Success;
6578 }
6579 
6580 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
6581   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
6582 }
6583 
6584 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
6585   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
6586 }
6587 
6588 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
6589   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
6590 }
6591 
6592 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
6593   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
6594 }
6595 
6596 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
6597   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
6598 }
6599 
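// Convert parsed DPP (or DPP8, when IsDPP8 is set) operands into an MCInst.
// Register sources may carry FP input modifiers; for DPP8 the fi modifier is
// encoded directly, while for classic DPP the row_mask, bank_mask, bound_ctrl
// and (when present) fi operands are appended with their defaults.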
6600 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
6601   OptionalImmIndexMap OptionalIdx;
6602 
6603   unsigned I = 1;
6604   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6605   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6606     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6607   }
6608 
6609   int Fi = 0;
6610   for (unsigned E = Operands.size(); I != E; ++I) {
6611     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
6612                                             MCOI::TIED_TO);
6613     if (TiedTo != -1) {
6614       assert((unsigned)TiedTo < Inst.getNumOperands());
      // Handle the tied old or src2 operand for MAC instructions.
6616       Inst.addOperand(Inst.getOperand(TiedTo));
6617     }
6618     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6619     // Add the register arguments
6620     if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32, ...) dpp uses the "vcc" token.
6622       // Skip it.
6623       continue;
6624     }
6625 
6626     if (IsDPP8) {
6627       if (Op.isDPP8()) {
6628         Op.addImmOperands(Inst, 1);
6629       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6630         Op.addRegWithFPInputModsOperands(Inst, 2);
6631       } else if (Op.isFI()) {
6632         Fi = Op.getImm();
6633       } else if (Op.isReg()) {
6634         Op.addRegOperands(Inst, 1);
6635       } else {
6636         llvm_unreachable("Invalid operand type");
6637       }
6638     } else {
6639       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6640         Op.addRegWithFPInputModsOperands(Inst, 2);
6641       } else if (Op.isDPPCtrl()) {
6642         Op.addImmOperands(Inst, 1);
6643       } else if (Op.isImm()) {
6644         // Handle optional arguments
6645         OptionalIdx[Op.getImmTy()] = I;
6646       } else {
6647         llvm_unreachable("Invalid operand type");
6648       }
6649     }
6650   }
6651 
6652   if (IsDPP8) {
6653     using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
6655   } else {
6656     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
6657     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
6658     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
6659     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
6660       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
6661     }
6662   }
6663 }
6664 
6665 //===----------------------------------------------------------------------===//
6666 // sdwa
6667 //===----------------------------------------------------------------------===//
6668 
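// Parse an SDWA select operand (dst_sel / src0_sel / src1_sel); the value
// must be one of BYTE_0..BYTE_3, WORD_0, WORD_1 or DWORD.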
6669 OperandMatchResultTy
6670 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
6671                               AMDGPUOperand::ImmTy Type) {
6672   using namespace llvm::AMDGPU::SDWA;
6673 
6674   SMLoc S = Parser.getTok().getLoc();
6675   StringRef Value;
6676   OperandMatchResultTy res;
6677 
6678   res = parseStringWithPrefix(Prefix, Value);
6679   if (res != MatchOperand_Success) {
6680     return res;
6681   }
6682 
6683   int64_t Int;
6684   Int = StringSwitch<int64_t>(Value)
6685         .Case("BYTE_0", SdwaSel::BYTE_0)
6686         .Case("BYTE_1", SdwaSel::BYTE_1)
6687         .Case("BYTE_2", SdwaSel::BYTE_2)
6688         .Case("BYTE_3", SdwaSel::BYTE_3)
6689         .Case("WORD_0", SdwaSel::WORD_0)
6690         .Case("WORD_1", SdwaSel::WORD_1)
6691         .Case("DWORD", SdwaSel::DWORD)
6692         .Default(0xffffffff);
6693   Parser.Lex(); // eat last token
6694 
6695   if (Int == 0xffffffff) {
6696     return MatchOperand_ParseFail;
6697   }
6698 
6699   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
6700   return MatchOperand_Success;
6701 }
6702 
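// Parse the "dst_unused" operand; the value must be one of UNUSED_PAD,
// UNUSED_SEXT or UNUSED_PRESERVE.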
6703 OperandMatchResultTy
6704 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
6705   using namespace llvm::AMDGPU::SDWA;
6706 
6707   SMLoc S = Parser.getTok().getLoc();
6708   StringRef Value;
6709   OperandMatchResultTy res;
6710 
6711   res = parseStringWithPrefix("dst_unused", Value);
6712   if (res != MatchOperand_Success) {
6713     return res;
6714   }
6715 
6716   int64_t Int;
6717   Int = StringSwitch<int64_t>(Value)
6718         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
6719         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
6720         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
6721         .Default(0xffffffff);
6722   Parser.Lex(); // eat last token
6723 
6724   if (Int == 0xffffffff) {
6725     return MatchOperand_ParseFail;
6726   }
6727 
6728   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
6729   return MatchOperand_Success;
6730 }
6731 
6732 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
6733   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
6734 }
6735 
6736 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
6737   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
6738 }
6739 
6740 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
6741   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
6742 }
6743 
6744 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
6745   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
6746 }
6747 
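// Convert parsed SDWA operands into an MCInst. BasicInstType (VOP1, VOP2 or
// VOPC) selects which optional clamp/omod/sel/unused operands are appended;
// when skipVcc is set, an explicitly written "vcc" operand is skipped (see
// the comment in the loop below).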
6748 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
6749                               uint64_t BasicInstType, bool skipVcc) {
6750   using namespace llvm::AMDGPU::SDWA;
6751 
6752   OptionalImmIndexMap OptionalIdx;
6753   bool skippedVcc = false;
6754 
6755   unsigned I = 1;
6756   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6757   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6758     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6759   }
6760 
6761   for (unsigned E = Operands.size(); I != E; ++I) {
6762     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6763     if (skipVcc && !skippedVcc && Op.isReg() &&
6764         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32, ...) sdwa uses the "vcc" token as dst.
      // Skip it if it is the 2nd operand (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or the 4th operand (v_addc_u32_sdwa v1, vcc, v2, v3, vcc).
      // Skip VCC only if we didn't skip it on the previous iteration.
6769       if (BasicInstType == SIInstrFlags::VOP2 &&
6770           (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
6771         skippedVcc = true;
6772         continue;
6773       } else if (BasicInstType == SIInstrFlags::VOPC &&
6774                  Inst.getNumOperands() == 0) {
6775         skippedVcc = true;
6776         continue;
6777       }
6778     }
6779     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6780       Op.addRegOrImmWithInputModsOperands(Inst, 2);
6781     } else if (Op.isImm()) {
6782       // Handle optional arguments
6783       OptionalIdx[Op.getImmTy()] = I;
6784     } else {
6785       llvm_unreachable("Invalid operand type");
6786     }
6787     skippedVcc = false;
6788   }
6789 
6790   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
6791       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
6792       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments.
6794     switch (BasicInstType) {
6795     case SIInstrFlags::VOP1:
6796       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6797       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6798         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6799       }
6800       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6801       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6802       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6803       break;
6804 
6805     case SIInstrFlags::VOP2:
6806       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6807       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6808         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6809       }
6810       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6811       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6812       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6813       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6814       break;
6815 
6816     case SIInstrFlags::VOPC:
6817       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
6818         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6819       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6820       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6821       break;
6822 
6823     default:
6824       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
6825     }
6826   }
6827 
  // Special case v_mac_{f16, f32}:
  // they have a src2 register operand that is tied to the dst operand.
6830   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
6831       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
6832     auto it = Inst.begin();
6833     std::advance(
6834       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
6835     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6836   }
6837 }
6838 
6839 //===----------------------------------------------------------------------===//
6840 // mAI
6841 //===----------------------------------------------------------------------===//
6842 
6843 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
6844   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
6845 }
6846 
6847 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
6848   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
6849 }
6850 
6851 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
6852   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
6853 }
6854 
6855 /// Force static initialization.
6856 extern "C" void LLVMInitializeAMDGPUAsmParser() {
6857   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
6858   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
6859 }
6860 
6861 #define GET_REGISTER_MATCHER
6862 #define GET_MATCHER_IMPLEMENTATION
6863 #define GET_MNEMONIC_SPELL_CHECKER
6864 #include "AMDGPUGenAsmMatcher.inc"
6865 
// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined.
6868 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
6869                                                      unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to see a token and fails to validate
  // the operand. This method checks whether we were given an immediate operand
  // when the matcher expects the corresponding token.
6874   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
6875   switch (Kind) {
6876   case MCK_addr64:
6877     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
6878   case MCK_gds:
6879     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
6880   case MCK_lds:
6881     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
6882   case MCK_glc:
6883     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
6884   case MCK_idxen:
6885     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
6886   case MCK_offen:
6887     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
6888   case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token when isToken returns true, and the match
    // will fail when the name of the expression is not a valid token, so we
    // need to handle it here.
6895     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
6896   case MCK_SSrcF32:
6897     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
6898   case MCK_SoppBrTarget:
6899     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
6900   case MCK_VReg32OrOff:
6901     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
6902   case MCK_InterpSlot:
6903     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
6904   case MCK_Attr:
6905     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
6906   case MCK_AttrChan:
6907     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
6908   default:
6909     return Match_InvalidOperand;
6910   }
6911 }
6912 
6913 //===----------------------------------------------------------------------===//
6914 // endpgm
6915 //===----------------------------------------------------------------------===//
6916 
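// Parse the optional immediate operand of the endpgm instructions; when
// omitted it defaults to 0, and it must fit in 16 bits.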
6917 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
6918   SMLoc S = Parser.getTok().getLoc();
6919   int64_t Imm = 0;
6920 
6921   if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
6923     Imm = 0;
6924   }
6925 
6926   if (!isUInt<16>(Imm)) {
6927     Error(S, "expected a 16-bit value");
6928     return MatchOperand_ParseFail;
6929   }
6930 
6931   Operands.push_back(
6932       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
6933   return MatchOperand_Success;
6934 }
6935 
6936 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
6937