1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/ErrorHandling.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
63 
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
67 
68 namespace {
69 
70 class AMDGPUAsmParser;
71 
72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
73 
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
77 
78 class AMDGPUOperand : public MCParsedAsmOperand {
79   enum KindTy {
80     Token,
81     Immediate,
82     Register,
83     Expression
84   } Kind;
85 
86   SMLoc StartLoc, EndLoc;
87   const AMDGPUAsmParser *AsmParser;
88 
89 public:
90   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
92 
93   using Ptr = std::unique_ptr<AMDGPUOperand>;
94 
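  // Source operand modifiers: Abs/Neg apply to floating-point operands and
  // Sext to integer operands. getModifiersOperand() encodes them into the
  // SISrcMods immediate that accompanies the source operand.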
95   struct Modifiers {
96     bool Abs = false;
97     bool Neg = false;
98     bool Sext = false;
99 
100     bool hasFPModifiers() const { return Abs || Neg; }
101     bool hasIntModifiers() const { return Sext; }
102     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103 
104     int64_t getFPModifiersOperand() const {
105       int64_t Operand = 0;
106       Operand |= Abs ? SISrcMods::ABS : 0u;
107       Operand |= Neg ? SISrcMods::NEG : 0u;
108       return Operand;
109     }
110 
111     int64_t getIntModifiersOperand() const {
112       int64_t Operand = 0;
113       Operand |= Sext ? SISrcMods::SEXT : 0u;
114       return Operand;
115     }
116 
117     int64_t getModifiersOperand() const {
118       assert(!(hasFPModifiers() && hasIntModifiers())
119            && "fp and int modifiers should not be used simultaneously");
120       if (hasFPModifiers()) {
121         return getFPModifiersOperand();
122       } else if (hasIntModifiers()) {
123         return getIntModifiersOperand();
124       } else {
125         return 0;
126       }
127     }
128 
129     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
130   };
131 
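  // Kinds of immediate operands. Most entries correspond to named instruction
  // modifiers (e.g. offset:, glc, dmask:); ImmTyNone marks a plain immediate.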
132   enum ImmTy {
133     ImmTyNone,
134     ImmTyGDS,
135     ImmTyLDS,
136     ImmTyOffen,
137     ImmTyIdxen,
138     ImmTyAddr64,
139     ImmTyOffset,
140     ImmTyInstOffset,
141     ImmTyOffset0,
142     ImmTyOffset1,
143     ImmTyDLC,
144     ImmTyGLC,
145     ImmTySLC,
146     ImmTyTFE,
147     ImmTyD16,
148     ImmTyClampSI,
149     ImmTyOModSI,
150     ImmTyDPP8,
151     ImmTyDppCtrl,
152     ImmTyDppRowMask,
153     ImmTyDppBankMask,
154     ImmTyDppBoundCtrl,
155     ImmTyDppFi,
156     ImmTySdwaDstSel,
157     ImmTySdwaSrc0Sel,
158     ImmTySdwaSrc1Sel,
159     ImmTySdwaDstUnused,
160     ImmTyDMask,
161     ImmTyDim,
162     ImmTyUNorm,
163     ImmTyDA,
164     ImmTyR128A16,
165     ImmTyLWE,
166     ImmTyExpTgt,
167     ImmTyExpCompr,
168     ImmTyExpVM,
169     ImmTyFORMAT,
170     ImmTyHwreg,
171     ImmTyOff,
172     ImmTySendMsg,
173     ImmTyInterpSlot,
174     ImmTyInterpAttr,
175     ImmTyAttrChan,
176     ImmTyOpSel,
177     ImmTyOpSelHi,
178     ImmTyNegLo,
179     ImmTyNegHi,
180     ImmTySwizzle,
181     ImmTyGprIdxMode,
182     ImmTyHigh,
183     ImmTyBLGP,
184     ImmTyCBSZ,
185     ImmTyABID,
186     ImmTyEndpgm,
187   };
188 
189 private:
190   struct TokOp {
191     const char *Data;
192     unsigned Length;
193   };
194 
195   struct ImmOp {
196     int64_t Val;
197     ImmTy Type;
198     bool IsFPImm;
199     Modifiers Mods;
200   };
201 
202   struct RegOp {
203     unsigned RegNo;
204     Modifiers Mods;
205   };
206 
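  // Operand payload; the active member is determined by Kind.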
207   union {
208     TokOp Tok;
209     ImmOp Imm;
210     RegOp Reg;
211     const MCExpr *Expr;
212   };
213 
214 public:
215   bool isToken() const override {
216     if (Kind == Token)
217       return true;
218 
219     // When parsing operands, we can't always tell if something was meant to be
220     // a token, like 'gds', or an expression that references a global variable.
221     // In this case, we assume the string is an expression, and if we need to
222     // interpret it as a token, we treat the symbol name as the token.
223     return isSymbolRefExpr();
224   }
225 
226   bool isSymbolRefExpr() const {
227     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
228   }
229 
230   bool isImm() const override {
231     return Kind == Immediate;
232   }
233 
234   bool isInlinableImm(MVT type) const;
235   bool isLiteralImm(MVT type) const;
236 
237   bool isRegKind() const {
238     return Kind == Register;
239   }
240 
241   bool isReg() const override {
242     return isRegKind() && !hasModifiers();
243   }
244 
245   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
246     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
247   }
248 
249   bool isRegOrImmWithInt16InputMods() const {
250     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
251   }
252 
253   bool isRegOrImmWithInt32InputMods() const {
254     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
255   }
256 
257   bool isRegOrImmWithInt64InputMods() const {
258     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
259   }
260 
261   bool isRegOrImmWithFP16InputMods() const {
262     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
263   }
264 
265   bool isRegOrImmWithFP32InputMods() const {
266     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
267   }
268 
269   bool isRegOrImmWithFP64InputMods() const {
270     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
271   }
272 
273   bool isVReg() const {
274     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
275            isRegClass(AMDGPU::VReg_64RegClassID) ||
276            isRegClass(AMDGPU::VReg_96RegClassID) ||
277            isRegClass(AMDGPU::VReg_128RegClassID) ||
278            isRegClass(AMDGPU::VReg_160RegClassID) ||
279            isRegClass(AMDGPU::VReg_256RegClassID) ||
280            isRegClass(AMDGPU::VReg_512RegClassID) ||
281            isRegClass(AMDGPU::VReg_1024RegClassID);
282   }
283 
284   bool isVReg32() const {
285     return isRegClass(AMDGPU::VGPR_32RegClassID);
286   }
287 
288   bool isVReg32OrOff() const {
289     return isOff() || isVReg32();
290   }
291 
292   bool isSDWAOperand(MVT type) const;
293   bool isSDWAFP16Operand() const;
294   bool isSDWAFP32Operand() const;
295   bool isSDWAInt16Operand() const;
296   bool isSDWAInt32Operand() const;
297 
298   bool isImmTy(ImmTy ImmT) const {
299     return isImm() && Imm.Type == ImmT;
300   }
301 
302   bool isImmModifier() const {
303     return isImm() && Imm.Type != ImmTyNone;
304   }
305 
306   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
307   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
308   bool isDMask() const { return isImmTy(ImmTyDMask); }
309   bool isDim() const { return isImmTy(ImmTyDim); }
310   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
311   bool isDA() const { return isImmTy(ImmTyDA); }
312   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
313   bool isLWE() const { return isImmTy(ImmTyLWE); }
314   bool isOff() const { return isImmTy(ImmTyOff); }
315   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
316   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
317   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
318   bool isOffen() const { return isImmTy(ImmTyOffen); }
319   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
320   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
321   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
322   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
323   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
324 
325   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
326   bool isGDS() const { return isImmTy(ImmTyGDS); }
327   bool isLDS() const { return isImmTy(ImmTyLDS); }
328   bool isDLC() const { return isImmTy(ImmTyDLC); }
329   bool isGLC() const { return isImmTy(ImmTyGLC); }
330   bool isSLC() const { return isImmTy(ImmTySLC); }
331   bool isTFE() const { return isImmTy(ImmTyTFE); }
332   bool isD16() const { return isImmTy(ImmTyD16); }
333   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
334   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
335   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
336   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
337   bool isFI() const { return isImmTy(ImmTyDppFi); }
338   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
339   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
340   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
341   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
342   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
343   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
344   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
345   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
346   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
347   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
348   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
349   bool isHigh() const { return isImmTy(ImmTyHigh); }
350 
351   bool isMod() const {
352     return isClampSI() || isOModSI();
353   }
354 
355   bool isRegOrImm() const {
356     return isReg() || isImm();
357   }
358 
359   bool isRegClass(unsigned RCID) const;
360 
361   bool isInlineValue() const;
362 
363   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
364     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
365   }
366 
367   bool isSCSrcB16() const {
368     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
369   }
370 
371   bool isSCSrcV2B16() const {
372     return isSCSrcB16();
373   }
374 
375   bool isSCSrcB32() const {
376     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
377   }
378 
379   bool isSCSrcB64() const {
380     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
381   }
382 
383   bool isBoolReg() const;
384 
385   bool isSCSrcF16() const {
386     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
387   }
388 
389   bool isSCSrcV2F16() const {
390     return isSCSrcF16();
391   }
392 
393   bool isSCSrcF32() const {
394     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
395   }
396 
397   bool isSCSrcF64() const {
398     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
399   }
400 
401   bool isSSrcB32() const {
402     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
403   }
404 
405   bool isSSrcB16() const {
406     return isSCSrcB16() || isLiteralImm(MVT::i16);
407   }
408 
409   bool isSSrcV2B16() const {
410     llvm_unreachable("cannot happen");
411     return isSSrcB16();
412   }
413 
414   bool isSSrcB64() const {
415     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
416     // See isVSrc64().
417     return isSCSrcB64() || isLiteralImm(MVT::i64);
418   }
419 
420   bool isSSrcF32() const {
421     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
422   }
423 
424   bool isSSrcF64() const {
425     return isSCSrcB64() || isLiteralImm(MVT::f64);
426   }
427 
428   bool isSSrcF16() const {
429     return isSCSrcB16() || isLiteralImm(MVT::f16);
430   }
431 
432   bool isSSrcV2F16() const {
433     llvm_unreachable("cannot happen");
434     return isSSrcF16();
435   }
436 
437   bool isSSrcOrLdsB32() const {
438     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
439            isLiteralImm(MVT::i32) || isExpr();
440   }
441 
442   bool isVCSrcB32() const {
443     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
444   }
445 
446   bool isVCSrcB64() const {
447     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
448   }
449 
450   bool isVCSrcB16() const {
451     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
452   }
453 
454   bool isVCSrcV2B16() const {
455     return isVCSrcB16();
456   }
457 
458   bool isVCSrcF32() const {
459     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
460   }
461 
462   bool isVCSrcF64() const {
463     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
464   }
465 
466   bool isVCSrcF16() const {
467     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
468   }
469 
470   bool isVCSrcV2F16() const {
471     return isVCSrcF16();
472   }
473 
474   bool isVSrcB32() const {
475     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
476   }
477 
478   bool isVSrcB64() const {
479     return isVCSrcF64() || isLiteralImm(MVT::i64);
480   }
481 
482   bool isVSrcB16() const {
483     return isVCSrcF16() || isLiteralImm(MVT::i16);
484   }
485 
486   bool isVSrcV2B16() const {
487     return isVSrcB16() || isLiteralImm(MVT::v2i16);
488   }
489 
490   bool isVSrcF32() const {
491     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
492   }
493 
494   bool isVSrcF64() const {
495     return isVCSrcF64() || isLiteralImm(MVT::f64);
496   }
497 
498   bool isVSrcF16() const {
499     return isVCSrcF16() || isLiteralImm(MVT::f16);
500   }
501 
502   bool isVSrcV2F16() const {
503     return isVSrcF16() || isLiteralImm(MVT::v2f16);
504   }
505 
506   bool isVISrcB32() const {
507     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
508   }
509 
510   bool isVISrcB16() const {
511     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
512   }
513 
514   bool isVISrcV2B16() const {
515     return isVISrcB16();
516   }
517 
518   bool isVISrcF32() const {
519     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
520   }
521 
522   bool isVISrcF16() const {
523     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
524   }
525 
526   bool isVISrcV2F16() const {
527     return isVISrcF16() || isVISrcB32();
528   }
529 
530   bool isAISrcB32() const {
531     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
532   }
533 
534   bool isAISrcB16() const {
535     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
536   }
537 
538   bool isAISrcV2B16() const {
539     return isAISrcB16();
540   }
541 
542   bool isAISrcF32() const {
543     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
544   }
545 
546   bool isAISrcF16() const {
547     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
548   }
549 
550   bool isAISrcV2F16() const {
551     return isAISrcF16() || isAISrcB32();
552   }
553 
554   bool isAISrc_128B32() const {
555     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
556   }
557 
558   bool isAISrc_128B16() const {
559     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
560   }
561 
562   bool isAISrc_128V2B16() const {
563     return isAISrc_128B16();
564   }
565 
566   bool isAISrc_128F32() const {
567     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
568   }
569 
570   bool isAISrc_128F16() const {
571     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
572   }
573 
574   bool isAISrc_128V2F16() const {
575     return isAISrc_128F16() || isAISrc_128B32();
576   }
577 
578   bool isAISrc_512B32() const {
579     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
580   }
581 
582   bool isAISrc_512B16() const {
583     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
584   }
585 
586   bool isAISrc_512V2B16() const {
587     return isAISrc_512B16();
588   }
589 
590   bool isAISrc_512F32() const {
591     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
592   }
593 
594   bool isAISrc_512F16() const {
595     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
596   }
597 
598   bool isAISrc_512V2F16() const {
599     return isAISrc_512F16() || isAISrc_512B32();
600   }
601 
602   bool isAISrc_1024B32() const {
603     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
604   }
605 
606   bool isAISrc_1024B16() const {
607     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
608   }
609 
610   bool isAISrc_1024V2B16() const {
611     return isAISrc_1024B16();
612   }
613 
614   bool isAISrc_1024F32() const {
615     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
616   }
617 
618   bool isAISrc_1024F16() const {
619     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
620   }
621 
622   bool isAISrc_1024V2F16() const {
623     return isAISrc_1024F16() || isAISrc_1024B32();
624   }
625 
626   bool isKImmFP32() const {
627     return isLiteralImm(MVT::f32);
628   }
629 
630   bool isKImmFP16() const {
631     return isLiteralImm(MVT::f16);
632   }
633 
634   bool isMem() const override {
635     return false;
636   }
637 
638   bool isExpr() const {
639     return Kind == Expression;
640   }
641 
642   bool isSoppBrTarget() const {
643     return isExpr() || isImm();
644   }
645 
646   bool isSWaitCnt() const;
647   bool isHwreg() const;
648   bool isSendMsg() const;
649   bool isSwizzle() const;
650   bool isSMRDOffset8() const;
651   bool isSMRDOffset20() const;
652   bool isSMRDLiteralOffset() const;
653   bool isDPP8() const;
654   bool isDPPCtrl() const;
655   bool isBLGP() const;
656   bool isCBSZ() const;
657   bool isABID() const;
658   bool isGPRIdxMode() const;
659   bool isS16Imm() const;
660   bool isU16Imm() const;
661   bool isEndpgm() const;
662 
663   StringRef getExpressionAsToken() const {
664     assert(isExpr());
665     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
666     return S->getSymbol().getName();
667   }
668 
669   StringRef getToken() const {
670     assert(isToken());
671 
672     if (Kind == Expression)
673       return getExpressionAsToken();
674 
675     return StringRef(Tok.Data, Tok.Length);
676   }
677 
678   int64_t getImm() const {
679     assert(isImm());
680     return Imm.Val;
681   }
682 
683   ImmTy getImmTy() const {
684     assert(isImm());
685     return Imm.Type;
686   }
687 
688   unsigned getReg() const override {
689     assert(isRegKind());
690     return Reg.RegNo;
691   }
692 
693   SMLoc getStartLoc() const override {
694     return StartLoc;
695   }
696 
697   SMLoc getEndLoc() const override {
698     return EndLoc;
699   }
700 
701   SMRange getLocRange() const {
702     return SMRange(StartLoc, EndLoc);
703   }
704 
705   Modifiers getModifiers() const {
706     assert(isRegKind() || isImmTy(ImmTyNone));
707     return isRegKind() ? Reg.Mods : Imm.Mods;
708   }
709 
710   void setModifiers(Modifiers Mods) {
711     assert(isRegKind() || isImmTy(ImmTyNone));
712     if (isRegKind())
713       Reg.Mods = Mods;
714     else
715       Imm.Mods = Mods;
716   }
717 
718   bool hasModifiers() const {
719     return getModifiers().hasModifiers();
720   }
721 
722   bool hasFPModifiers() const {
723     return getModifiers().hasFPModifiers();
724   }
725 
726   bool hasIntModifiers() const {
727     return getModifiers().hasIntModifiers();
728   }
729 
730   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
731 
732   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
733 
734   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
735 
736   template <unsigned Bitwidth>
737   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
738 
739   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
740     addKImmFPOperands<16>(Inst, N);
741   }
742 
743   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
744     addKImmFPOperands<32>(Inst, N);
745   }
746 
747   void addRegOperands(MCInst &Inst, unsigned N) const;
748 
749   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
750     addRegOperands(Inst, N);
751   }
752 
753   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
754     if (isRegKind())
755       addRegOperands(Inst, N);
756     else if (isExpr())
757       Inst.addOperand(MCOperand::createExpr(Expr));
758     else
759       addImmOperands(Inst, N);
760   }
761 
762   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
763     Modifiers Mods = getModifiers();
764     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
765     if (isRegKind()) {
766       addRegOperands(Inst, N);
767     } else {
768       addImmOperands(Inst, N, false);
769     }
770   }
771 
772   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
773     assert(!hasIntModifiers());
774     addRegOrImmWithInputModsOperands(Inst, N);
775   }
776 
777   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
778     assert(!hasFPModifiers());
779     addRegOrImmWithInputModsOperands(Inst, N);
780   }
781 
782   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
783     Modifiers Mods = getModifiers();
784     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
785     assert(isRegKind());
786     addRegOperands(Inst, N);
787   }
788 
789   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
790     assert(!hasIntModifiers());
791     addRegWithInputModsOperands(Inst, N);
792   }
793 
794   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
795     assert(!hasFPModifiers());
796     addRegWithInputModsOperands(Inst, N);
797   }
798 
799   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
800     if (isImm())
801       addImmOperands(Inst, N);
802     else {
803       assert(isExpr());
804       Inst.addOperand(MCOperand::createExpr(Expr));
805     }
806   }
807 
808   static void printImmTy(raw_ostream& OS, ImmTy Type) {
809     switch (Type) {
810     case ImmTyNone: OS << "None"; break;
811     case ImmTyGDS: OS << "GDS"; break;
812     case ImmTyLDS: OS << "LDS"; break;
813     case ImmTyOffen: OS << "Offen"; break;
814     case ImmTyIdxen: OS << "Idxen"; break;
815     case ImmTyAddr64: OS << "Addr64"; break;
816     case ImmTyOffset: OS << "Offset"; break;
817     case ImmTyInstOffset: OS << "InstOffset"; break;
818     case ImmTyOffset0: OS << "Offset0"; break;
819     case ImmTyOffset1: OS << "Offset1"; break;
820     case ImmTyDLC: OS << "DLC"; break;
821     case ImmTyGLC: OS << "GLC"; break;
822     case ImmTySLC: OS << "SLC"; break;
823     case ImmTyTFE: OS << "TFE"; break;
824     case ImmTyD16: OS << "D16"; break;
825     case ImmTyFORMAT: OS << "FORMAT"; break;
826     case ImmTyClampSI: OS << "ClampSI"; break;
827     case ImmTyOModSI: OS << "OModSI"; break;
828     case ImmTyDPP8: OS << "DPP8"; break;
829     case ImmTyDppCtrl: OS << "DppCtrl"; break;
830     case ImmTyDppRowMask: OS << "DppRowMask"; break;
831     case ImmTyDppBankMask: OS << "DppBankMask"; break;
832     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
833     case ImmTyDppFi: OS << "FI"; break;
834     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
835     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
836     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
837     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
838     case ImmTyDMask: OS << "DMask"; break;
839     case ImmTyDim: OS << "Dim"; break;
840     case ImmTyUNorm: OS << "UNorm"; break;
841     case ImmTyDA: OS << "DA"; break;
842     case ImmTyR128A16: OS << "R128A16"; break;
843     case ImmTyLWE: OS << "LWE"; break;
844     case ImmTyOff: OS << "Off"; break;
845     case ImmTyExpTgt: OS << "ExpTgt"; break;
846     case ImmTyExpCompr: OS << "ExpCompr"; break;
847     case ImmTyExpVM: OS << "ExpVM"; break;
848     case ImmTyHwreg: OS << "Hwreg"; break;
849     case ImmTySendMsg: OS << "SendMsg"; break;
850     case ImmTyInterpSlot: OS << "InterpSlot"; break;
851     case ImmTyInterpAttr: OS << "InterpAttr"; break;
852     case ImmTyAttrChan: OS << "AttrChan"; break;
853     case ImmTyOpSel: OS << "OpSel"; break;
854     case ImmTyOpSelHi: OS << "OpSelHi"; break;
855     case ImmTyNegLo: OS << "NegLo"; break;
856     case ImmTyNegHi: OS << "NegHi"; break;
857     case ImmTySwizzle: OS << "Swizzle"; break;
858     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
859     case ImmTyHigh: OS << "High"; break;
860     case ImmTyBLGP: OS << "BLGP"; break;
861     case ImmTyCBSZ: OS << "CBSZ"; break;
862     case ImmTyABID: OS << "ABID"; break;
863     case ImmTyEndpgm: OS << "Endpgm"; break;
864     }
865   }
866 
867   void print(raw_ostream &OS) const override {
868     switch (Kind) {
869     case Register:
870       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
871       break;
872     case Immediate:
873       OS << '<' << getImm();
874       if (getImmTy() != ImmTyNone) {
875         OS << " type: "; printImmTy(OS, getImmTy());
876       }
877       OS << " mods: " << Imm.Mods << '>';
878       break;
879     case Token:
880       OS << '\'' << getToken() << '\'';
881       break;
882     case Expression:
883       OS << "<expr " << *Expr << '>';
884       break;
885     }
886   }
887 
888   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
889                                       int64_t Val, SMLoc Loc,
890                                       ImmTy Type = ImmTyNone,
891                                       bool IsFPImm = false) {
892     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
893     Op->Imm.Val = Val;
894     Op->Imm.IsFPImm = IsFPImm;
895     Op->Imm.Type = Type;
896     Op->Imm.Mods = Modifiers();
897     Op->StartLoc = Loc;
898     Op->EndLoc = Loc;
899     return Op;
900   }
901 
902   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
903                                         StringRef Str, SMLoc Loc,
904                                         bool HasExplicitEncodingSize = true) {
905     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
906     Res->Tok.Data = Str.data();
907     Res->Tok.Length = Str.size();
908     Res->StartLoc = Loc;
909     Res->EndLoc = Loc;
910     return Res;
911   }
912 
913   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
914                                       unsigned RegNo, SMLoc S,
915                                       SMLoc E) {
916     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
917     Op->Reg.RegNo = RegNo;
918     Op->Reg.Mods = Modifiers();
919     Op->StartLoc = S;
920     Op->EndLoc = E;
921     return Op;
922   }
923 
924   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
925                                        const class MCExpr *Expr, SMLoc S) {
926     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
927     Op->Expr = Expr;
928     Op->StartLoc = S;
929     Op->EndLoc = S;
930     return Op;
931   }
932 };
933 
934 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
935   OS << "abs:" << Mods.Abs << " neg:" << Mods.Neg << " sext:" << Mods.Sext;
936   return OS;
937 }
938 
939 //===----------------------------------------------------------------------===//
940 // AsmParser
941 //===----------------------------------------------------------------------===//
942 
943 // Holds info related to the current kernel, e.g. the count of SGPRs used.
944 // A kernel scope begins at an .amdgpu_hsa_kernel directive and ends at the
945 // next .amdgpu_hsa_kernel directive or at EOF.
946 class KernelScopeInfo {
947   int SgprIndexUnusedMin = -1;
948   int VgprIndexUnusedMin = -1;
949   MCContext *Ctx = nullptr;
950 
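  // Record that SGPR index i is in use: bump SgprIndexUnusedMin past i and
  // update the .kernel.sgpr_count symbol accordingly.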
951   void usesSgprAt(int i) {
952     if (i >= SgprIndexUnusedMin) {
953       SgprIndexUnusedMin = ++i;
954       if (Ctx) {
955         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
956         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
957       }
958     }
959   }
960 
961   void usesVgprAt(int i) {
962     if (i >= VgprIndexUnusedMin) {
963       VgprIndexUnusedMin = ++i;
964       if (Ctx) {
965         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
966         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
967       }
968     }
969   }
970 
971 public:
972   KernelScopeInfo() = default;
973 
974   void initialize(MCContext &Context) {
975     Ctx = &Context;
976     usesSgprAt(SgprIndexUnusedMin = -1);
977     usesVgprAt(VgprIndexUnusedMin = -1);
978   }
979 
980   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
981     switch (RegKind) {
982       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
983       case IS_AGPR: // fall through
984       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
985       default: break;
986     }
987   }
988 };
989 
990 class AMDGPUAsmParser : public MCTargetAsmParser {
991   MCAsmParser &Parser;
992 
993   // Number of extra operands parsed after the first optional operand.
994   // This may be necessary to skip hardcoded mandatory operands.
995   static const unsigned MAX_OPR_LOOKAHEAD = 8;
996 
997   unsigned ForcedEncodingSize = 0;
998   bool ForcedDPP = false;
999   bool ForcedSDWA = false;
1000   KernelScopeInfo KernelScope;
1001 
1002   /// @name Auto-generated Match Functions
1003   /// {
1004 
1005 #define GET_ASSEMBLER_HEADER
1006 #include "AMDGPUGenAsmMatcher.inc"
1007 
1008   /// }
1009 
1010 private:
1011   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1012   bool OutOfRangeError(SMRange Range);
1013   /// Calculate the VGPR/SGPR blocks required for the given target, reserved
1014   /// registers, and user-specified NextFreeXGPR values.
1015   ///
1016   /// \param Features [in] Target features, used for bug corrections.
1017   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1018   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1019   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1020   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1021   /// descriptor field, if valid.
1022   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1023   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1024   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1025   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1026   /// \param VGPRBlocks [out] Result VGPR block count.
1027   /// \param SGPRBlocks [out] Result SGPR block count.
1028   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1029                           bool FlatScrUsed, bool XNACKUsed,
1030                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1031                           SMRange VGPRRange, unsigned NextFreeSGPR,
1032                           SMRange SGPRRange, unsigned &VGPRBlocks,
1033                           unsigned &SGPRBlocks);
1034   bool ParseDirectiveAMDGCNTarget();
1035   bool ParseDirectiveAMDHSAKernel();
1036   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1037   bool ParseDirectiveHSACodeObjectVersion();
1038   bool ParseDirectiveHSACodeObjectISA();
1039   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1040   bool ParseDirectiveAMDKernelCodeT();
1041   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1042   bool ParseDirectiveAMDGPUHsaKernel();
1043 
1044   bool ParseDirectiveISAVersion();
1045   bool ParseDirectiveHSAMetadata();
1046   bool ParseDirectivePALMetadataBegin();
1047   bool ParseDirectivePALMetadata();
1048   bool ParseDirectiveAMDGPULDS();
1049 
1050   /// Common code to parse out a block of text (typically YAML) between start and
1051   /// end directives.
1052   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1053                            const char *AssemblerDirectiveEnd,
1054                            std::string &CollectString);
1055 
1056   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1057                              RegisterKind RegKind, unsigned Reg1,
1058                              unsigned RegNum);
1059   bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
1060                            unsigned& RegNum, unsigned& RegWidth,
1061                            unsigned *DwordRegIndex);
1062   bool isRegister();
1063   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1064   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1065   void initializeGprCountSymbol(RegisterKind RegKind);
1066   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1067                              unsigned RegWidth);
1068   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1069                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1070   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1071                  bool IsGdsHardcoded);
1072 
1073 public:
1074   enum AMDGPUMatchResultTy {
1075     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1076   };
1077   enum OperandMode {
1078     OperandMode_Default,
1079     OperandMode_NSA,
1080   };
1081 
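  // Maps an optional operand's immediate type to its index in the parsed
  // operand list; used by the cvt* conversion helpers below.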
1082   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1083 
1084   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1085                const MCInstrInfo &MII,
1086                const MCTargetOptions &Options)
1087       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1088     MCAsmParserExtension::Initialize(Parser);
1089 
1090     if (getFeatureBits().none()) {
1091       // Set default features.
1092       copySTI().ToggleFeature("southern-islands");
1093     }
1094 
1095     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1096 
1097     {
1098       // TODO: make these pre-defined variables read-only.
1099       // Currently there is no suitable machinery in core llvm-mc for this.
1100       // MCSymbol::isRedefinable is intended for another purpose, and
1101       // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1102       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1103       MCContext &Ctx = getContext();
1104       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1105         MCSymbol *Sym =
1106             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1107         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1108         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1109         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1110         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1111         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1112       } else {
1113         MCSymbol *Sym =
1114             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1115         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1116         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1117         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1118         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1119         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1120       }
1121       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1122         initializeGprCountSymbol(IS_VGPR);
1123         initializeGprCountSymbol(IS_SGPR);
1124       } else
1125         KernelScope.initialize(getContext());
1126     }
1127   }
1128 
1129   bool hasXNACK() const {
1130     return AMDGPU::hasXNACK(getSTI());
1131   }
1132 
1133   bool hasMIMG_R128() const {
1134     return AMDGPU::hasMIMG_R128(getSTI());
1135   }
1136 
1137   bool hasPackedD16() const {
1138     return AMDGPU::hasPackedD16(getSTI());
1139   }
1140 
1141   bool isSI() const {
1142     return AMDGPU::isSI(getSTI());
1143   }
1144 
1145   bool isCI() const {
1146     return AMDGPU::isCI(getSTI());
1147   }
1148 
1149   bool isVI() const {
1150     return AMDGPU::isVI(getSTI());
1151   }
1152 
1153   bool isGFX9() const {
1154     return AMDGPU::isGFX9(getSTI());
1155   }
1156 
1157   bool isGFX10() const {
1158     return AMDGPU::isGFX10(getSTI());
1159   }
1160 
1161   bool hasInv2PiInlineImm() const {
1162     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1163   }
1164 
1165   bool hasFlatOffsets() const {
1166     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1167   }
1168 
1169   bool hasSGPR102_SGPR103() const {
1170     return !isVI() && !isGFX9();
1171   }
1172 
1173   bool hasSGPR104_SGPR105() const {
1174     return isGFX10();
1175   }
1176 
1177   bool hasIntClamp() const {
1178     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1179   }
1180 
1181   AMDGPUTargetStreamer &getTargetStreamer() {
1182     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1183     return static_cast<AMDGPUTargetStreamer &>(TS);
1184   }
1185 
1186   const MCRegisterInfo *getMRI() const {
1187     // We need this const_cast because for some reason getContext() is not const
1188     // in MCAsmParser.
1189     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1190   }
1191 
1192   const MCInstrInfo *getMII() const {
1193     return &MII;
1194   }
1195 
1196   const FeatureBitset &getFeatureBits() const {
1197     return getSTI().getFeatureBits();
1198   }
1199 
1200   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1201   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1202   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1203 
1204   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1205   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1206   bool isForcedDPP() const { return ForcedDPP; }
1207   bool isForcedSDWA() const { return ForcedSDWA; }
1208   ArrayRef<unsigned> getMatchedVariants() const;
1209 
1210   std::unique_ptr<AMDGPUOperand> parseRegister();
1211   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1212   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1213   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1214                                       unsigned Kind) override;
1215   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1216                                OperandVector &Operands, MCStreamer &Out,
1217                                uint64_t &ErrorInfo,
1218                                bool MatchingInlineAsm) override;
1219   bool ParseDirective(AsmToken DirectiveID) override;
1220   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1221                                     OperandMode Mode = OperandMode_Default);
1222   StringRef parseMnemonicSuffix(StringRef Name);
1223   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1224                         SMLoc NameLoc, OperandVector &Operands) override;
1225   //bool ProcessInstruction(MCInst &Inst);
1226 
1227   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1228 
1229   OperandMatchResultTy
1230   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1231                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1232                      bool (*ConvertResult)(int64_t &) = nullptr);
1233 
1234   OperandMatchResultTy
1235   parseOperandArrayWithPrefix(const char *Prefix,
1236                               OperandVector &Operands,
1237                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1238                               bool (*ConvertResult)(int64_t&) = nullptr);
1239 
1240   OperandMatchResultTy
1241   parseNamedBit(const char *Name, OperandVector &Operands,
1242                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1243   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1244                                              StringRef &Value);
1245 
1246   bool isModifier();
1247   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1248   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1249   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1250   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1251   bool parseSP3NegModifier();
1252   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1253   OperandMatchResultTy parseReg(OperandVector &Operands);
1254   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1255   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1256   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1257   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1258   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1259   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1260   OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1261 
1262   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1263   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1264   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1265   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1266 
1267   bool parseCnt(int64_t &IntVal);
1268   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1269   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1270 
1271 private:
1272   struct OperandInfoTy {
1273     int64_t Id;
1274     bool IsSymbolic = false;
1275     bool IsDefined = false;
1276 
1277     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1278   };
1279 
1280   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1281   bool validateSendMsg(const OperandInfoTy &Msg,
1282                        const OperandInfoTy &Op,
1283                        const OperandInfoTy &Stream,
1284                        const SMLoc Loc);
1285 
1286   bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1287   bool validateHwreg(const OperandInfoTy &HwReg,
1288                      const int64_t Offset,
1289                      const int64_t Width,
1290                      const SMLoc Loc);
1291 
1292   void errorExpTgt();
1293   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1294   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1295 
1296   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1297   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1298   bool validateSOPLiteral(const MCInst &Inst) const;
1299   bool validateConstantBusLimitations(const MCInst &Inst);
1300   bool validateEarlyClobberLimitations(const MCInst &Inst);
1301   bool validateIntClampSupported(const MCInst &Inst);
1302   bool validateMIMGAtomicDMask(const MCInst &Inst);
1303   bool validateMIMGGatherDMask(const MCInst &Inst);
1304   bool validateMIMGDataSize(const MCInst &Inst);
1305   bool validateMIMGAddrSize(const MCInst &Inst);
1306   bool validateMIMGD16(const MCInst &Inst);
1307   bool validateMIMGDim(const MCInst &Inst);
1308   bool validateLdsDirect(const MCInst &Inst);
1309   bool validateOpSel(const MCInst &Inst);
1310   bool validateVccOperand(unsigned Reg) const;
1311   bool validateVOP3Literal(const MCInst &Inst) const;
1312   unsigned getConstantBusLimit(unsigned Opcode) const;
1313   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1314   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1315   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1316 
1317   bool isId(const StringRef Id) const;
1318   bool isId(const AsmToken &Token, const StringRef Id) const;
1319   bool isToken(const AsmToken::TokenKind Kind) const;
1320   bool trySkipId(const StringRef Id);
1321   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1322   bool trySkipToken(const AsmToken::TokenKind Kind);
1323   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1324   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1325   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1326   AsmToken::TokenKind getTokenKind() const;
1327   bool parseExpr(int64_t &Imm);
1328   bool parseExpr(OperandVector &Operands);
1329   StringRef getTokenStr() const;
1330   AsmToken peekToken();
1331   AsmToken getToken() const;
1332   SMLoc getLoc() const;
1333   void lex();
1334 
1335 public:
1336   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1337   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1338 
1339   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1340   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1341   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1342   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1343   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1344   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1345 
1346   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1347                             const unsigned MinVal,
1348                             const unsigned MaxVal,
1349                             const StringRef ErrMsg);
1350   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1351   bool parseSwizzleOffset(int64_t &Imm);
1352   bool parseSwizzleMacro(int64_t &Imm);
1353   bool parseSwizzleQuadPerm(int64_t &Imm);
1354   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1355   bool parseSwizzleBroadcast(int64_t &Imm);
1356   bool parseSwizzleSwap(int64_t &Imm);
1357   bool parseSwizzleReverse(int64_t &Imm);
1358 
1359   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1360   int64_t parseGPRIdxMacro();
1361 
1362   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1363   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1364   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1365   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1366   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1367 
1368   AMDGPUOperand::Ptr defaultDLC() const;
1369   AMDGPUOperand::Ptr defaultGLC() const;
1370   AMDGPUOperand::Ptr defaultSLC() const;
1371 
1372   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1373   AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1374   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1375   AMDGPUOperand::Ptr defaultFlatOffset() const;
1376 
1377   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1378 
1379   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1380                OptionalImmIndexMap &OptionalIdx);
1381   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1382   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1383   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1384 
1385   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1386 
1387   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1388                bool IsAtomic = false);
1389   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1390 
1391   OperandMatchResultTy parseDim(OperandVector &Operands);
1392   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1393   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1394   AMDGPUOperand::Ptr defaultRowMask() const;
1395   AMDGPUOperand::Ptr defaultBankMask() const;
1396   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1397   AMDGPUOperand::Ptr defaultFI() const;
1398   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1399   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1400 
1401   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1402                                     AMDGPUOperand::ImmTy Type);
1403   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1404   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1405   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1406   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1407   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1408   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1409                 uint64_t BasicInstType, bool skipVcc = false);
1410 
1411   AMDGPUOperand::Ptr defaultBLGP() const;
1412   AMDGPUOperand::Ptr defaultCBSZ() const;
1413   AMDGPUOperand::Ptr defaultABID() const;
1414 
1415   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1416   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1417 };
1418 
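// Describes an optional instruction operand: its assembly name, the immediate
// type it maps to, whether it is a single-bit flag, and an optional callback
// for converting the parsed value.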
1419 struct OptionalOperand {
1420   const char *Name;
1421   AMDGPUOperand::ImmTy Type;
1422   bool IsBit;
1423   bool (*ConvertResult)(int64_t&);
1424 };
1425 
1426 } // end anonymous namespace
1427 
1428 // May be called with an integer type of equivalent bitwidth.
1429 static const fltSemantics *getFltSemantics(unsigned Size) {
1430   switch (Size) {
1431   case 4:
1432     return &APFloat::IEEEsingle();
1433   case 8:
1434     return &APFloat::IEEEdouble();
1435   case 2:
1436     return &APFloat::IEEEhalf();
1437   default:
1438     llvm_unreachable("unsupported fp type");
1439   }
1440 }
1441 
1442 static const fltSemantics *getFltSemantics(MVT VT) {
1443   return getFltSemantics(VT.getSizeInBits() / 8);
1444 }
1445 
1446 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1447   switch (OperandType) {
1448   case AMDGPU::OPERAND_REG_IMM_INT32:
1449   case AMDGPU::OPERAND_REG_IMM_FP32:
1450   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1451   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1452   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1453   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1454     return &APFloat::IEEEsingle();
1455   case AMDGPU::OPERAND_REG_IMM_INT64:
1456   case AMDGPU::OPERAND_REG_IMM_FP64:
1457   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1458   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1459     return &APFloat::IEEEdouble();
1460   case AMDGPU::OPERAND_REG_IMM_INT16:
1461   case AMDGPU::OPERAND_REG_IMM_FP16:
1462   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1463   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1464   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1465   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1466   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1467   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1468   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1469   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1470   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1471   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1472     return &APFloat::IEEEhalf();
1473   default:
1474     llvm_unreachable("unsupported fp type");
1475   }
1476 }
1477 
1478 //===----------------------------------------------------------------------===//
1479 // Operand
1480 //===----------------------------------------------------------------------===//
1481 
1482 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1483   bool Lost;
1484 
1485   // Convert the literal to the floating-point type implied by VT
1486   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1487                                                APFloat::rmNearestTiesToEven,
1488                                                &Lost);
1489   // We allow precision loss but not overflow or underflow
1490   if (Status != APFloat::opOK &&
1491       Lost &&
1492       ((Status & APFloat::opOverflow)  != 0 ||
1493        (Status & APFloat::opUnderflow) != 0)) {
1494     return false;
1495   }
1496 
1497   return true;
1498 }
1499 
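// Return true if Val fits into Size bits as either an unsigned or a signed
// integer, i.e. truncation to Size bits loses no information.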
1500 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1501   return isUIntN(Size, Val) || isIntN(Size, Val);
1502 }
1503 
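// Check whether this immediate can be encoded as an AMDGPU inline constant of
// the given type, i.e. without consuming an extra literal dword.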
1504 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1505 
1506   // This is a hack to enable named inline values like
1507   // shared_base with both 32-bit and 64-bit operands.
1508   // Note that these values are defined as
1509   // 32-bit operands only.
1510   if (isInlineValue()) {
1511     return true;
1512   }
1513 
1514   if (!isImmTy(ImmTyNone)) {
1515     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1516     return false;
1517   }
1518   // TODO: We should avoid using host float here. It would be better to
1519   // check the float bit values, which is what a few other places do.
1520   // We've had bot failures before due to weird NaN support on MIPS hosts.
1521 
1522   APInt Literal(64, Imm.Val);
1523 
1524   if (Imm.IsFPImm) { // We got fp literal token
1525     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1526       return AMDGPU::isInlinableLiteral64(Imm.Val,
1527                                           AsmParser->hasInv2PiInlineImm());
1528     }
1529 
1530     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1531     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1532       return false;
1533 
1534     if (type.getScalarSizeInBits() == 16) {
1535       return AMDGPU::isInlinableLiteral16(
1536         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1537         AsmParser->hasInv2PiInlineImm());
1538     }
1539 
1540     // Check if single precision literal is inlinable
1541     return AMDGPU::isInlinableLiteral32(
1542       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1543       AsmParser->hasInv2PiInlineImm());
1544   }
1545 
1546   // We got int literal token.
1547   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1548     return AMDGPU::isInlinableLiteral64(Imm.Val,
1549                                         AsmParser->hasInv2PiInlineImm());
1550   }
1551 
1552   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1553     return false;
1554   }
1555 
1556   if (type.getScalarSizeInBits() == 16) {
1557     return AMDGPU::isInlinableLiteral16(
1558       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1559       AsmParser->hasInv2PiInlineImm());
1560   }
1561 
1562   return AMDGPU::isInlinableLiteral32(
1563     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1564     AsmParser->hasInv2PiInlineImm());
1565 }
1566 
1567 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1568   // Check that this immediate can be added as a literal.
1569   if (!isImmTy(ImmTyNone)) {
1570     return false;
1571   }
1572 
1573   if (!Imm.IsFPImm) {
1574     // We got int literal token.
1575 
1576     if (type == MVT::f64 && hasFPModifiers()) {
1577       // FP modifiers cannot be applied to int literals while preserving the same
1578       // semantics for VOP1/2/C and VOP3 because of integer truncation. To avoid
1579       // ambiguity, these cases are disabled.
1580       return false;
1581     }
1582 
1583     unsigned Size = type.getSizeInBits();
1584     if (Size == 64)
1585       Size = 32;
1586 
1587     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1588     // types.
1589     return isSafeTruncation(Imm.Val, Size);
1590   }
1591 
1592   // We got fp literal token
1593   if (type == MVT::f64) { // Expected 64-bit fp operand
1594     // The low 32 bits of the literal would be set to zeroes, but we accept such literals.
1595     return true;
1596   }
1597 
1598   if (type == MVT::i64) { // Expected 64-bit int operand
1599     // We don't allow fp literals in 64-bit integer instructions. It is
1600     // unclear how we should encode them.
1601     return false;
1602   }
1603 
1604   // We allow fp literals with f16x2 operands assuming that the specified
1605   // literal goes into the lower half and the upper half is zero. We also
1606   // require that the literal can be losslessly converted to f16.
1607   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1608                      (type == MVT::v2i16)? MVT::i16 : type;
1609 
1610   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1611   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1612 }
1613 
1614 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1615   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1616 }
1617 
1618 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1619   if (AsmParser->isVI())
1620     return isVReg32();
1621   else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1622     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1623   else
1624     return false;
1625 }
1626 
1627 bool AMDGPUOperand::isSDWAFP16Operand() const {
1628   return isSDWAOperand(MVT::f16);
1629 }
1630 
1631 bool AMDGPUOperand::isSDWAFP32Operand() const {
1632   return isSDWAOperand(MVT::f32);
1633 }
1634 
1635 bool AMDGPUOperand::isSDWAInt16Operand() const {
1636   return isSDWAOperand(MVT::i16);
1637 }
1638 
1639 bool AMDGPUOperand::isSDWAInt32Operand() const {
1640   return isSDWAOperand(MVT::i32);
1641 }
1642 
1643 bool AMDGPUOperand::isBoolReg() const {
1644   return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1645          (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1646 }
1647 
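// Sketch of the effect of the FP input modifiers applied below: with
// Size == 4 the sign mask is 0x80000000, so 'abs' clears that bit (e.g. the
// bit pattern of -2.0f becomes that of 2.0f) and 'neg' toggles it.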
1648 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1649 {
1650   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1651   assert(Size == 2 || Size == 4 || Size == 8);
1652 
1653   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1654 
1655   if (Imm.Mods.Abs) {
1656     Val &= ~FpSignMask;
1657   }
1658   if (Imm.Mods.Neg) {
1659     Val ^= FpSignMask;
1660   }
1661 
1662   return Val;
1663 }
1664 
1665 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1666   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1667                              Inst.getNumOperands())) {
1668     addLiteralImmOperand(Inst, Imm.Val,
1669                          ApplyModifiers &&
1670                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1671   } else {
1672     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1673     Inst.addOperand(MCOperand::createImm(Imm.Val));
1674   }
1675 }
1676 
1677 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1678   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1679   auto OpNum = Inst.getNumOperands();
1680   // Check that this operand accepts literals
1681   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1682 
1683   if (ApplyModifiers) {
1684     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1685     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1686     Val = applyInputFPModifiers(Val, Size);
1687   }
1688 
1689   APInt Literal(64, Val);
1690   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1691 
1692   if (Imm.IsFPImm) { // We got fp literal token
1693     switch (OpTy) {
1694     case AMDGPU::OPERAND_REG_IMM_INT64:
1695     case AMDGPU::OPERAND_REG_IMM_FP64:
1696     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1697     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1698       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1699                                        AsmParser->hasInv2PiInlineImm())) {
1700         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1701         return;
1702       }
1703 
1704       // Non-inlineable
1705       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1706         // For fp operands we check if low 32 bits are zeros
1707         if (Literal.getLoBits(32) != 0) {
1708           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1709           "Can't encode literal as exact 64-bit floating-point operand. "
1710           "Low 32-bits will be set to zero");
1711         }
1712 
1713         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1714         return;
1715       }
1716 
1717       // We don't allow fp literals in 64-bit integer instructions. It is
1718       // unclear how we should encode them. This case should be checked earlier
1719       // in predicate methods (isLiteralImm())
1720       llvm_unreachable("fp literal in 64-bit integer instruction.");
1721 
1722     case AMDGPU::OPERAND_REG_IMM_INT32:
1723     case AMDGPU::OPERAND_REG_IMM_FP32:
1724     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1725     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1726     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1727     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1728     case AMDGPU::OPERAND_REG_IMM_INT16:
1729     case AMDGPU::OPERAND_REG_IMM_FP16:
1730     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1731     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1732     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1733     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1734     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1735     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1736     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1737     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1738     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1739     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1740       bool lost;
1741       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1742       // Convert the literal to the operand's floating-point semantics.
1743       FPLiteral.convert(*getOpFltSemantics(OpTy),
1744                         APFloat::rmNearestTiesToEven, &lost);
1745       // We allow precision loss but not overflow or underflow. This should be
1746       // checked earlier in isLiteralImm().
1747 
1748       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1749       Inst.addOperand(MCOperand::createImm(ImmVal));
1750       return;
1751     }
1752     default:
1753       llvm_unreachable("invalid operand size");
1754     }
1755 
1756     return;
1757   }
1758 
1759   // We got int literal token.
1760   // Only sign extend inline immediates.
1761   switch (OpTy) {
1762   case AMDGPU::OPERAND_REG_IMM_INT32:
1763   case AMDGPU::OPERAND_REG_IMM_FP32:
1764   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1765   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1766   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1767   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1768   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1769   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1770     if (isSafeTruncation(Val, 32) &&
1771         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1772                                      AsmParser->hasInv2PiInlineImm())) {
1773       Inst.addOperand(MCOperand::createImm(Val));
1774       return;
1775     }
1776 
1777     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1778     return;
1779 
1780   case AMDGPU::OPERAND_REG_IMM_INT64:
1781   case AMDGPU::OPERAND_REG_IMM_FP64:
1782   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1783   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1784     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1785       Inst.addOperand(MCOperand::createImm(Val));
1786       return;
1787     }
1788 
1789     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1790     return;
1791 
1792   case AMDGPU::OPERAND_REG_IMM_INT16:
1793   case AMDGPU::OPERAND_REG_IMM_FP16:
1794   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1795   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1796   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1797   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1798     if (isSafeTruncation(Val, 16) &&
1799         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1800                                      AsmParser->hasInv2PiInlineImm())) {
1801       Inst.addOperand(MCOperand::createImm(Val));
1802       return;
1803     }
1804 
1805     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1806     return;
1807 
1808   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1809   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1810   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1811   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
1812     assert(isSafeTruncation(Val, 16));
1813     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1814                                         AsmParser->hasInv2PiInlineImm()));
1815 
1816     Inst.addOperand(MCOperand::createImm(Val));
1817     return;
1818   }
1819   default:
1820     llvm_unreachable("invalid operand size");
1821   }
1822 }
1823 
1824 template <unsigned Bitwidth>
1825 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1826   APInt Literal(64, Imm.Val);
1827 
1828   if (!Imm.IsFPImm) {
1829     // We got int literal token.
1830     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1831     return;
1832   }
1833 
1834   bool Lost;
1835   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1836   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1837                     APFloat::rmNearestTiesToEven, &Lost);
1838   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1839 }
1840 
1841 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1842   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1843 }
1844 
1845 static bool isInlineValue(unsigned Reg) {
1846   switch (Reg) {
1847   case AMDGPU::SRC_SHARED_BASE:
1848   case AMDGPU::SRC_SHARED_LIMIT:
1849   case AMDGPU::SRC_PRIVATE_BASE:
1850   case AMDGPU::SRC_PRIVATE_LIMIT:
1851   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1852     return true;
1853   case AMDGPU::SRC_VCCZ:
1854   case AMDGPU::SRC_EXECZ:
1855   case AMDGPU::SRC_SCC:
1856     return true;
1857   case AMDGPU::SGPR_NULL:
1858     return true;
1859   default:
1860     return false;
1861   }
1862 }
1863 
1864 bool AMDGPUOperand::isInlineValue() const {
1865   return isRegKind() && ::isInlineValue(getReg());
1866 }
1867 
1868 //===----------------------------------------------------------------------===//
1869 // AsmParser
1870 //===----------------------------------------------------------------------===//
1871 
1872 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1873   if (Is == IS_VGPR) {
1874     switch (RegWidth) {
1875       default: return -1;
1876       case 1: return AMDGPU::VGPR_32RegClassID;
1877       case 2: return AMDGPU::VReg_64RegClassID;
1878       case 3: return AMDGPU::VReg_96RegClassID;
1879       case 4: return AMDGPU::VReg_128RegClassID;
1880       case 5: return AMDGPU::VReg_160RegClassID;
1881       case 8: return AMDGPU::VReg_256RegClassID;
1882       case 16: return AMDGPU::VReg_512RegClassID;
1883       case 32: return AMDGPU::VReg_1024RegClassID;
1884     }
1885   } else if (Is == IS_TTMP) {
1886     switch (RegWidth) {
1887       default: return -1;
1888       case 1: return AMDGPU::TTMP_32RegClassID;
1889       case 2: return AMDGPU::TTMP_64RegClassID;
1890       case 4: return AMDGPU::TTMP_128RegClassID;
1891       case 8: return AMDGPU::TTMP_256RegClassID;
1892       case 16: return AMDGPU::TTMP_512RegClassID;
1893     }
1894   } else if (Is == IS_SGPR) {
1895     switch (RegWidth) {
1896       default: return -1;
1897       case 1: return AMDGPU::SGPR_32RegClassID;
1898       case 2: return AMDGPU::SGPR_64RegClassID;
1899       case 4: return AMDGPU::SGPR_128RegClassID;
1900       case 8: return AMDGPU::SGPR_256RegClassID;
1901       case 16: return AMDGPU::SGPR_512RegClassID;
1902     }
1903   } else if (Is == IS_AGPR) {
1904     switch (RegWidth) {
1905       default: return -1;
1906       case 1: return AMDGPU::AGPR_32RegClassID;
1907       case 2: return AMDGPU::AReg_64RegClassID;
1908       case 4: return AMDGPU::AReg_128RegClassID;
1909       case 16: return AMDGPU::AReg_512RegClassID;
1910       case 32: return AMDGPU::AReg_1024RegClassID;
1911     }
1912   }
1913   return -1;
1914 }
1915 
1916 static unsigned getSpecialRegForName(StringRef RegName) {
1917   return StringSwitch<unsigned>(RegName)
1918     .Case("exec", AMDGPU::EXEC)
1919     .Case("vcc", AMDGPU::VCC)
1920     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1921     .Case("xnack_mask", AMDGPU::XNACK_MASK)
1922     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
1923     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
1924     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1925     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1926     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
1927     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
1928     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1929     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1930     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1931     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1932     .Case("lds_direct", AMDGPU::LDS_DIRECT)
1933     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1934     .Case("m0", AMDGPU::M0)
1935     .Case("vccz", AMDGPU::SRC_VCCZ)
1936     .Case("src_vccz", AMDGPU::SRC_VCCZ)
1937     .Case("execz", AMDGPU::SRC_EXECZ)
1938     .Case("src_execz", AMDGPU::SRC_EXECZ)
1939     .Case("scc", AMDGPU::SRC_SCC)
1940     .Case("src_scc", AMDGPU::SRC_SCC)
1941     .Case("tba", AMDGPU::TBA)
1942     .Case("tma", AMDGPU::TMA)
1943     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1944     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1945     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1946     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1947     .Case("vcc_lo", AMDGPU::VCC_LO)
1948     .Case("vcc_hi", AMDGPU::VCC_HI)
1949     .Case("exec_lo", AMDGPU::EXEC_LO)
1950     .Case("exec_hi", AMDGPU::EXEC_HI)
1951     .Case("tma_lo", AMDGPU::TMA_LO)
1952     .Case("tma_hi", AMDGPU::TMA_HI)
1953     .Case("tba_lo", AMDGPU::TBA_LO)
1954     .Case("tba_hi", AMDGPU::TBA_HI)
1955     .Case("null", AMDGPU::SGPR_NULL)
1956     .Default(0);
1957 }
1958 
1959 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1960                                     SMLoc &EndLoc) {
1961   auto R = parseRegister();
1962   if (!R) return true;
1963   assert(R->isReg());
1964   RegNo = R->getReg();
1965   StartLoc = R->getStartLoc();
1966   EndLoc = R->getEndLoc();
1967   return false;
1968 }
1969 
1970 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1971                                             RegisterKind RegKind, unsigned Reg1,
1972                                             unsigned RegNum) {
1973   switch (RegKind) {
1974   case IS_SPECIAL:
1975     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1976       Reg = AMDGPU::EXEC;
1977       RegWidth = 2;
1978       return true;
1979     }
1980     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1981       Reg = AMDGPU::FLAT_SCR;
1982       RegWidth = 2;
1983       return true;
1984     }
1985     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
1986       Reg = AMDGPU::XNACK_MASK;
1987       RegWidth = 2;
1988       return true;
1989     }
1990     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1991       Reg = AMDGPU::VCC;
1992       RegWidth = 2;
1993       return true;
1994     }
1995     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1996       Reg = AMDGPU::TBA;
1997       RegWidth = 2;
1998       return true;
1999     }
2000     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2001       Reg = AMDGPU::TMA;
2002       RegWidth = 2;
2003       return true;
2004     }
2005     return false;
2006   case IS_VGPR:
2007   case IS_SGPR:
2008   case IS_AGPR:
2009   case IS_TTMP:
2010     if (Reg1 != Reg + RegWidth) {
2011       return false;
2012     }
2013     RegWidth++;
2014     return true;
2015   default:
2016     llvm_unreachable("unexpected register kind");
2017   }
2018 }
2019 
2020 static const StringRef Registers[] = {
2021   { "v" },
2022   { "s" },
2023   { "ttmp" },
2024   { "acc" },
2025   { "a" },
2026 };
2027 
2028 bool
2029 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2030                             const AsmToken &NextToken) const {
2031 
2032   // A list of consecutive registers: [s0,s1,s2,s3]
2033   if (Token.is(AsmToken::LBrac))
2034     return true;
2035 
2036   if (!Token.is(AsmToken::Identifier))
2037     return false;
2038 
2039   // A single register like s0 or a range of registers like s[0:1]
2040 
2041   StringRef RegName = Token.getString();
2042 
2043   for (StringRef Reg : Registers) {
2044     if (RegName.startswith(Reg)) {
2045       if (Reg.size() < RegName.size()) {
2046         unsigned RegNum;
2047         // A single register with an index: rXX
2048         if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum))
2049           return true;
2050       } else {
2051         // A range of registers: r[XX:YY].
2052         if (NextToken.is(AsmToken::LBrac))
2053           return true;
2054       }
2055     }
2056   }
2057 
2058   return getSpecialRegForName(RegName) != 0;
2059 }
2060 
2061 bool
2062 AMDGPUAsmParser::isRegister()
2063 {
2064   return isRegister(getToken(), peekToken());
2065 }
2066 
2067 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2068                                           unsigned &RegNum, unsigned &RegWidth,
2069                                           unsigned *DwordRegIndex) {
2070   if (DwordRegIndex) { *DwordRegIndex = 0; }
2071   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2072   if (getLexer().is(AsmToken::Identifier)) {
2073     StringRef RegName = Parser.getTok().getString();
2074     if ((Reg = getSpecialRegForName(RegName))) {
2075       Parser.Lex();
2076       RegKind = IS_SPECIAL;
2077     } else {
2078       unsigned RegNumIndex = 0;
2079       if (RegName[0] == 'v') {
2080         RegNumIndex = 1;
2081         RegKind = IS_VGPR;
2082       } else if (RegName[0] == 's') {
2083         RegNumIndex = 1;
2084         RegKind = IS_SGPR;
2085       } else if (RegName[0] == 'a') {
2086         RegNumIndex = RegName.startswith("acc") ? 3 : 1;
2087         RegKind = IS_AGPR;
2088       } else if (RegName.startswith("ttmp")) {
2089         RegNumIndex = strlen("ttmp");
2090         RegKind = IS_TTMP;
2091       } else {
2092         return false;
2093       }
2094       if (RegName.size() > RegNumIndex) {
2095         // Single 32-bit register: vXX.
2096         if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
2097           return false;
2098         Parser.Lex();
2099         RegWidth = 1;
2100       } else {
2101         // Range of registers: v[XX:YY]. ":YY" is optional.
2102         Parser.Lex();
2103         int64_t RegLo, RegHi;
2104         if (getLexer().isNot(AsmToken::LBrac))
2105           return false;
2106         Parser.Lex();
2107 
2108         if (getParser().parseAbsoluteExpression(RegLo))
2109           return false;
2110 
2111         const bool isRBrace = getLexer().is(AsmToken::RBrac);
2112         if (!isRBrace && getLexer().isNot(AsmToken::Colon))
2113           return false;
2114         Parser.Lex();
2115 
2116         if (isRBrace) {
2117           RegHi = RegLo;
2118         } else {
2119           if (getParser().parseAbsoluteExpression(RegHi))
2120             return false;
2121 
2122           if (getLexer().isNot(AsmToken::RBrac))
2123             return false;
2124           Parser.Lex();
2125         }
2126         RegNum = (unsigned) RegLo;
2127         RegWidth = (RegHi - RegLo) + 1;
2128       }
2129     }
2130   } else if (getLexer().is(AsmToken::LBrac)) {
2131     // List of consecutive registers: [s0,s1,s2,s3]
2132     Parser.Lex();
2133     if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
2134       return false;
2135     if (RegWidth != 1)
2136       return false;
2137     RegisterKind RegKind1;
2138     unsigned Reg1, RegNum1, RegWidth1;
2139     do {
2140       if (getLexer().is(AsmToken::Comma)) {
2141         Parser.Lex();
2142       } else if (getLexer().is(AsmToken::RBrac)) {
2143         Parser.Lex();
2144         break;
2145       } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
2146         if (RegWidth1 != 1) {
2147           return false;
2148         }
2149         if (RegKind1 != RegKind) {
2150           return false;
2151         }
2152         if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
2153           return false;
2154         }
2155       } else {
2156         return false;
2157       }
2158     } while (true);
2159   } else {
2160     return false;
2161   }
2162   switch (RegKind) {
2163   case IS_SPECIAL:
2164     RegNum = 0;
2165     RegWidth = 1;
2166     break;
2167   case IS_VGPR:
2168   case IS_SGPR:
2169   case IS_AGPR:
2170   case IS_TTMP:
2171   {
2172     unsigned Size = 1;
2173     if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2174       // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
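      // For example, a 64-bit range such as s[2:3] starts at an even SGPR and
      // passes the alignment check below, while s[1:2] would be rejected.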
2175       Size = std::min(RegWidth, 4u);
2176     }
2177     if (RegNum % Size != 0)
2178       return false;
2179     if (DwordRegIndex) { *DwordRegIndex = RegNum; }
2180     RegNum = RegNum / Size;
2181     int RCID = getRegClass(RegKind, RegWidth);
2182     if (RCID == -1)
2183       return false;
2184     const MCRegisterClass RC = TRI->getRegClass(RCID);
2185     if (RegNum >= RC.getNumRegs())
2186       return false;
2187     Reg = RC.getRegister(RegNum);
2188     break;
2189   }
2190 
2191   default:
2192     llvm_unreachable("unexpected register kind");
2193   }
2194 
2195   if (!subtargetHasRegister(*TRI, Reg))
2196     return false;
2197   return true;
2198 }
2199 
2200 Optional<StringRef>
2201 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2202   switch (RegKind) {
2203   case IS_VGPR:
2204     return StringRef(".amdgcn.next_free_vgpr");
2205   case IS_SGPR:
2206     return StringRef(".amdgcn.next_free_sgpr");
2207   default:
2208     return None;
2209   }
2210 }
2211 
2212 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2213   auto SymbolName = getGprCountSymbolName(RegKind);
2214   assert(SymbolName && "initializing invalid register kind");
2215   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2216   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2217 }
2218 
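// Illustrative example of the update below (assuming code object v3 symbols
// are in use): after a register use such as v3 (DwordRegIndex = 3,
// RegWidth = 1), .amdgcn.next_free_vgpr is raised to at least 4.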
2219 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2220                                             unsigned DwordRegIndex,
2221                                             unsigned RegWidth) {
2222   // Symbols are only defined for GCN targets
2223   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2224     return true;
2225 
2226   auto SymbolName = getGprCountSymbolName(RegKind);
2227   if (!SymbolName)
2228     return true;
2229   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2230 
2231   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2232   int64_t OldCount;
2233 
2234   if (!Sym->isVariable())
2235     return !Error(getParser().getTok().getLoc(),
2236                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2237   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2238     return !Error(
2239         getParser().getTok().getLoc(),
2240         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2241 
2242   if (OldCount <= NewMax)
2243     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2244 
2245   return true;
2246 }
2247 
2248 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
2249   const auto &Tok = Parser.getTok();
2250   SMLoc StartLoc = Tok.getLoc();
2251   SMLoc EndLoc = Tok.getEndLoc();
2252   RegisterKind RegKind;
2253   unsigned Reg, RegNum, RegWidth, DwordRegIndex;
2254 
2255   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
2256     // FIXME: improve error messages (bug 41303).
2257     Error(StartLoc, "not a valid operand.");
2258     return nullptr;
2259   }
2260   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2261     if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
2262       return nullptr;
2263   } else
2264     KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
2265   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2266 }
2267 
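// Informal summary of the forms handled below (not exhaustive): plain
// integers such as 0x1234, FP literals with an optional sign such as -1.5,
// and symbolic expressions such as sym+1 that are kept for later evaluation.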
2268 OperandMatchResultTy
2269 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2270   // TODO: add syntactic sugar for 1/(2*PI)
2271 
2272   assert(!isRegister());
2273   assert(!isModifier());
2274 
2275   const auto& Tok = getToken();
2276   const auto& NextTok = peekToken();
2277   bool IsReal = Tok.is(AsmToken::Real);
2278   SMLoc S = getLoc();
2279   bool Negate = false;
2280 
2281   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2282     lex();
2283     IsReal = true;
2284     Negate = true;
2285   }
2286 
2287   if (IsReal) {
2288     // Floating-point expressions are not supported;
2289     // only floating-point literals with an
2290     // optional sign are allowed.
2291 
2292     StringRef Num = getTokenStr();
2293     lex();
2294 
2295     APFloat RealVal(APFloat::IEEEdouble());
2296     auto roundMode = APFloat::rmNearestTiesToEven;
2297     if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) {
2298       return MatchOperand_ParseFail;
2299     }
2300     if (Negate)
2301       RealVal.changeSign();
2302 
2303     Operands.push_back(
2304       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2305                                AMDGPUOperand::ImmTyNone, true));
2306 
2307     return MatchOperand_Success;
2308 
2309   } else {
2310     int64_t IntVal;
2311     const MCExpr *Expr;
2312     SMLoc S = getLoc();
2313 
2314     if (HasSP3AbsModifier) {
2315       // This is a workaround for handling expressions
2316       // as arguments of the SP3 'abs' modifier, for example:
2317       //     |1.0|
2318       //     |-1|
2319       //     |1+x|
2320       // This syntax is not compatible with the syntax of standard
2321       // MC expressions (due to the trailing '|').
2322       SMLoc EndLoc;
2323       if (getParser().parsePrimaryExpr(Expr, EndLoc))
2324         return MatchOperand_ParseFail;
2325     } else {
2326       if (Parser.parseExpression(Expr))
2327         return MatchOperand_ParseFail;
2328     }
2329 
2330     if (Expr->evaluateAsAbsolute(IntVal)) {
2331       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2332     } else {
2333       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2334     }
2335 
2336     return MatchOperand_Success;
2337   }
2338 
2339   return MatchOperand_NoMatch;
2340 }
2341 
2342 OperandMatchResultTy
2343 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2344   if (!isRegister())
2345     return MatchOperand_NoMatch;
2346 
2347   if (auto R = parseRegister()) {
2348     assert(R->isReg());
2349     Operands.push_back(std::move(R));
2350     return MatchOperand_Success;
2351   }
2352   return MatchOperand_ParseFail;
2353 }
2354 
2355 OperandMatchResultTy
2356 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2357   auto res = parseReg(Operands);
2358   if (res != MatchOperand_NoMatch) {
2359     return res;
2360   } else if (isModifier()) {
2361     return MatchOperand_NoMatch;
2362   } else {
2363     return parseImm(Operands, HasSP3AbsMod);
2364   }
2365 }
2366 
2367 bool
2368 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2369   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2370     const auto &str = Token.getString();
2371     return str == "abs" || str == "neg" || str == "sext";
2372   }
2373   return false;
2374 }
2375 
2376 bool
2377 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2378   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2379 }
2380 
2381 bool
2382 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2383   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2384 }
2385 
2386 bool
2387 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2388   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2389 }
2390 
2391 // Check if this is an operand modifier or an opcode modifier
2392 // which may look like an expression but is not. We should
2393 // avoid parsing these modifiers as expressions. Currently
2394 // recognized sequences are:
2395 //   |...|
2396 //   abs(...)
2397 //   neg(...)
2398 //   sext(...)
2399 //   -reg
2400 //   -|...|
2401 //   -abs(...)
2402 //   name:...
2403 // Note that simple opcode modifiers like 'gds' may be parsed as
2404 // expressions; this is a special case. See getExpressionAsToken.
2405 //
2406 bool
2407 AMDGPUAsmParser::isModifier() {
2408 
2409   AsmToken Tok = getToken();
2410   AsmToken NextToken[2];
2411   peekTokens(NextToken);
2412 
2413   return isOperandModifier(Tok, NextToken[0]) ||
2414          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2415          isOpcodeModifierWithVal(Tok, NextToken[0]);
2416 }
2417 
2418 // Check if the current token is an SP3 'neg' modifier.
2419 // Currently this modifier is allowed in the following context:
2420 //
2421 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2422 // 2. Before an 'abs' modifier: -abs(...)
2423 // 3. Before an SP3 'abs' modifier: -|...|
2424 //
2425 // In all other cases "-" is handled as a part
2426 // of an expression that follows the sign.
2427 //
2428 // Note: When "-" is followed by an integer literal,
2429 // this is interpreted as integer negation rather
2430 // than a floating-point NEG modifier applied to the literal.
2431 // Besides being counter-intuitive, such use of the floating-point
2432 // NEG modifier would have resulted in different meanings
2433 // of integer literals used with VOP1/2/C and VOP3,
2434 // for example:
2435 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2436 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2437 // Negative fp literals with a preceding "-" are
2438 // handled likewise for uniformity.
2439 //
2440 bool
2441 AMDGPUAsmParser::parseSP3NegModifier() {
2442 
2443   AsmToken NextToken[2];
2444   peekTokens(NextToken);
2445 
2446   if (isToken(AsmToken::Minus) &&
2447       (isRegister(NextToken[0], NextToken[1]) ||
2448        NextToken[0].is(AsmToken::Pipe) ||
2449        isId(NextToken[0], "abs"))) {
2450     lex();
2451     return true;
2452   }
2453 
2454   return false;
2455 }
2456 
2457 OperandMatchResultTy
2458 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2459                                               bool AllowImm) {
2460   bool Neg, SP3Neg;
2461   bool Abs, SP3Abs;
2462   SMLoc Loc;
2463 
2464   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2465   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2466     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2467     return MatchOperand_ParseFail;
2468   }
2469 
2470   SP3Neg = parseSP3NegModifier();
2471 
2472   Loc = getLoc();
2473   Neg = trySkipId("neg");
2474   if (Neg && SP3Neg) {
2475     Error(Loc, "expected register or immediate");
2476     return MatchOperand_ParseFail;
2477   }
2478   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2479     return MatchOperand_ParseFail;
2480 
2481   Abs = trySkipId("abs");
2482   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2483     return MatchOperand_ParseFail;
2484 
2485   Loc = getLoc();
2486   SP3Abs = trySkipToken(AsmToken::Pipe);
2487   if (Abs && SP3Abs) {
2488     Error(Loc, "expected register or immediate");
2489     return MatchOperand_ParseFail;
2490   }
2491 
2492   OperandMatchResultTy Res;
2493   if (AllowImm) {
2494     Res = parseRegOrImm(Operands, SP3Abs);
2495   } else {
2496     Res = parseReg(Operands);
2497   }
2498   if (Res != MatchOperand_Success) {
2499     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2500   }
2501 
2502   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2503     return MatchOperand_ParseFail;
2504   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2505     return MatchOperand_ParseFail;
2506   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2507     return MatchOperand_ParseFail;
2508 
2509   AMDGPUOperand::Modifiers Mods;
2510   Mods.Abs = Abs || SP3Abs;
2511   Mods.Neg = Neg || SP3Neg;
2512 
2513   if (Mods.hasFPModifiers()) {
2514     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2515     if (Op.isExpr()) {
2516       Error(Op.getStartLoc(), "expected an absolute expression");
2517       return MatchOperand_ParseFail;
2518     }
2519     Op.setModifiers(Mods);
2520   }
2521   return MatchOperand_Success;
2522 }
2523 
2524 OperandMatchResultTy
2525 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2526                                                bool AllowImm) {
2527   bool Sext = trySkipId("sext");
2528   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2529     return MatchOperand_ParseFail;
2530 
2531   OperandMatchResultTy Res;
2532   if (AllowImm) {
2533     Res = parseRegOrImm(Operands);
2534   } else {
2535     Res = parseReg(Operands);
2536   }
2537   if (Res != MatchOperand_Success) {
2538     return Sext? MatchOperand_ParseFail : Res;
2539   }
2540 
2541   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2542     return MatchOperand_ParseFail;
2543 
2544   AMDGPUOperand::Modifiers Mods;
2545   Mods.Sext = Sext;
2546 
2547   if (Mods.hasIntModifiers()) {
2548     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2549     if (Op.isExpr()) {
2550       Error(Op.getStartLoc(), "expected an absolute expression");
2551       return MatchOperand_ParseFail;
2552     }
2553     Op.setModifiers(Mods);
2554   }
2555 
2556   return MatchOperand_Success;
2557 }
2558 
2559 OperandMatchResultTy
2560 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2561   return parseRegOrImmWithFPInputMods(Operands, false);
2562 }
2563 
2564 OperandMatchResultTy
2565 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2566   return parseRegOrImmWithIntInputMods(Operands, false);
2567 }
2568 
2569 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2570   auto Loc = getLoc();
2571   if (trySkipId("off")) {
2572     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2573                                                 AMDGPUOperand::ImmTyOff, false));
2574     return MatchOperand_Success;
2575   }
2576 
2577   if (!isRegister())
2578     return MatchOperand_NoMatch;
2579 
2580   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2581   if (Reg) {
2582     Operands.push_back(std::move(Reg));
2583     return MatchOperand_Success;
2584   }
2585 
2586   return MatchOperand_ParseFail;
2587 
2588 }
2589 
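// Rough example of the forced-encoding checks below: a mnemonic suffix such
// as '_e32' typically forces a 32-bit encoding, so requesting it for an
// instruction that is only available as VOP3 yields Match_InvalidOperand.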
2590 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2591   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2592 
2593   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2594       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2595       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2596       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2597     return Match_InvalidOperand;
2598 
2599   if ((TSFlags & SIInstrFlags::VOP3) &&
2600       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2601       getForcedEncodingSize() != 64)
2602     return Match_PreferE32;
2603 
2604   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2605       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2606     // v_mac_f32/16 allow only dst_sel == DWORD;
2607     auto OpNum =
2608         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2609     const auto &Op = Inst.getOperand(OpNum);
2610     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2611       return Match_InvalidOperand;
2612     }
2613   }
2614 
2615   return Match_Success;
2616 }
2617 
2618 // What asm variants we should check
2619 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2620   if (getForcedEncodingSize() == 32) {
2621     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2622     return makeArrayRef(Variants);
2623   }
2624 
2625   if (isForcedVOP3()) {
2626     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2627     return makeArrayRef(Variants);
2628   }
2629 
2630   if (isForcedSDWA()) {
2631     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2632                                         AMDGPUAsmVariants::SDWA9};
2633     return makeArrayRef(Variants);
2634   }
2635 
2636   if (isForcedDPP()) {
2637     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2638     return makeArrayRef(Variants);
2639   }
2640 
2641   static const unsigned Variants[] = {
2642     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2643     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2644   };
2645 
2646   return makeArrayRef(Variants);
2647 }
2648 
2649 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2650   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2651   const unsigned Num = Desc.getNumImplicitUses();
2652   for (unsigned i = 0; i < Num; ++i) {
2653     unsigned Reg = Desc.ImplicitUses[i];
2654     switch (Reg) {
2655     case AMDGPU::FLAT_SCR:
2656     case AMDGPU::VCC:
2657     case AMDGPU::VCC_LO:
2658     case AMDGPU::VCC_HI:
2659     case AMDGPU::M0:
2660       return Reg;
2661     default:
2662       break;
2663     }
2664   }
2665   return AMDGPU::NoRegister;
2666 }
2667 
2668 // NB: This code is correct only when used to check constant
2669 // bus limitations because GFX7 supports no f16 inline constants.
2670 // Note that there are no cases where a GFX7 opcode violates
2671 // constant bus limitations due to the use of an f16 constant.
2672 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2673                                        unsigned OpIdx) const {
2674   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2675 
2676   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2677     return false;
2678   }
2679 
2680   const MCOperand &MO = Inst.getOperand(OpIdx);
2681 
2682   int64_t Val = MO.getImm();
2683   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2684 
2685   switch (OpSize) { // expected operand size
2686   case 8:
2687     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2688   case 4:
2689     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2690   case 2: {
2691     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2692     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2693         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2694         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2695         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2696         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
2697         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
2698       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2699     } else {
2700       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2701     }
2702   }
2703   default:
2704     llvm_unreachable("invalid operand size");
2705   }
2706 }
2707 
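// Informal example: on GFX10 a VOP3 instruction such as
// 'v_add_f32_e64 v0, s0, s1' may read two different SGPRs over the constant
// bus, while the 64-bit shifts listed below are limited to a single one.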
2708 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
2709   if (!isGFX10())
2710     return 1;
2711 
2712   switch (Opcode) {
2713   // 64-bit shift instructions can use only one scalar value input
2714   case AMDGPU::V_LSHLREV_B64:
2715   case AMDGPU::V_LSHLREV_B64_gfx10:
2716   case AMDGPU::V_LSHL_B64:
2717   case AMDGPU::V_LSHRREV_B64:
2718   case AMDGPU::V_LSHRREV_B64_gfx10:
2719   case AMDGPU::V_LSHR_B64:
2720   case AMDGPU::V_ASHRREV_I64:
2721   case AMDGPU::V_ASHRREV_I64_gfx10:
2722   case AMDGPU::V_ASHR_I64:
2723     return 1;
2724   default:
2725     return 2;
2726   }
2727 }
2728 
2729 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2730   const MCOperand &MO = Inst.getOperand(OpIdx);
2731   if (MO.isImm()) {
2732     return !isInlineConstant(Inst, OpIdx);
2733   } else if (MO.isReg()) {
2734     auto Reg = MO.getReg();
2735     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2736     return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL;
2737   } else {
2738     return true;
2739   }
2740 }
2741 
2742 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2743   const unsigned Opcode = Inst.getOpcode();
2744   const MCInstrDesc &Desc = MII.get(Opcode);
2745   unsigned ConstantBusUseCount = 0;
2746   unsigned NumLiterals = 0;
2747   unsigned LiteralSize;
2748 
2749   if (Desc.TSFlags &
2750       (SIInstrFlags::VOPC |
2751        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2752        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2753        SIInstrFlags::SDWA)) {
2754     // Check special imm operands (used by madmk, etc)
2755     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2756       ++ConstantBusUseCount;
2757     }
2758 
2759     SmallDenseSet<unsigned> SGPRsUsed;
2760     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2761     if (SGPRUsed != AMDGPU::NoRegister) {
2762       SGPRsUsed.insert(SGPRUsed);
2763       ++ConstantBusUseCount;
2764     }
2765 
2766     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2767     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2768     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2769 
2770     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2771 
2772     for (int OpIdx : OpIndices) {
2773       if (OpIdx == -1) break;
2774 
2775       const MCOperand &MO = Inst.getOperand(OpIdx);
2776       if (usesConstantBus(Inst, OpIdx)) {
2777         if (MO.isReg()) {
2778           const unsigned Reg = mc2PseudoReg(MO.getReg());
2779           // Pairs of registers with a partial intersection like these:
2780           //   s0, s[0:1]
2781           //   flat_scratch_lo, flat_scratch
2782           //   flat_scratch_lo, flat_scratch_hi
2783           // are theoretically valid but are disabled anyway.
2784           // Note that this code mimics SIInstrInfo::verifyInstruction.
2785           if (!SGPRsUsed.count(Reg)) {
2786             SGPRsUsed.insert(Reg);
2787             ++ConstantBusUseCount;
2788           }
2789         } else { // Expression or a literal
2790 
2791           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
2792             continue; // special operand like VINTERP attr_chan
2793 
2794           // An instruction may use only one literal.
2795           // This has been validated in a previous step.
2796           // See validateVOP3Literal.
2797           // This literal may be used as more than one operand.
2798           // If all these operands are of the same size,
2799           // this literal counts as one scalar value.
2800           // Otherwise it counts as 2 scalar values.
2801           // See "GFX10 Shader Programming", section 3.6.2.3.
2802 
2803           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
2804           if (Size < 4) Size = 4;
2805 
2806           if (NumLiterals == 0) {
2807             NumLiterals = 1;
2808             LiteralSize = Size;
2809           } else if (LiteralSize != Size) {
2810             NumLiterals = 2;
2811           }
2812         }
2813       }
2814     }
2815   }
2816   ConstantBusUseCount += NumLiterals;
2817 
2818   return ConstantBusUseCount <= getConstantBusLimit(Opcode);
2819 }
2820 
2821 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2822   const unsigned Opcode = Inst.getOpcode();
2823   const MCInstrDesc &Desc = MII.get(Opcode);
2824 
2825   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2826   if (DstIdx == -1 ||
2827       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2828     return true;
2829   }
2830 
2831   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2832 
2833   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2834   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2835   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2836 
2837   assert(DstIdx != -1);
2838   const MCOperand &Dst = Inst.getOperand(DstIdx);
2839   assert(Dst.isReg());
2840   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2841 
2842   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2843 
2844   for (int SrcIdx : SrcIndices) {
2845     if (SrcIdx == -1) break;
2846     const MCOperand &Src = Inst.getOperand(SrcIdx);
2847     if (Src.isReg()) {
2848       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2849       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2850         return false;
2851       }
2852     }
2853   }
2854 
2855   return true;
2856 }
2857 
2858 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2859 
2860   const unsigned Opc = Inst.getOpcode();
2861   const MCInstrDesc &Desc = MII.get(Opc);
2862 
2863   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2864     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2865     assert(ClampIdx != -1);
2866     return Inst.getOperand(ClampIdx).getImm() == 0;
2867   }
2868 
2869   return true;
2870 }
2871 
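// Worked example of the size check below (informal, assuming d16 is not
// set): with dmask = 0x7 (3 components enabled) and tfe = 1, the vdata
// operand must be 4 dwords, e.g. v[0:3].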
2872 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2873 
2874   const unsigned Opc = Inst.getOpcode();
2875   const MCInstrDesc &Desc = MII.get(Opc);
2876 
2877   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2878     return true;
2879 
2880   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2881   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2882   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2883 
2884   assert(VDataIdx != -1);
2885   assert(DMaskIdx != -1);
2886   assert(TFEIdx != -1);
2887 
2888   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2889   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2890   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2891   if (DMask == 0)
2892     DMask = 1;
2893 
2894   unsigned DataSize =
2895     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2896   if (hasPackedD16()) {
2897     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2898     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2899       DataSize = (DataSize + 1) / 2;
2900   }
2901 
2902   return (VDataSize / 4) == DataSize + TFESize;
2903 }
2904 
2905 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
2906   const unsigned Opc = Inst.getOpcode();
2907   const MCInstrDesc &Desc = MII.get(Opc);
2908 
2909   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
2910     return true;
2911 
2912   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
2913   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
2914       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
2915   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
2916   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
2917   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2918 
2919   assert(VAddr0Idx != -1);
2920   assert(SrsrcIdx != -1);
2921   assert(DimIdx != -1);
2922   assert(SrsrcIdx > VAddr0Idx);
2923 
2924   unsigned Dim = Inst.getOperand(DimIdx).getImm();
2925   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
2926   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
2927   unsigned VAddrSize =
2928       IsNSA ? SrsrcIdx - VAddr0Idx
2929             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
2930 
2931   unsigned AddrSize = BaseOpcode->NumExtraArgs +
2932                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
2933                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
2934                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
2935   if (!IsNSA) {
2936     if (AddrSize > 8)
2937       AddrSize = 16;
2938     else if (AddrSize > 4)
2939       AddrSize = 8;
2940   }
2941 
2942   return VAddrSize == AddrSize;
2943 }
2944 
2945 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
2946 
2947   const unsigned Opc = Inst.getOpcode();
2948   const MCInstrDesc &Desc = MII.get(Opc);
2949 
2950   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2951     return true;
2952   if (!Desc.mayLoad() || !Desc.mayStore())
2953     return true; // Not atomic
2954 
2955   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2956   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2957 
2958   // This is an incomplete check because image_atomic_cmpswap
2959   // may only use 0x3 and 0xf while other atomic operations
2960   // may use 0x1 and 0x3. However, these limitations are
2961   // verified when we check that dmask matches dst size.
2962   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
2963 }
2964 
2965 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
2966 
2967   const unsigned Opc = Inst.getOpcode();
2968   const MCInstrDesc &Desc = MII.get(Opc);
2969 
2970   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
2971     return true;
2972 
2973   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2974   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2975 
2976   // GATHER4 instructions use dmask in a different fashion compared to
2977   // other MIMG instructions. The only useful DMASK values are
2978   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
2979   // (red,red,red,red) etc.) The ISA document doesn't mention
2980   // this.
2981   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
2982 }
2983 
2984 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
2985 
2986   const unsigned Opc = Inst.getOpcode();
2987   const MCInstrDesc &Desc = MII.get(Opc);
2988 
2989   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2990     return true;
2991 
2992   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2993   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
2994     if (isCI() || isSI())
2995       return false;
2996   }
2997 
2998   return true;
2999 }
3000 
3001 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3002   const unsigned Opc = Inst.getOpcode();
3003   const MCInstrDesc &Desc = MII.get(Opc);
3004 
3005   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3006     return true;
3007 
3008   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3009   if (DimIdx < 0)
3010     return true;
3011 
3012   long Imm = Inst.getOperand(DimIdx).getImm();
3013   if (Imm < 0 || Imm >= 8)
3014     return false;
3015 
3016   return true;
3017 }
3018 
3019 static bool IsRevOpcode(const unsigned Opcode)
3020 {
3021   switch (Opcode) {
3022   case AMDGPU::V_SUBREV_F32_e32:
3023   case AMDGPU::V_SUBREV_F32_e64:
3024   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3025   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3026   case AMDGPU::V_SUBREV_F32_e32_vi:
3027   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3028   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3029   case AMDGPU::V_SUBREV_F32_e64_vi:
3030 
3031   case AMDGPU::V_SUBREV_I32_e32:
3032   case AMDGPU::V_SUBREV_I32_e64:
3033   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3034   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3035 
3036   case AMDGPU::V_SUBBREV_U32_e32:
3037   case AMDGPU::V_SUBBREV_U32_e64:
3038   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3039   case AMDGPU::V_SUBBREV_U32_e32_vi:
3040   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3041   case AMDGPU::V_SUBBREV_U32_e64_vi:
3042 
3043   case AMDGPU::V_SUBREV_U32_e32:
3044   case AMDGPU::V_SUBREV_U32_e64:
3045   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3046   case AMDGPU::V_SUBREV_U32_e32_vi:
3047   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3048   case AMDGPU::V_SUBREV_U32_e64_vi:
3049 
3050   case AMDGPU::V_SUBREV_F16_e32:
3051   case AMDGPU::V_SUBREV_F16_e64:
3052   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3053   case AMDGPU::V_SUBREV_F16_e32_vi:
3054   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3055   case AMDGPU::V_SUBREV_F16_e64_vi:
3056 
3057   case AMDGPU::V_SUBREV_U16_e32:
3058   case AMDGPU::V_SUBREV_U16_e64:
3059   case AMDGPU::V_SUBREV_U16_e32_vi:
3060   case AMDGPU::V_SUBREV_U16_e64_vi:
3061 
3062   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3063   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3064   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3065 
3066   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3067   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3068 
3069   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3070   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3071 
3072   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3073   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3074 
3075   case AMDGPU::V_LSHRREV_B32_e32:
3076   case AMDGPU::V_LSHRREV_B32_e64:
3077   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3078   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3079   case AMDGPU::V_LSHRREV_B32_e32_vi:
3080   case AMDGPU::V_LSHRREV_B32_e64_vi:
3081   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3082   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3083 
3084   case AMDGPU::V_ASHRREV_I32_e32:
3085   case AMDGPU::V_ASHRREV_I32_e64:
3086   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3087   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3088   case AMDGPU::V_ASHRREV_I32_e32_vi:
3089   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3090   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3091   case AMDGPU::V_ASHRREV_I32_e64_vi:
3092 
3093   case AMDGPU::V_LSHLREV_B32_e32:
3094   case AMDGPU::V_LSHLREV_B32_e64:
3095   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3096   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3097   case AMDGPU::V_LSHLREV_B32_e32_vi:
3098   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3099   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3100   case AMDGPU::V_LSHLREV_B32_e64_vi:
3101 
3102   case AMDGPU::V_LSHLREV_B16_e32:
3103   case AMDGPU::V_LSHLREV_B16_e64:
3104   case AMDGPU::V_LSHLREV_B16_e32_vi:
3105   case AMDGPU::V_LSHLREV_B16_e64_vi:
3106   case AMDGPU::V_LSHLREV_B16_gfx10:
3107 
3108   case AMDGPU::V_LSHRREV_B16_e32:
3109   case AMDGPU::V_LSHRREV_B16_e64:
3110   case AMDGPU::V_LSHRREV_B16_e32_vi:
3111   case AMDGPU::V_LSHRREV_B16_e64_vi:
3112   case AMDGPU::V_LSHRREV_B16_gfx10:
3113 
3114   case AMDGPU::V_ASHRREV_I16_e32:
3115   case AMDGPU::V_ASHRREV_I16_e64:
3116   case AMDGPU::V_ASHRREV_I16_e32_vi:
3117   case AMDGPU::V_ASHRREV_I16_e64_vi:
3118   case AMDGPU::V_ASHRREV_I16_gfx10:
3119 
3120   case AMDGPU::V_LSHLREV_B64:
3121   case AMDGPU::V_LSHLREV_B64_gfx10:
3122   case AMDGPU::V_LSHLREV_B64_vi:
3123 
3124   case AMDGPU::V_LSHRREV_B64:
3125   case AMDGPU::V_LSHRREV_B64_gfx10:
3126   case AMDGPU::V_LSHRREV_B64_vi:
3127 
3128   case AMDGPU::V_ASHRREV_I64:
3129   case AMDGPU::V_ASHRREV_I64_gfx10:
3130   case AMDGPU::V_ASHRREV_I64_vi:
3131 
3132   case AMDGPU::V_PK_LSHLREV_B16:
3133   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3134   case AMDGPU::V_PK_LSHLREV_B16_vi:
3135 
3136   case AMDGPU::V_PK_LSHRREV_B16:
3137   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3138   case AMDGPU::V_PK_LSHRREV_B16_vi:
3139   case AMDGPU::V_PK_ASHRREV_I16:
3140   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3141   case AMDGPU::V_PK_ASHRREV_I16_vi:
3142     return true;
3143   default:
3144     return false;
3145   }
3146 }
3147 
3148 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3149 
3150   using namespace SIInstrFlags;
3151   const unsigned Opcode = Inst.getOpcode();
3152   const MCInstrDesc &Desc = MII.get(Opcode);
3153 
  // The lds_direct register is defined so that it can be used with 9-bit
  // operands only. Ignore encodings which do not accept such operands.
3156   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3157     return true;
3158 
3159   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3160   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3161   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3162 
3163   const int SrcIndices[] = { Src1Idx, Src2Idx };
3164 
3165   // lds_direct cannot be specified as either src1 or src2.
3166   for (int SrcIdx : SrcIndices) {
3167     if (SrcIdx == -1) break;
3168     const MCOperand &Src = Inst.getOperand(SrcIdx);
3169     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3170       return false;
3171     }
3172   }
3173 
3174   if (Src0Idx == -1)
3175     return true;
3176 
3177   const MCOperand &Src = Inst.getOperand(Src0Idx);
3178   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3179     return true;
3180 
3181   // lds_direct is specified as src0. Check additional limitations.
3182   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3183 }
3184 
3185 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3186   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3187     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3188     if (Op.isFlatOffset())
3189       return Op.getStartLoc();
3190   }
3191   return getLoc();
3192 }
3193 
3194 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3195                                          const OperandVector &Operands) {
3196   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3197   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3198     return true;
3199 
3200   auto Opcode = Inst.getOpcode();
3201   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3202   assert(OpNum != -1);
3203 
3204   const auto &Op = Inst.getOperand(OpNum);
3205   if (!hasFlatOffsets() && Op.getImm() != 0) {
3206     Error(getFlatOffsetLoc(Operands),
3207           "flat offset modifier is not supported on this GPU");
3208     return false;
3209   }
3210 
  // The address offset is 13-bit signed for GFX9 and 12-bit signed for GFX10.
  // For the FLAT segment the offset must be non-negative;
  // the MSB is ignored and forced to zero.
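  // For example (derived from the checks below): on GFX9 a non-FLAT-segment
  // access accepts offsets in [-4096, 4095] and a FLAT-segment access accepts
  // [0, 4095]; on GFX10 the ranges are [-2048, 2047] and [0, 2047].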
3214   unsigned OffsetSize = isGFX9() ? 13 : 12;
3215   if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
3216     if (!isIntN(OffsetSize, Op.getImm())) {
3217       Error(getFlatOffsetLoc(Operands),
3218             isGFX9() ? "expected a 13-bit signed offset" :
3219                        "expected a 12-bit signed offset");
3220       return false;
3221     }
3222   } else {
3223     if (!isUIntN(OffsetSize - 1, Op.getImm())) {
3224       Error(getFlatOffsetLoc(Operands),
3225             isGFX9() ? "expected a 12-bit unsigned offset" :
3226                        "expected an 11-bit unsigned offset");
3227       return false;
3228     }
3229   }
3230 
3231   return true;
3232 }
3233 
3234 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
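  // SOP2/SOPC instructions may encode at most one unique 32-bit literal;
  // count the distinct non-inline literal values used by src0 and src1.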
3235   unsigned Opcode = Inst.getOpcode();
3236   const MCInstrDesc &Desc = MII.get(Opcode);
3237   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3238     return true;
3239 
3240   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3241   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3242 
3243   const int OpIndices[] = { Src0Idx, Src1Idx };
3244 
3245   unsigned NumLiterals = 0;
3246   uint32_t LiteralValue;
3247 
3248   for (int OpIdx : OpIndices) {
3249     if (OpIdx == -1) break;
3250 
3251     const MCOperand &MO = Inst.getOperand(OpIdx);
3252     if (MO.isImm() &&
3253         // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3254         AMDGPU::isSISrcOperand(Desc, OpIdx) &&
3255         !isInlineConstant(Inst, OpIdx)) {
3256       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3257       if (NumLiterals == 0 || LiteralValue != Value) {
3258         LiteralValue = Value;
3259         ++NumLiterals;
3260       }
3261     }
3262   }
3263 
3264   return NumLiterals <= 1;
3265 }
3266 
3267 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3268   const unsigned Opc = Inst.getOpcode();
3269   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3270       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3271     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3272     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3273 
3274     if (OpSel & ~3)
3275       return false;
3276   }
3277   return true;
3278 }
3279 
3280 // Check if VCC register matches wavefront size
3281 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3282   auto FB = getFeatureBits();
3283   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3284     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3285 }
3286 
3287 // VOP3 literal is only allowed in GFX10+ and only one can be used
3288 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3289   unsigned Opcode = Inst.getOpcode();
3290   const MCInstrDesc &Desc = MII.get(Opcode);
3291   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3292     return true;
3293 
3294   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3295   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3296   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3297 
3298   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3299 
3300   unsigned NumLiterals = 0;
3301   uint32_t LiteralValue;
3302 
3303   for (int OpIdx : OpIndices) {
3304     if (OpIdx == -1) break;
3305 
3306     const MCOperand &MO = Inst.getOperand(OpIdx);
3307     if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx))
3308       continue;
3309 
3310     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3311         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug])
3312       return false;
3313 
3314     if (!isInlineConstant(Inst, OpIdx)) {
3315       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3316       if (NumLiterals == 0 || LiteralValue != Value) {
3317         LiteralValue = Value;
3318         ++NumLiterals;
3319       }
3320     }
3321   }
3322 
3323   return !NumLiterals ||
3324          (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3325 }
3326 
3327 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3328                                           const SMLoc &IDLoc,
3329                                           const OperandVector &Operands) {
3330   if (!validateLdsDirect(Inst)) {
3331     Error(IDLoc,
3332       "invalid use of lds_direct");
3333     return false;
3334   }
3335   if (!validateSOPLiteral(Inst)) {
3336     Error(IDLoc,
3337       "only one literal operand is allowed");
3338     return false;
3339   }
3340   if (!validateVOP3Literal(Inst)) {
3341     Error(IDLoc,
3342       "invalid literal operand");
3343     return false;
3344   }
3345   if (!validateConstantBusLimitations(Inst)) {
3346     Error(IDLoc,
3347       "invalid operand (violates constant bus restrictions)");
3348     return false;
3349   }
3350   if (!validateEarlyClobberLimitations(Inst)) {
3351     Error(IDLoc,
3352       "destination must be different than all sources");
3353     return false;
3354   }
3355   if (!validateIntClampSupported(Inst)) {
3356     Error(IDLoc,
3357       "integer clamping is not supported on this GPU");
3358     return false;
3359   }
3360   if (!validateOpSel(Inst)) {
3361     Error(IDLoc,
3362       "invalid op_sel operand");
3363     return false;
3364   }
  // For MUBUF/MTBUF, d16 is part of the opcode, so there is nothing to validate.
3366   if (!validateMIMGD16(Inst)) {
3367     Error(IDLoc,
3368       "d16 modifier is not supported on this GPU");
3369     return false;
3370   }
3371   if (!validateMIMGDim(Inst)) {
3372     Error(IDLoc, "dim modifier is required on this GPU");
3373     return false;
3374   }
3375   if (!validateMIMGDataSize(Inst)) {
3376     Error(IDLoc,
3377       "image data size does not match dmask and tfe");
3378     return false;
3379   }
3380   if (!validateMIMGAddrSize(Inst)) {
3381     Error(IDLoc,
3382       "image address size does not match dim and a16");
3383     return false;
3384   }
3385   if (!validateMIMGAtomicDMask(Inst)) {
3386     Error(IDLoc,
3387       "invalid atomic image dmask");
3388     return false;
3389   }
3390   if (!validateMIMGGatherDMask(Inst)) {
3391     Error(IDLoc,
3392       "invalid image_gather dmask: only one bit must be set");
3393     return false;
3394   }
3395   if (!validateFlatOffset(Inst, Operands)) {
3396     return false;
3397   }
3398 
3399   return true;
3400 }
3401 
3402 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3403                                             const FeatureBitset &FBS,
3404                                             unsigned VariantID = 0);
3405 
3406 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3407                                               OperandVector &Operands,
3408                                               MCStreamer &Out,
3409                                               uint64_t &ErrorInfo,
3410                                               bool MatchingInlineAsm) {
3411   MCInst Inst;
3412   unsigned Result = Match_Success;
3413   for (auto Variant : getMatchedVariants()) {
3414     uint64_t EI;
3415     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3416                                   Variant);
    // Match statuses are ordered from least to most specific; we keep the most
    // specific status seen so far as the result:
3419     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3420     if ((R == Match_Success) ||
3421         (R == Match_PreferE32) ||
3422         (R == Match_MissingFeature && Result != Match_PreferE32) ||
3423         (R == Match_InvalidOperand && Result != Match_MissingFeature
3424                                    && Result != Match_PreferE32) ||
3425         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
3426                                    && Result != Match_MissingFeature
3427                                    && Result != Match_PreferE32)) {
3428       Result = R;
3429       ErrorInfo = EI;
3430     }
3431     if (R == Match_Success)
3432       break;
3433   }
3434 
3435   switch (Result) {
3436   default: break;
3437   case Match_Success:
3438     if (!validateInstruction(Inst, IDLoc, Operands)) {
3439       return true;
3440     }
3441     Inst.setLoc(IDLoc);
3442     Out.EmitInstruction(Inst, getSTI());
3443     return false;
3444 
3445   case Match_MissingFeature:
3446     return Error(IDLoc, "instruction not supported on this GPU");
3447 
3448   case Match_MnemonicFail: {
3449     FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3450     std::string Suggestion = AMDGPUMnemonicSpellCheck(
3451         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
3452     return Error(IDLoc, "invalid instruction" + Suggestion,
3453                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
3454   }
3455 
3456   case Match_InvalidOperand: {
3457     SMLoc ErrorLoc = IDLoc;
3458     if (ErrorInfo != ~0ULL) {
3459       if (ErrorInfo >= Operands.size()) {
3460         return Error(IDLoc, "too few operands for instruction");
3461       }
3462       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3463       if (ErrorLoc == SMLoc())
3464         ErrorLoc = IDLoc;
3465     }
3466     return Error(ErrorLoc, "invalid operand for instruction");
3467   }
3468 
3469   case Match_PreferE32:
3470     return Error(IDLoc, "internal error: instruction without _e64 suffix "
3471                         "should be encoded as e32");
3472   }
3473   llvm_unreachable("Implement any new match types added!");
3474 }
3475 
3476 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3477   int64_t Tmp = -1;
3478   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3479     return true;
3480   }
3481   if (getParser().parseAbsoluteExpression(Tmp)) {
3482     return true;
3483   }
3484   Ret = static_cast<uint32_t>(Tmp);
3485   return false;
3486 }
3487 
3488 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3489                                                uint32_t &Minor) {
3490   if (ParseAsAbsoluteExpression(Major))
3491     return TokError("invalid major version");
3492 
3493   if (getLexer().isNot(AsmToken::Comma))
3494     return TokError("minor version number required, comma expected");
3495   Lex();
3496 
3497   if (ParseAsAbsoluteExpression(Minor))
3498     return TokError("invalid minor version");
3499 
3500   return false;
3501 }
3502 
3503 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
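  // Accepts e.g. .amdgcn_target "amdgcn-amd-amdhsa--gfx906" (the string shown
  // here is illustrative); the quoted string must match the ISA version
  // string produced by IsaInfo::streamIsaVersion for the current subtarget.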
3504   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3505     return TokError("directive only supported for amdgcn architecture");
3506 
3507   std::string Target;
3508 
3509   SMLoc TargetStart = getTok().getLoc();
3510   if (getParser().parseEscapedString(Target))
3511     return true;
3512   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3513 
3514   std::string ExpectedTarget;
3515   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3516   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3517 
3518   if (Target != ExpectedTargetOS.str())
3519     return getParser().Error(TargetRange.Start, "target must match options",
3520                              TargetRange);
3521 
3522   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3523   return false;
3524 }
3525 
3526 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3527   return getParser().Error(Range.Start, "value out of range", Range);
3528 }
3529 
3530 bool AMDGPUAsmParser::calculateGPRBlocks(
3531     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3532     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
3533     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
3534     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
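  // Convert raw VGPR/SGPR counts into the granulated block counts stored in
  // the kernel descriptor, range-checking the SGPR count for the subtarget.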
3535   // TODO(scott.linder): These calculations are duplicated from
3536   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3537   IsaVersion Version = getIsaVersion(getSTI().getCPU());
3538 
3539   unsigned NumVGPRs = NextFreeVGPR;
3540   unsigned NumSGPRs = NextFreeSGPR;
3541 
3542   if (Version.Major >= 10)
3543     NumSGPRs = 0;
3544   else {
3545     unsigned MaxAddressableNumSGPRs =
3546         IsaInfo::getAddressableNumSGPRs(&getSTI());
3547 
3548     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3549         NumSGPRs > MaxAddressableNumSGPRs)
3550       return OutOfRangeError(SGPRRange);
3551 
3552     NumSGPRs +=
3553         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3554 
3555     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3556         NumSGPRs > MaxAddressableNumSGPRs)
3557       return OutOfRangeError(SGPRRange);
3558 
3559     if (Features.test(FeatureSGPRInitBug))
3560       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3561   }
3562 
3563   VGPRBlocks =
3564       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
3565   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3566 
3567   return false;
3568 }
3569 
3570 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
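  // Parses a block of .amdhsa_ directives, e.g. (kernel name and values are
  // illustrative; only the two .amdhsa_next_free_* directives are mandatory):
  //   .amdhsa_kernel my_kernel
  //     .amdhsa_next_free_vgpr 8
  //     .amdhsa_next_free_sgpr 16
  //   .end_amdhsa_kernel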
3571   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3572     return TokError("directive only supported for amdgcn architecture");
3573 
3574   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3575     return TokError("directive only supported for amdhsa OS");
3576 
3577   StringRef KernelName;
3578   if (getParser().parseIdentifier(KernelName))
3579     return true;
3580 
3581   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3582 
3583   StringSet<> Seen;
3584 
3585   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3586 
3587   SMRange VGPRRange;
3588   uint64_t NextFreeVGPR = 0;
3589   SMRange SGPRRange;
3590   uint64_t NextFreeSGPR = 0;
3591   unsigned UserSGPRCount = 0;
3592   bool ReserveVCC = true;
3593   bool ReserveFlatScr = true;
3594   bool ReserveXNACK = hasXNACK();
3595   Optional<bool> EnableWavefrontSize32;
3596 
3597   while (true) {
3598     while (getLexer().is(AsmToken::EndOfStatement))
3599       Lex();
3600 
3601     if (getLexer().isNot(AsmToken::Identifier))
3602       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3603 
3604     StringRef ID = getTok().getIdentifier();
3605     SMRange IDRange = getTok().getLocRange();
3606     Lex();
3607 
3608     if (ID == ".end_amdhsa_kernel")
3609       break;
3610 
3611     if (Seen.find(ID) != Seen.end())
3612       return TokError(".amdhsa_ directives cannot be repeated");
3613     Seen.insert(ID);
3614 
3615     SMLoc ValStart = getTok().getLoc();
3616     int64_t IVal;
3617     if (getParser().parseAbsoluteExpression(IVal))
3618       return true;
3619     SMLoc ValEnd = getTok().getLoc();
3620     SMRange ValRange = SMRange(ValStart, ValEnd);
3621 
3622     if (IVal < 0)
3623       return OutOfRangeError(ValRange);
3624 
3625     uint64_t Val = IVal;
3626 
3627 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
3628   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
3629     return OutOfRangeError(RANGE);                                             \
3630   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3631 
3632     if (ID == ".amdhsa_group_segment_fixed_size") {
3633       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3634         return OutOfRangeError(ValRange);
3635       KD.group_segment_fixed_size = Val;
3636     } else if (ID == ".amdhsa_private_segment_fixed_size") {
3637       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3638         return OutOfRangeError(ValRange);
3639       KD.private_segment_fixed_size = Val;
3640     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3641       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3642                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3643                        Val, ValRange);
3644       if (Val)
3645         UserSGPRCount += 4;
3646     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3647       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3648                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3649                        ValRange);
3650       if (Val)
3651         UserSGPRCount += 2;
3652     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3653       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3654                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3655                        ValRange);
3656       if (Val)
3657         UserSGPRCount += 2;
3658     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3659       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3660                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3661                        Val, ValRange);
3662       if (Val)
3663         UserSGPRCount += 2;
3664     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3665       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3666                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3667                        ValRange);
3668       if (Val)
3669         UserSGPRCount += 2;
3670     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3671       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3672                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3673                        ValRange);
3674       if (Val)
3675         UserSGPRCount += 2;
3676     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3677       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3678                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3679                        Val, ValRange);
3680       if (Val)
3681         UserSGPRCount += 1;
3682     } else if (ID == ".amdhsa_wavefront_size32") {
3683       if (IVersion.Major < 10)
3684         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3685                                  IDRange);
3686       EnableWavefrontSize32 = Val;
3687       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3688                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
3689                        Val, ValRange);
3690     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3691       PARSE_BITS_ENTRY(
3692           KD.compute_pgm_rsrc2,
3693           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3694           ValRange);
3695     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3696       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3697                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3698                        ValRange);
3699     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3700       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3701                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3702                        ValRange);
3703     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3704       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3705                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3706                        ValRange);
3707     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3708       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3709                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3710                        ValRange);
3711     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3712       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3713                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3714                        ValRange);
3715     } else if (ID == ".amdhsa_next_free_vgpr") {
3716       VGPRRange = ValRange;
3717       NextFreeVGPR = Val;
3718     } else if (ID == ".amdhsa_next_free_sgpr") {
3719       SGPRRange = ValRange;
3720       NextFreeSGPR = Val;
3721     } else if (ID == ".amdhsa_reserve_vcc") {
3722       if (!isUInt<1>(Val))
3723         return OutOfRangeError(ValRange);
3724       ReserveVCC = Val;
3725     } else if (ID == ".amdhsa_reserve_flat_scratch") {
3726       if (IVersion.Major < 7)
3727         return getParser().Error(IDRange.Start, "directive requires gfx7+",
3728                                  IDRange);
3729       if (!isUInt<1>(Val))
3730         return OutOfRangeError(ValRange);
3731       ReserveFlatScr = Val;
3732     } else if (ID == ".amdhsa_reserve_xnack_mask") {
3733       if (IVersion.Major < 8)
3734         return getParser().Error(IDRange.Start, "directive requires gfx8+",
3735                                  IDRange);
3736       if (!isUInt<1>(Val))
3737         return OutOfRangeError(ValRange);
3738       ReserveXNACK = Val;
3739     } else if (ID == ".amdhsa_float_round_mode_32") {
3740       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3741                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
3742     } else if (ID == ".amdhsa_float_round_mode_16_64") {
3743       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3744                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
3745     } else if (ID == ".amdhsa_float_denorm_mode_32") {
3746       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3747                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
3748     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
3749       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3750                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
3751                        ValRange);
3752     } else if (ID == ".amdhsa_dx10_clamp") {
3753       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3754                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
3755     } else if (ID == ".amdhsa_ieee_mode") {
3756       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
3757                        Val, ValRange);
3758     } else if (ID == ".amdhsa_fp16_overflow") {
3759       if (IVersion.Major < 9)
3760         return getParser().Error(IDRange.Start, "directive requires gfx9+",
3761                                  IDRange);
3762       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
3763                        ValRange);
3764     } else if (ID == ".amdhsa_workgroup_processor_mode") {
3765       if (IVersion.Major < 10)
3766         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3767                                  IDRange);
3768       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
3769                        ValRange);
3770     } else if (ID == ".amdhsa_memory_ordered") {
3771       if (IVersion.Major < 10)
3772         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3773                                  IDRange);
3774       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
3775                        ValRange);
3776     } else if (ID == ".amdhsa_forward_progress") {
3777       if (IVersion.Major < 10)
3778         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3779                                  IDRange);
3780       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
3781                        ValRange);
3782     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
3783       PARSE_BITS_ENTRY(
3784           KD.compute_pgm_rsrc2,
3785           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
3786           ValRange);
3787     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
3788       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3789                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
3790                        Val, ValRange);
3791     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
3792       PARSE_BITS_ENTRY(
3793           KD.compute_pgm_rsrc2,
3794           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
3795           ValRange);
3796     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
3797       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3798                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
3799                        Val, ValRange);
3800     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
3801       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3802                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
3803                        Val, ValRange);
3804     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
3805       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3806                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
3807                        Val, ValRange);
3808     } else if (ID == ".amdhsa_exception_int_div_zero") {
3809       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3810                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
3811                        Val, ValRange);
3812     } else {
3813       return getParser().Error(IDRange.Start,
3814                                "unknown .amdhsa_kernel directive", IDRange);
3815     }
3816 
3817 #undef PARSE_BITS_ENTRY
3818   }
3819 
3820   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
3821     return TokError(".amdhsa_next_free_vgpr directive is required");
3822 
3823   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
3824     return TokError(".amdhsa_next_free_sgpr directive is required");
3825 
3826   unsigned VGPRBlocks;
3827   unsigned SGPRBlocks;
3828   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
3829                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
3830                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
3831                          SGPRBlocks))
3832     return true;
3833 
3834   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
3835           VGPRBlocks))
3836     return OutOfRangeError(VGPRRange);
3837   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3838                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
3839 
3840   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
3841           SGPRBlocks))
3842     return OutOfRangeError(SGPRRange);
3843   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3844                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
3845                   SGPRBlocks);
3846 
3847   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
3848     return TokError("too many user SGPRs enabled");
3849   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
3850                   UserSGPRCount);
3851 
3852   getTargetStreamer().EmitAmdhsaKernelDescriptor(
3853       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
3854       ReserveFlatScr, ReserveXNACK);
3855   return false;
3856 }
3857 
3858 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
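  // Accepts e.g. .hsa_code_object_version 2,1 (version numbers illustrative).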
3859   uint32_t Major;
3860   uint32_t Minor;
3861 
3862   if (ParseDirectiveMajorMinor(Major, Minor))
3863     return true;
3864 
3865   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
3866   return false;
3867 }
3868 
3869 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
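  // Accepts e.g. .hsa_code_object_isa 7,0,0,"AMD","AMDGPU" (numeric values
  // are illustrative).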
3870   uint32_t Major;
3871   uint32_t Minor;
3872   uint32_t Stepping;
3873   StringRef VendorName;
3874   StringRef ArchName;
3875 
3876   // If this directive has no arguments, then use the ISA version for the
3877   // targeted GPU.
3878   if (getLexer().is(AsmToken::EndOfStatement)) {
3879     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3880     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
3881                                                       ISA.Stepping,
3882                                                       "AMD", "AMDGPU");
3883     return false;
3884   }
3885 
3886   if (ParseDirectiveMajorMinor(Major, Minor))
3887     return true;
3888 
3889   if (getLexer().isNot(AsmToken::Comma))
3890     return TokError("stepping version number required, comma expected");
3891   Lex();
3892 
3893   if (ParseAsAbsoluteExpression(Stepping))
3894     return TokError("invalid stepping version");
3895 
3896   if (getLexer().isNot(AsmToken::Comma))
3897     return TokError("vendor name required, comma expected");
3898   Lex();
3899 
3900   if (getLexer().isNot(AsmToken::String))
3901     return TokError("invalid vendor name");
3902 
3903   VendorName = getLexer().getTok().getStringContents();
3904   Lex();
3905 
3906   if (getLexer().isNot(AsmToken::Comma))
3907     return TokError("arch name required, comma expected");
3908   Lex();
3909 
3910   if (getLexer().isNot(AsmToken::String))
3911     return TokError("invalid arch name");
3912 
3913   ArchName = getLexer().getTok().getStringContents();
3914   Lex();
3915 
3916   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
3917                                                     VendorName, ArchName);
3918   return false;
3919 }
3920 
3921 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
3922                                                amd_kernel_code_t &Header) {
3923   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
3924   // assembly for backwards compatibility.
3925   if (ID == "max_scratch_backing_memory_byte_size") {
3926     Parser.eatToEndOfStatement();
3927     return false;
3928   }
3929 
3930   SmallString<40> ErrStr;
3931   raw_svector_ostream Err(ErrStr);
3932   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
3933     return TokError(Err.str());
3934   }
3935   Lex();
3936 
3937   if (ID == "enable_wavefront_size32") {
3938     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
3939       if (!isGFX10())
3940         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
3941       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
3942         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
3943     } else {
3944       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
3945         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
3946     }
3947   }
3948 
3949   if (ID == "wavefront_size") {
3950     if (Header.wavefront_size == 5) {
3951       if (!isGFX10())
3952         return TokError("wavefront_size=5 is only allowed on GFX10+");
3953       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
3954         return TokError("wavefront_size=5 requires +WavefrontSize32");
3955     } else if (Header.wavefront_size == 6) {
3956       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
3957         return TokError("wavefront_size=6 requires +WavefrontSize64");
3958     }
3959   }
3960 
3961   if (ID == "enable_wgp_mode") {
3962     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
3963       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
3964   }
3965 
3966   if (ID == "enable_mem_ordered") {
3967     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
3968       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
3969   }
3970 
3971   if (ID == "enable_fwd_progress") {
3972     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
3973       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
3974   }
3975 
3976   return false;
3977 }
3978 
3979 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
3980   amd_kernel_code_t Header;
3981   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
3982 
3983   while (true) {
3984     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
3985     // will set the current token to EndOfStatement.
3986     while(getLexer().is(AsmToken::EndOfStatement))
3987       Lex();
3988 
3989     if (getLexer().isNot(AsmToken::Identifier))
3990       return TokError("expected value identifier or .end_amd_kernel_code_t");
3991 
3992     StringRef ID = getLexer().getTok().getIdentifier();
3993     Lex();
3994 
3995     if (ID == ".end_amd_kernel_code_t")
3996       break;
3997 
3998     if (ParseAMDKernelCodeTValue(ID, Header))
3999       return true;
4000   }
4001 
4002   getTargetStreamer().EmitAMDKernelCodeT(Header);
4003 
4004   return false;
4005 }
4006 
4007 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4008   if (getLexer().isNot(AsmToken::Identifier))
4009     return TokError("expected symbol name");
4010 
4011   StringRef KernelName = Parser.getTok().getString();
4012 
4013   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4014                                            ELF::STT_AMDGPU_HSA_KERNEL);
4015   Lex();
4016   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
4017     KernelScope.initialize(getContext());
4018   return false;
4019 }
4020 
4021 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4022   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4023     return Error(getParser().getTok().getLoc(),
4024                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4025                  "architectures");
4026   }
4027 
4028   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
4029 
4030   std::string ISAVersionStringFromSTI;
4031   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4032   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4033 
4034   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4035     return Error(getParser().getTok().getLoc(),
4036                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4037                  "arguments specified through the command line");
4038   }
4039 
4040   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4041   Lex();
4042 
4043   return false;
4044 }
4045 
4046 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4047   const char *AssemblerDirectiveBegin;
4048   const char *AssemblerDirectiveEnd;
4049   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4050       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
4051           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4052                             HSAMD::V3::AssemblerDirectiveEnd)
4053           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4054                             HSAMD::AssemblerDirectiveEnd);
4055 
4056   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4057     return Error(getParser().getTok().getLoc(),
4058                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4059                  "not available on non-amdhsa OSes")).str());
4060   }
4061 
4062   std::string HSAMetadataString;
4063   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4064                           HSAMetadataString))
4065     return true;
4066 
4067   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
4068     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4069       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4070   } else {
4071     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4072       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4073   }
4074 
4075   return false;
4076 }
4077 
4078 /// Common code to parse out a block of text (typically YAML) between start and
4079 /// end directives.
4080 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4081                                           const char *AssemblerDirectiveEnd,
4082                                           std::string &CollectString) {
4083 
4084   raw_string_ostream CollectStream(CollectString);
4085 
4086   getLexer().setSkipSpace(false);
4087 
4088   bool FoundEnd = false;
4089   while (!getLexer().is(AsmToken::Eof)) {
4090     while (getLexer().is(AsmToken::Space)) {
4091       CollectStream << getLexer().getTok().getString();
4092       Lex();
4093     }
4094 
4095     if (getLexer().is(AsmToken::Identifier)) {
4096       StringRef ID = getLexer().getTok().getIdentifier();
4097       if (ID == AssemblerDirectiveEnd) {
4098         Lex();
4099         FoundEnd = true;
4100         break;
4101       }
4102     }
4103 
4104     CollectStream << Parser.parseStringToEndOfStatement()
4105                   << getContext().getAsmInfo()->getSeparatorString();
4106 
4107     Parser.eatToEndOfStatement();
4108   }
4109 
4110   getLexer().setSkipSpace(true);
4111 
4112   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
4113     return TokError(Twine("expected directive ") +
4114                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4115   }
4116 
4117   CollectStream.flush();
4118   return false;
4119 }
4120 
4121 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4122 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4123   std::string String;
4124   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4125                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4126     return true;
4127 
4128   auto PALMetadata = getTargetStreamer().getPALMetadata();
4129   if (!PALMetadata->setFromString(String))
4130     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4131   return false;
4132 }
4133 
4134 /// Parse the assembler directive for old linear-format PAL metadata.
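/// The directive operands are comma-separated key,value pairs, e.g.
/// "0x2c0a,0x42, 0x2c0b,0x7" (numbers illustrative); each pair sets one PAL
/// metadata register.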
4135 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4136   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4137     return Error(getParser().getTok().getLoc(),
4138                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4139                  "not available on non-amdpal OSes")).str());
4140   }
4141 
4142   auto PALMetadata = getTargetStreamer().getPALMetadata();
4143   PALMetadata->setLegacy();
4144   for (;;) {
4145     uint32_t Key, Value;
4146     if (ParseAsAbsoluteExpression(Key)) {
4147       return TokError(Twine("invalid value in ") +
4148                       Twine(PALMD::AssemblerDirective));
4149     }
4150     if (getLexer().isNot(AsmToken::Comma)) {
4151       return TokError(Twine("expected an even number of values in ") +
4152                       Twine(PALMD::AssemblerDirective));
4153     }
4154     Lex();
4155     if (ParseAsAbsoluteExpression(Value)) {
4156       return TokError(Twine("invalid value in ") +
4157                       Twine(PALMD::AssemblerDirective));
4158     }
4159     PALMetadata->setRegister(Key, Value);
4160     if (getLexer().isNot(AsmToken::Comma))
4161       break;
4162     Lex();
4163   }
4164   return false;
4165 }
4166 
4167 /// ParseDirectiveAMDGPULDS
4168 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
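///  e.g. ".amdgpu_lds lds_buffer, 4096, 16" (symbol and values illustrative)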
4169 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4170   if (getParser().checkForValidSection())
4171     return true;
4172 
4173   StringRef Name;
4174   SMLoc NameLoc = getLexer().getLoc();
4175   if (getParser().parseIdentifier(Name))
4176     return TokError("expected identifier in directive");
4177 
4178   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4179   if (parseToken(AsmToken::Comma, "expected ','"))
4180     return true;
4181 
4182   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4183 
4184   int64_t Size;
4185   SMLoc SizeLoc = getLexer().getLoc();
4186   if (getParser().parseAbsoluteExpression(Size))
4187     return true;
4188   if (Size < 0)
4189     return Error(SizeLoc, "size must be non-negative");
4190   if (Size > LocalMemorySize)
4191     return Error(SizeLoc, "size is too large");
4192 
4193   int64_t Align = 4;
4194   if (getLexer().is(AsmToken::Comma)) {
4195     Lex();
4196     SMLoc AlignLoc = getLexer().getLoc();
4197     if (getParser().parseAbsoluteExpression(Align))
4198       return true;
4199     if (Align < 0 || !isPowerOf2_64(Align))
4200       return Error(AlignLoc, "alignment must be a power of two");
4201 
4202     // Alignment larger than the size of LDS is possible in theory, as long
    // as the linker manages to place the symbol at address 0, but we do want
4204     // to make sure the alignment fits nicely into a 32-bit integer.
4205     if (Align >= 1u << 31)
4206       return Error(AlignLoc, "alignment is too large");
4207   }
4208 
4209   if (parseToken(AsmToken::EndOfStatement,
4210                  "unexpected token in '.amdgpu_lds' directive"))
4211     return true;
4212 
4213   Symbol->redefineIfPossible();
4214   if (!Symbol->isUndefined())
4215     return Error(NameLoc, "invalid symbol redefinition");
4216 
4217   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align);
4218   return false;
4219 }
4220 
4221 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4222   StringRef IDVal = DirectiveID.getString();
4223 
4224   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
4225     if (IDVal == ".amdgcn_target")
4226       return ParseDirectiveAMDGCNTarget();
4227 
4228     if (IDVal == ".amdhsa_kernel")
4229       return ParseDirectiveAMDHSAKernel();
4230 
4231     // TODO: Restructure/combine with PAL metadata directive.
4232     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4233       return ParseDirectiveHSAMetadata();
4234   } else {
4235     if (IDVal == ".hsa_code_object_version")
4236       return ParseDirectiveHSACodeObjectVersion();
4237 
4238     if (IDVal == ".hsa_code_object_isa")
4239       return ParseDirectiveHSACodeObjectISA();
4240 
4241     if (IDVal == ".amd_kernel_code_t")
4242       return ParseDirectiveAMDKernelCodeT();
4243 
4244     if (IDVal == ".amdgpu_hsa_kernel")
4245       return ParseDirectiveAMDGPUHsaKernel();
4246 
4247     if (IDVal == ".amd_amdgpu_isa")
4248       return ParseDirectiveISAVersion();
4249 
4250     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4251       return ParseDirectiveHSAMetadata();
4252   }
4253 
4254   if (IDVal == ".amdgpu_lds")
4255     return ParseDirectiveAMDGPULDS();
4256 
4257   if (IDVal == PALMD::AssemblerDirectiveBegin)
4258     return ParseDirectivePALMetadataBegin();
4259 
4260   if (IDVal == PALMD::AssemblerDirective)
4261     return ParseDirectivePALMetadata();
4262 
4263   return true;
4264 }
4265 
4266 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4267                                            unsigned RegNo) const {
4268 
4269   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4270        R.isValid(); ++R) {
4271     if (*R == RegNo)
4272       return isGFX9() || isGFX10();
4273   }
4274 
4275   // GFX10 has 2 more SGPRs 104 and 105.
4276   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4277        R.isValid(); ++R) {
4278     if (*R == RegNo)
4279       return hasSGPR104_SGPR105();
4280   }
4281 
4282   switch (RegNo) {
4283   case AMDGPU::SRC_SHARED_BASE:
4284   case AMDGPU::SRC_SHARED_LIMIT:
4285   case AMDGPU::SRC_PRIVATE_BASE:
4286   case AMDGPU::SRC_PRIVATE_LIMIT:
4287   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4288     return !isCI() && !isSI() && !isVI();
4289   case AMDGPU::TBA:
4290   case AMDGPU::TBA_LO:
4291   case AMDGPU::TBA_HI:
4292   case AMDGPU::TMA:
4293   case AMDGPU::TMA_LO:
4294   case AMDGPU::TMA_HI:
4295     return !isGFX9() && !isGFX10();
4296   case AMDGPU::XNACK_MASK:
4297   case AMDGPU::XNACK_MASK_LO:
4298   case AMDGPU::XNACK_MASK_HI:
4299     return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4300   case AMDGPU::SGPR_NULL:
4301     return isGFX10();
4302   default:
4303     break;
4304   }
4305 
4306   if (isCI())
4307     return true;
4308 
4309   if (isSI() || isGFX10()) {
4310     // No flat_scr on SI.
4311     // On GFX10 flat scratch is not a valid register operand and can only be
4312     // accessed with s_setreg/s_getreg.
4313     switch (RegNo) {
4314     case AMDGPU::FLAT_SCR:
4315     case AMDGPU::FLAT_SCR_LO:
4316     case AMDGPU::FLAT_SCR_HI:
4317       return false;
4318     default:
4319       return true;
4320     }
4321   }
4322 
4323   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4324   // SI/CI have.
4325   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4326        R.isValid(); ++R) {
4327     if (*R == RegNo)
4328       return hasSGPR102_SGPR103();
4329   }
4330 
4331   return true;
4332 }
4333 
4334 OperandMatchResultTy
4335 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4336                               OperandMode Mode) {
4337   // Try to parse with a custom parser
4338   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4339 
  // If we successfully parsed the operand or if there was an error parsing,
  // we are done.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list.  This is only done
  // by the custom parser, so we shouldn't continue on to the generic parsing.
4346   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4347       getLexer().is(AsmToken::EndOfStatement))
4348     return ResTy;
4349 
4350   if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
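    // NSA (non-sequential address) syntax groups the address registers in
    // square brackets, e.g. "[v0, v2, v4]" (registers illustrative).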
4351     unsigned Prefix = Operands.size();
4352     SMLoc LBraceLoc = getTok().getLoc();
4353     Parser.Lex(); // eat the '['
4354 
4355     for (;;) {
4356       ResTy = parseReg(Operands);
4357       if (ResTy != MatchOperand_Success)
4358         return ResTy;
4359 
4360       if (getLexer().is(AsmToken::RBrac))
4361         break;
4362 
4363       if (getLexer().isNot(AsmToken::Comma))
4364         return MatchOperand_ParseFail;
4365       Parser.Lex();
4366     }
4367 
4368     if (Operands.size() - Prefix > 1) {
4369       Operands.insert(Operands.begin() + Prefix,
4370                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4371       Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4372                                                     getTok().getLoc()));
4373     }
4374 
4375     Parser.Lex(); // eat the ']'
4376     return MatchOperand_Success;
4377   }
4378 
4379   return parseRegOrImm(Operands);
4380 }
4381 
4382 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
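  // Strip a trailing encoding suffix and record it as a forced encoding, e.g.
  // "v_add_f32_e64" (mnemonic illustrative) becomes "v_add_f32" with the
  // 64-bit (VOP3) encoding forced.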
4383   // Clear any forced encodings from the previous instruction.
4384   setForcedEncodingSize(0);
4385   setForcedDPP(false);
4386   setForcedSDWA(false);
4387 
4388   if (Name.endswith("_e64")) {
4389     setForcedEncodingSize(64);
4390     return Name.substr(0, Name.size() - 4);
4391   } else if (Name.endswith("_e32")) {
4392     setForcedEncodingSize(32);
4393     return Name.substr(0, Name.size() - 4);
4394   } else if (Name.endswith("_dpp")) {
4395     setForcedDPP(true);
4396     return Name.substr(0, Name.size() - 4);
4397   } else if (Name.endswith("_sdwa")) {
4398     setForcedSDWA(true);
4399     return Name.substr(0, Name.size() - 5);
4400   }
4401   return Name;
4402 }
4403 
4404 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4405                                        StringRef Name,
4406                                        SMLoc NameLoc, OperandVector &Operands) {
4407   // Add the instruction mnemonic
4408   Name = parseMnemonicSuffix(Name);
4409   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4410 
4411   bool IsMIMG = Name.startswith("image_");
4412 
4413   while (!getLexer().is(AsmToken::EndOfStatement)) {
4414     OperandMode Mode = OperandMode_Default;
4415     if (IsMIMG && isGFX10() && Operands.size() == 2)
4416       Mode = OperandMode_NSA;
4417     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4418 
4419     // Eat the comma or space if there is one.
4420     if (getLexer().is(AsmToken::Comma))
4421       Parser.Lex();
4422 
4423     switch (Res) {
4424       case MatchOperand_Success: break;
4425       case MatchOperand_ParseFail:
4426         // FIXME: use real operand location rather than the current location.
4427         Error(getLexer().getLoc(), "failed parsing operand.");
4428         while (!getLexer().is(AsmToken::EndOfStatement)) {
4429           Parser.Lex();
4430         }
4431         return true;
4432       case MatchOperand_NoMatch:
4433         // FIXME: use real operand location rather than the current location.
4434         Error(getLexer().getLoc(), "not a valid operand.");
4435         while (!getLexer().is(AsmToken::EndOfStatement)) {
4436           Parser.Lex();
4437         }
4438         return true;
4439     }
4440   }
4441 
4442   return false;
4443 }
4444 
4445 //===----------------------------------------------------------------------===//
4446 // Utility functions
4447 //===----------------------------------------------------------------------===//
4448 
4449 OperandMatchResultTy
4450 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
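  // Parse "<Prefix>:<expr>", e.g. "offset:16" when Prefix is "offset" (the
  // example prefix and value are illustrative), storing the result in IntVal.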
4451 
4452   if (!trySkipId(Prefix, AsmToken::Colon))
4453     return MatchOperand_NoMatch;
4454 
4455   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4456 }
4457 
4458 OperandMatchResultTy
4459 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4460                                     AMDGPUOperand::ImmTy ImmTy,
4461                                     bool (*ConvertResult)(int64_t&)) {
4462   SMLoc S = getLoc();
4463   int64_t Value = 0;
4464 
4465   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4466   if (Res != MatchOperand_Success)
4467     return Res;
4468 
4469   if (ConvertResult && !ConvertResult(Value)) {
4470     Error(S, "invalid " + StringRef(Prefix) + " value.");
4471   }
4472 
4473   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4474   return MatchOperand_Success;
4475 }
4476 
4477 OperandMatchResultTy
4478 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4479                                              OperandVector &Operands,
4480                                              AMDGPUOperand::ImmTy ImmTy,
4481                                              bool (*ConvertResult)(int64_t&)) {
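  // Parse "<Prefix>:[b0,b1,...]" with up to four 0/1 elements, e.g.
  // "op_sel:[0,1]" (example prefix illustrative); element I sets bit I of the
  // resulting immediate.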
4482   SMLoc S = getLoc();
4483   if (!trySkipId(Prefix, AsmToken::Colon))
4484     return MatchOperand_NoMatch;
4485 
4486   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4487     return MatchOperand_ParseFail;
4488 
4489   unsigned Val = 0;
4490   const unsigned MaxSize = 4;
4491 
4492   // FIXME: How to verify the number of elements matches the number of src
4493   // operands?
4494   for (int I = 0; ; ++I) {
4495     int64_t Op;
4496     SMLoc Loc = getLoc();
4497     if (!parseExpr(Op))
4498       return MatchOperand_ParseFail;
4499 
4500     if (Op != 0 && Op != 1) {
4501       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4502       return MatchOperand_ParseFail;
4503     }
4504 
4505     Val |= (Op << I);
4506 
4507     if (trySkipToken(AsmToken::RBrac))
4508       break;
4509 
4510     if (I + 1 == MaxSize) {
4511       Error(getLoc(), "expected a closing square bracket");
4512       return MatchOperand_ParseFail;
4513     }
4514 
4515     if (!skipToken(AsmToken::Comma, "expected a comma"))
4516       return MatchOperand_ParseFail;
4517   }
4518 
4519   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4520   return MatchOperand_Success;
4521 }
4522 
4523 OperandMatchResultTy
4524 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4525                                AMDGPUOperand::ImmTy ImmTy) {
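  // The bare name (e.g. "glc", illustrative) sets the bit to 1, the
  // "no"-prefixed form ("noglc") sets it to 0, and an absent token leaves the
  // default of 0.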
4526   int64_t Bit = 0;
4527   SMLoc S = Parser.getTok().getLoc();
4528 
  // If we are already at the end of the statement, this is a default argument,
  // so use the default value.
4531   if (getLexer().isNot(AsmToken::EndOfStatement)) {
4532     switch(getLexer().getKind()) {
4533       case AsmToken::Identifier: {
4534         StringRef Tok = Parser.getTok().getString();
4535         if (Tok == Name) {
4536           if (Tok == "r128" && isGFX9())
4537             Error(S, "r128 modifier is not supported on this GPU");
4538           if (Tok == "a16" && !isGFX9() && !isGFX10())
4539             Error(S, "a16 modifier is not supported on this GPU");
4540           Bit = 1;
4541           Parser.Lex();
4542         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4543           Bit = 0;
4544           Parser.Lex();
4545         } else {
4546           return MatchOperand_NoMatch;
4547         }
4548         break;
4549       }
4550       default:
4551         return MatchOperand_NoMatch;
4552     }
4553   }
4554 
4555   if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4556     return MatchOperand_ParseFail;
4557 
4558   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4559   return MatchOperand_Success;
4560 }
4561 
4562 static void addOptionalImmOperand(
4563   MCInst& Inst, const OperandVector& Operands,
4564   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4565   AMDGPUOperand::ImmTy ImmT,
4566   int64_t Default = 0) {
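  // Append the immediate from Operands[OptionalIdx[ImmT]] to Inst if such an
  // operand was parsed; otherwise append the given Default value.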
4567   auto i = OptionalIdx.find(ImmT);
4568   if (i != OptionalIdx.end()) {
4569     unsigned Idx = i->second;
4570     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4571   } else {
4572     Inst.addOperand(MCOperand::createImm(Default));
4573   }
4574 }
4575 
4576 OperandMatchResultTy
4577 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4578   if (getLexer().isNot(AsmToken::Identifier)) {
4579     return MatchOperand_NoMatch;
4580   }
4581   StringRef Tok = Parser.getTok().getString();
4582   if (Tok != Prefix) {
4583     return MatchOperand_NoMatch;
4584   }
4585 
4586   Parser.Lex();
4587   if (getLexer().isNot(AsmToken::Colon)) {
4588     return MatchOperand_ParseFail;
4589   }
4590 
4591   Parser.Lex();
4592   if (getLexer().isNot(AsmToken::Identifier)) {
4593     return MatchOperand_ParseFail;
4594   }
4595 
4596   Value = Parser.getTok().getString();
4597   return MatchOperand_Success;
4598 }
4599 
4600 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
4601 // values to live in a joint format operand in the MCInst encoding.
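     // For example (illustrative syntax), "dfmt:6, nfmt:1" - given in either
     // order, each part optional - is folded into a single immediate:
     // Format = Dfmt | (Nfmt << 4).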
4602 OperandMatchResultTy
4603 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
4604   SMLoc S = Parser.getTok().getLoc();
4605   int64_t Dfmt = 0, Nfmt = 0;
4606   // dfmt and nfmt can appear in either order, and each is optional.
4607   bool GotDfmt = false, GotNfmt = false;
4608   while (!GotDfmt || !GotNfmt) {
4609     if (!GotDfmt) {
4610       auto Res = parseIntWithPrefix("dfmt", Dfmt);
4611       if (Res != MatchOperand_NoMatch) {
4612         if (Res != MatchOperand_Success)
4613           return Res;
4614         if (Dfmt >= 16) {
4615           Error(Parser.getTok().getLoc(), "out of range dfmt");
4616           return MatchOperand_ParseFail;
4617         }
4618         GotDfmt = true;
4619         Parser.Lex();
4620         continue;
4621       }
4622     }
4623     if (!GotNfmt) {
4624       auto Res = parseIntWithPrefix("nfmt", Nfmt);
4625       if (Res != MatchOperand_NoMatch) {
4626         if (Res != MatchOperand_Success)
4627           return Res;
4628         if (Nfmt >= 8) {
4629           Error(Parser.getTok().getLoc(), "out of range nfmt");
4630           return MatchOperand_ParseFail;
4631         }
4632         GotNfmt = true;
4633         Parser.Lex();
4634         continue;
4635       }
4636     }
4637     break;
4638   }
4639   if (!GotDfmt && !GotNfmt)
4640     return MatchOperand_NoMatch;
4641   auto Format = Dfmt | Nfmt << 4;
4642   Operands.push_back(
4643       AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
4644   return MatchOperand_Success;
4645 }
4646 
4647 //===----------------------------------------------------------------------===//
4648 // ds
4649 //===----------------------------------------------------------------------===//
4650 
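     // Convert parsed operands for DS instructions that use split offsets,
     // e.g. (illustrative) "ds_write2_b32 v1, v2, v3 offset0:4 offset1:8".
     // Registers are added first; offset0, offset1 and gds are optional and
     // default to 0 when omitted.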
4651 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
4652                                     const OperandVector &Operands) {
4653   OptionalImmIndexMap OptionalIdx;
4654 
4655   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4656     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4657 
4658     // Add the register arguments
4659     if (Op.isReg()) {
4660       Op.addRegOperands(Inst, 1);
4661       continue;
4662     }
4663 
4664     // Handle optional arguments
4665     OptionalIdx[Op.getImmTy()] = i;
4666   }
4667 
4668   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
4669   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
4670   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4671 
4672   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4673 }
4674 
4675 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
4676                                 bool IsGdsHardcoded) {
4677   OptionalImmIndexMap OptionalIdx;
4678 
4679   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4680     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4681 
4682     // Add the register arguments
4683     if (Op.isReg()) {
4684       Op.addRegOperands(Inst, 1);
4685       continue;
4686     }
4687 
4688     if (Op.isToken() && Op.getToken() == "gds") {
4689       IsGdsHardcoded = true;
4690       continue;
4691     }
4692 
4693     // Handle optional arguments
4694     OptionalIdx[Op.getImmTy()] = i;
4695   }
4696 
4697   AMDGPUOperand::ImmTy OffsetType =
4698     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
4699      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
4700      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
4701                                                       AMDGPUOperand::ImmTyOffset;
4702 
4703   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
4704 
4705   if (!IsGdsHardcoded) {
4706     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4707   }
4708   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4709 }
4710 
4711 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
4712   OptionalImmIndexMap OptionalIdx;
4713 
4714   unsigned OperandIdx[4];
4715   unsigned EnMask = 0;
4716   int SrcIdx = 0;
4717 
4718   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4719     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4720 
4721     // Add the register arguments
4722     if (Op.isReg()) {
4723       assert(SrcIdx < 4);
4724       OperandIdx[SrcIdx] = Inst.size();
4725       Op.addRegOperands(Inst, 1);
4726       ++SrcIdx;
4727       continue;
4728     }
4729 
4730     if (Op.isOff()) {
4731       assert(SrcIdx < 4);
4732       OperandIdx[SrcIdx] = Inst.size();
4733       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
4734       ++SrcIdx;
4735       continue;
4736     }
4737 
4738     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
4739       Op.addImmOperands(Inst, 1);
4740       continue;
4741     }
4742 
4743     if (Op.isToken() && Op.getToken() == "done")
4744       continue;
4745 
4746     // Handle optional arguments
4747     OptionalIdx[Op.getImmTy()] = i;
4748   }
4749 
4750   assert(SrcIdx == 4);
4751 
4752   bool Compr = false;
4753   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
4754     Compr = true;
4755     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
4756     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
4757     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
4758   }
4759 
4760   for (auto i = 0; i < SrcIdx; ++i) {
4761     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
4762       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
4763     }
4764   }
4765 
4766   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
4767   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
4768 
4769   Inst.addOperand(MCOperand::createImm(EnMask));
4770 }
4771 
4772 //===----------------------------------------------------------------------===//
4773 // s_waitcnt
4774 //===----------------------------------------------------------------------===//
4775 
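     // Encode CntVal into the counter field of IntVal selected by the given
     // encode/decode helpers. Returns true on failure, i.e. when the value does
     // not fit into the field; if Saturate is set, an oversized value is clamped
     // to the field maximum instead of failing.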
4776 static bool
4777 encodeCnt(
4778   const AMDGPU::IsaVersion ISA,
4779   int64_t &IntVal,
4780   int64_t CntVal,
4781   bool Saturate,
4782   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
4783   unsigned (*decode)(const IsaVersion &Version, unsigned))
4784 {
4785   bool Failed = false;
4786 
4787   IntVal = encode(ISA, IntVal, CntVal);
4788   if (CntVal != decode(ISA, IntVal)) {
4789     if (Saturate) {
4790       IntVal = encode(ISA, IntVal, -1);
4791     } else {
4792       Failed = true;
4793     }
4794   }
4795   return Failed;
4796 }
4797 
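     // Parse a single counter specifier of an s_waitcnt operand and merge it
     // into IntVal, e.g. (illustrative) "vmcnt(0)" in
     // "s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)". Counters may be separated by
     // '&' or ',', and the *_sat variants clamp out-of-range values.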
4798 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
4799 
4800   SMLoc CntLoc = getLoc();
4801   StringRef CntName = getTokenStr();
4802 
4803   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
4804       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
4805     return false;
4806 
4807   int64_t CntVal;
4808   SMLoc ValLoc = getLoc();
4809   if (!parseExpr(CntVal))
4810     return false;
4811 
4812   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4813 
4814   bool Failed = true;
4815   bool Sat = CntName.endswith("_sat");
4816 
4817   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
4818     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
4819   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
4820     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
4821   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
4822     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
4823   } else {
4824     Error(CntLoc, "invalid counter name " + CntName);
4825     return false;
4826   }
4827 
4828   if (Failed) {
4829     Error(ValLoc, "too large value for " + CntName);
4830     return false;
4831   }
4832 
4833   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
4834     return false;
4835 
4836   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
4837     if (isToken(AsmToken::EndOfStatement)) {
4838       Error(getLoc(), "expected a counter name");
4839       return false;
4840     }
4841   }
4842 
4843   return true;
4844 }
4845 
4846 OperandMatchResultTy
4847 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
4848   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4849   int64_t Waitcnt = getWaitcntBitMask(ISA);
4850   SMLoc S = getLoc();
4851 
4852   // If parse failed, do not return error code
4853   // to avoid excessive error messages.
4854   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
4855     while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
4856   } else {
4857     parseExpr(Waitcnt);
4858   }
4859 
4860   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
4861   return MatchOperand_Success;
4862 }
4863 
4864 bool
4865 AMDGPUOperand::isSWaitCnt() const {
4866   return isImm();
4867 }
4868 
4869 //===----------------------------------------------------------------------===//
4870 // hwreg
4871 //===----------------------------------------------------------------------===//
4872 
4873 bool
4874 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
4875                                 int64_t &Offset,
4876                                 int64_t &Width) {
4877   using namespace llvm::AMDGPU::Hwreg;
4878 
4879   // The register may be specified by name or using a numeric code
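       // e.g. (illustrative) "hwreg(HW_REG_GPR_ALLOC)" or "hwreg(5, 0, 6)"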
4880   if (isToken(AsmToken::Identifier) &&
4881       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
4882     HwReg.IsSymbolic = true;
4883     lex(); // skip message name
4884   } else if (!parseExpr(HwReg.Id)) {
4885     return false;
4886   }
4887 
4888   if (trySkipToken(AsmToken::RParen))
4889     return true;
4890 
4891   // parse optional params
4892   return
4893     skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
4894     parseExpr(Offset) &&
4895     skipToken(AsmToken::Comma, "expected a comma") &&
4896     parseExpr(Width) &&
4897     skipToken(AsmToken::RParen, "expected a closing parenthesis");
4898 }
4899 
4900 bool
4901 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
4902                                const int64_t Offset,
4903                                const int64_t Width,
4904                                const SMLoc Loc) {
4905 
4906   using namespace llvm::AMDGPU::Hwreg;
4907 
4908   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
4909     Error(Loc, "specified hardware register is not supported on this GPU");
4910     return false;
4911   } else if (!isValidHwreg(HwReg.Id)) {
4912     Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
4913     return false;
4914   } else if (!isValidHwregOffset(Offset)) {
4915     Error(Loc, "invalid bit offset: only 5-bit values are legal");
4916     return false;
4917   } else if (!isValidHwregWidth(Width)) {
4918     Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
4919     return false;
4920   }
4921   return true;
4922 }
4923 
4924 OperandMatchResultTy
4925 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
4926   using namespace llvm::AMDGPU::Hwreg;
4927 
4928   int64_t ImmVal = 0;
4929   SMLoc Loc = getLoc();
4930 
4931   // If parse failed, do not return error code
4932   // to avoid excessive error messages.
4933   if (trySkipId("hwreg", AsmToken::LParen)) {
4934     OperandInfoTy HwReg(ID_UNKNOWN_);
4935     int64_t Offset = OFFSET_DEFAULT_;
4936     int64_t Width = WIDTH_DEFAULT_;
4937     if (parseHwregBody(HwReg, Offset, Width) &&
4938         validateHwreg(HwReg, Offset, Width, Loc)) {
4939       ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
4940     }
4941   } else if (parseExpr(ImmVal)) {
4942     if (ImmVal < 0 || !isUInt<16>(ImmVal))
4943       Error(Loc, "invalid immediate: only 16-bit values are legal");
4944   }
4945 
4946   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
4947   return MatchOperand_Success;
4948 }
4949 
4950 bool AMDGPUOperand::isHwreg() const {
4951   return isImmTy(ImmTyHwreg);
4952 }
4953 
4954 //===----------------------------------------------------------------------===//
4955 // sendmsg
4956 //===----------------------------------------------------------------------===//
4957 
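     // Parse the body of a sendmsg operand, i.e. what follows "sendmsg(".
     // E.g. (illustrative) "sendmsg(MSG_GS, GS_OP_EMIT, 0)",
     // "sendmsg(MSG_GS_DONE, GS_OP_NOP)" or "sendmsg(MSG_INTERRUPT)"; the
     // message, operation and stream may also be given as numeric expressions.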
4958 bool
4959 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
4960                                   OperandInfoTy &Op,
4961                                   OperandInfoTy &Stream) {
4962   using namespace llvm::AMDGPU::SendMsg;
4963 
4964   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
4965     Msg.IsSymbolic = true;
4966     lex(); // skip message name
4967   } else if (!parseExpr(Msg.Id)) {
4968     return false;
4969   }
4970 
4971   if (trySkipToken(AsmToken::Comma)) {
4972     Op.IsDefined = true;
4973     if (isToken(AsmToken::Identifier) &&
4974         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
4975       lex(); // skip operation name
4976     } else if (!parseExpr(Op.Id)) {
4977       return false;
4978     }
4979 
4980     if (trySkipToken(AsmToken::Comma)) {
4981       Stream.IsDefined = true;
4982       if (!parseExpr(Stream.Id))
4983         return false;
4984     }
4985   }
4986 
4987   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
4988 }
4989 
4990 bool
4991 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
4992                                  const OperandInfoTy &Op,
4993                                  const OperandInfoTy &Stream,
4994                                  const SMLoc S) {
4995   using namespace llvm::AMDGPU::SendMsg;
4996 
4997   // Validation strictness depends on whether the message is specified
4998   // in a symbolic or in a numeric form. In the latter case
4999   // only the encoding possibility is checked.
5000   bool Strict = Msg.IsSymbolic;
5001 
5002   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
5003     Error(S, "invalid message id");
5004     return false;
5005   } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
5006     Error(S, Op.IsDefined ?
5007              "message does not support operations" :
5008              "missing message operation");
5009     return false;
5010   } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
5011     Error(S, "invalid operation id");
5012     return false;
5013   } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
5014     Error(S, "message operation does not support streams");
5015     return false;
5016   } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
5017     Error(S, "invalid message stream id");
5018     return false;
5019   }
5020   return true;
5021 }
5022 
5023 OperandMatchResultTy
5024 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
5025   using namespace llvm::AMDGPU::SendMsg;
5026 
5027   int64_t ImmVal = 0;
5028   SMLoc Loc = getLoc();
5029 
5030   // If parse failed, do not return error code
5031   // to avoid excessive error messages.
5032   if (trySkipId("sendmsg", AsmToken::LParen)) {
5033     OperandInfoTy Msg(ID_UNKNOWN_);
5034     OperandInfoTy Op(OP_NONE_);
5035     OperandInfoTy Stream(STREAM_ID_NONE_);
5036     if (parseSendMsgBody(Msg, Op, Stream) &&
5037         validateSendMsg(Msg, Op, Stream, Loc)) {
5038       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5039     }
5040   } else if (parseExpr(ImmVal)) {
5041     if (ImmVal < 0 || !isUInt<16>(ImmVal))
5042       Error(Loc, "invalid immediate: only 16-bit values are legal");
5043   }
5044 
5045   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5046   return MatchOperand_Success;
5047 }
5048 
5049 bool AMDGPUOperand::isSendMsg() const {
5050   return isImmTy(ImmTySendMsg);
5051 }
5052 
5053 //===----------------------------------------------------------------------===//
5054 // v_interp
5055 //===----------------------------------------------------------------------===//
5056 
5057 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5058   if (getLexer().getKind() != AsmToken::Identifier)
5059     return MatchOperand_NoMatch;
5060 
5061   StringRef Str = Parser.getTok().getString();
5062   int Slot = StringSwitch<int>(Str)
5063     .Case("p10", 0)
5064     .Case("p20", 1)
5065     .Case("p0", 2)
5066     .Default(-1);
5067 
5068   SMLoc S = Parser.getTok().getLoc();
5069   if (Slot == -1)
5070     return MatchOperand_ParseFail;
5071 
5072   Parser.Lex();
5073   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5074                                               AMDGPUOperand::ImmTyInterpSlot));
5075   return MatchOperand_Success;
5076 }
5077 
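     // Parse an interpolation attribute of the form "attr<N>.<chan>",
     // e.g. (illustrative) "attr0.x" or "attr32.w"; N must be in [0, 63].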
5078 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5079   if (getLexer().getKind() != AsmToken::Identifier)
5080     return MatchOperand_NoMatch;
5081 
5082   StringRef Str = Parser.getTok().getString();
5083   if (!Str.startswith("attr"))
5084     return MatchOperand_NoMatch;
5085 
5086   StringRef Chan = Str.take_back(2);
5087   int AttrChan = StringSwitch<int>(Chan)
5088     .Case(".x", 0)
5089     .Case(".y", 1)
5090     .Case(".z", 2)
5091     .Case(".w", 3)
5092     .Default(-1);
5093   if (AttrChan == -1)
5094     return MatchOperand_ParseFail;
5095 
5096   Str = Str.drop_back(2).drop_front(4);
5097 
5098   uint8_t Attr;
5099   if (Str.getAsInteger(10, Attr))
5100     return MatchOperand_ParseFail;
5101 
5102   SMLoc S = Parser.getTok().getLoc();
5103   Parser.Lex();
5104   if (Attr > 63) {
5105     Error(S, "out of bounds attr");
5106     return MatchOperand_Success;
5107   }
5108 
5109   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5110 
5111   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5112                                               AMDGPUOperand::ImmTyInterpAttr));
5113   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5114                                               AMDGPUOperand::ImmTyAttrChan));
5115   return MatchOperand_Success;
5116 }
5117 
5118 //===----------------------------------------------------------------------===//
5119 // exp
5120 //===----------------------------------------------------------------------===//
5121 
5122 void AMDGPUAsmParser::errorExpTgt() {
5123   Error(Parser.getTok().getLoc(), "invalid exp target");
5124 }
5125 
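     // Map an export target name to its encoding: "mrt0".."mrt7", "mrtz",
     // "null", "pos0".."pos3" ("pos4" and "prim" on GFX10 only), and
     // "param0".."param31". "invalid_target_<N>" is accepted syntactically but
     // reported as an error.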
5126 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5127                                                       uint8_t &Val) {
5128   if (Str == "null") {
5129     Val = 9;
5130     return MatchOperand_Success;
5131   }
5132 
5133   if (Str.startswith("mrt")) {
5134     Str = Str.drop_front(3);
5135     if (Str == "z") { // == mrtz
5136       Val = 8;
5137       return MatchOperand_Success;
5138     }
5139 
5140     if (Str.getAsInteger(10, Val))
5141       return MatchOperand_ParseFail;
5142 
5143     if (Val > 7)
5144       errorExpTgt();
5145 
5146     return MatchOperand_Success;
5147   }
5148 
5149   if (Str.startswith("pos")) {
5150     Str = Str.drop_front(3);
5151     if (Str.getAsInteger(10, Val))
5152       return MatchOperand_ParseFail;
5153 
5154     if (Val > 4 || (Val == 4 && !isGFX10()))
5155       errorExpTgt();
5156 
5157     Val += 12;
5158     return MatchOperand_Success;
5159   }
5160 
5161   if (isGFX10() && Str == "prim") {
5162     Val = 20;
5163     return MatchOperand_Success;
5164   }
5165 
5166   if (Str.startswith("param")) {
5167     Str = Str.drop_front(5);
5168     if (Str.getAsInteger(10, Val))
5169       return MatchOperand_ParseFail;
5170 
5171     if (Val >= 32)
5172       errorExpTgt();
5173 
5174     Val += 32;
5175     return MatchOperand_Success;
5176   }
5177 
5178   if (Str.startswith("invalid_target_")) {
5179     Str = Str.drop_front(15);
5180     if (Str.getAsInteger(10, Val))
5181       return MatchOperand_ParseFail;
5182 
5183     errorExpTgt();
5184     return MatchOperand_Success;
5185   }
5186 
5187   return MatchOperand_NoMatch;
5188 }
5189 
5190 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5191   uint8_t Val;
5192   StringRef Str = Parser.getTok().getString();
5193 
5194   auto Res = parseExpTgtImpl(Str, Val);
5195   if (Res != MatchOperand_Success)
5196     return Res;
5197 
5198   SMLoc S = Parser.getTok().getLoc();
5199   Parser.Lex();
5200 
5201   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5202                                               AMDGPUOperand::ImmTyExpTgt));
5203   return MatchOperand_Success;
5204 }
5205 
5206 //===----------------------------------------------------------------------===//
5207 // parser helpers
5208 //===----------------------------------------------------------------------===//
5209 
5210 bool
5211 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5212   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5213 }
5214 
5215 bool
5216 AMDGPUAsmParser::isId(const StringRef Id) const {
5217   return isId(getToken(), Id);
5218 }
5219 
5220 bool
5221 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5222   return getTokenKind() == Kind;
5223 }
5224 
5225 bool
5226 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5227   if (isId(Id)) {
5228     lex();
5229     return true;
5230   }
5231   return false;
5232 }
5233 
5234 bool
5235 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5236   if (isId(Id) && peekToken().is(Kind)) {
5237     lex();
5238     lex();
5239     return true;
5240   }
5241   return false;
5242 }
5243 
5244 bool
5245 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5246   if (isToken(Kind)) {
5247     lex();
5248     return true;
5249   }
5250   return false;
5251 }
5252 
5253 bool
5254 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5255                            const StringRef ErrMsg) {
5256   if (!trySkipToken(Kind)) {
5257     Error(getLoc(), ErrMsg);
5258     return false;
5259   }
5260   return true;
5261 }
5262 
5263 bool
5264 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5265   return !getParser().parseAbsoluteExpression(Imm);
5266 }
5267 
5268 bool
5269 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
5270   SMLoc S = getLoc();
5271 
5272   const MCExpr *Expr;
5273   if (Parser.parseExpression(Expr))
5274     return false;
5275 
5276   int64_t IntVal;
5277   if (Expr->evaluateAsAbsolute(IntVal)) {
5278     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
5279   } else {
5280     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
5281   }
5282   return true;
5283 }
5284 
5285 bool
5286 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5287   if (isToken(AsmToken::String)) {
5288     Val = getToken().getStringContents();
5289     lex();
5290     return true;
5291   } else {
5292     Error(getLoc(), ErrMsg);
5293     return false;
5294   }
5295 }
5296 
5297 AsmToken
5298 AMDGPUAsmParser::getToken() const {
5299   return Parser.getTok();
5300 }
5301 
5302 AsmToken
5303 AMDGPUAsmParser::peekToken() {
5304   return getLexer().peekTok();
5305 }
5306 
5307 void
5308 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
5309   auto TokCount = getLexer().peekTokens(Tokens);
5310 
5311   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
5312     Tokens[Idx] = AsmToken(AsmToken::Error, "");
5313 }
5314 
5315 AsmToken::TokenKind
5316 AMDGPUAsmParser::getTokenKind() const {
5317   return getLexer().getKind();
5318 }
5319 
5320 SMLoc
5321 AMDGPUAsmParser::getLoc() const {
5322   return getToken().getLoc();
5323 }
5324 
5325 StringRef
5326 AMDGPUAsmParser::getTokenStr() const {
5327   return getToken().getString();
5328 }
5329 
5330 void
5331 AMDGPUAsmParser::lex() {
5332   Parser.Lex();
5333 }
5334 
5335 //===----------------------------------------------------------------------===//
5336 // swizzle
5337 //===----------------------------------------------------------------------===//
5338 
5339 LLVM_READNONE
5340 static unsigned
5341 encodeBitmaskPerm(const unsigned AndMask,
5342                   const unsigned OrMask,
5343                   const unsigned XorMask) {
5344   using namespace llvm::AMDGPU::Swizzle;
5345 
5346   return BITMASK_PERM_ENC |
5347          (AndMask << BITMASK_AND_SHIFT) |
5348          (OrMask  << BITMASK_OR_SHIFT)  |
5349          (XorMask << BITMASK_XOR_SHIFT);
5350 }
5351 
5352 bool
5353 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5354                                       const unsigned MinVal,
5355                                       const unsigned MaxVal,
5356                                       const StringRef ErrMsg) {
5357   for (unsigned i = 0; i < OpNum; ++i) {
5358     if (!skipToken(AsmToken::Comma, "expected a comma")){
5359       return false;
5360     }
5361     SMLoc ExprLoc = Parser.getTok().getLoc();
5362     if (!parseExpr(Op[i])) {
5363       return false;
5364     }
5365     if (Op[i] < MinVal || Op[i] > MaxVal) {
5366       Error(ExprLoc, ErrMsg);
5367       return false;
5368     }
5369   }
5370 
5371   return true;
5372 }
5373 
5374 bool
5375 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5376   using namespace llvm::AMDGPU::Swizzle;
5377 
5378   int64_t Lane[LANE_NUM];
5379   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5380                            "expected a 2-bit lane id")) {
5381     Imm = QUAD_PERM_ENC;
5382     for (unsigned I = 0; I < LANE_NUM; ++I) {
5383       Imm |= Lane[I] << (LANE_SHIFT * I);
5384     }
5385     return true;
5386   }
5387   return false;
5388 }
5389 
5390 bool
5391 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5392   using namespace llvm::AMDGPU::Swizzle;
5393 
5394   SMLoc S = Parser.getTok().getLoc();
5395   int64_t GroupSize;
5396   int64_t LaneIdx;
5397 
5398   if (!parseSwizzleOperands(1, &GroupSize,
5399                             2, 32,
5400                             "group size must be in the interval [2,32]")) {
5401     return false;
5402   }
5403   if (!isPowerOf2_64(GroupSize)) {
5404     Error(S, "group size must be a power of two");
5405     return false;
5406   }
5407   if (parseSwizzleOperands(1, &LaneIdx,
5408                            0, GroupSize - 1,
5409                            "lane id must be in the interval [0,group size - 1]")) {
5410     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5411     return true;
5412   }
5413   return false;
5414 }
5415 
5416 bool
5417 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
5418   using namespace llvm::AMDGPU::Swizzle;
5419 
5420   SMLoc S = Parser.getTok().getLoc();
5421   int64_t GroupSize;
5422 
5423   if (!parseSwizzleOperands(1, &GroupSize,
5424       2, 32, "group size must be in the interval [2,32]")) {
5425     return false;
5426   }
5427   if (!isPowerOf2_64(GroupSize)) {
5428     Error(S, "group size must be a power of two");
5429     return false;
5430   }
5431 
5432   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
5433   return true;
5434 }
5435 
5436 bool
5437 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5438   using namespace llvm::AMDGPU::Swizzle;
5439 
5440   SMLoc S = Parser.getTok().getLoc();
5441   int64_t GroupSize;
5442 
5443   if (!parseSwizzleOperands(1, &GroupSize,
5444       1, 16, "group size must be in the interval [1,16]")) {
5445     return false;
5446   }
5447   if (!isPowerOf2_64(GroupSize)) {
5448     Error(S, "group size must be a power of two");
5449     return false;
5450   }
5451 
5452   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5453   return true;
5454 }
5455 
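     // Parse a BITMASK_PERM swizzle. Each of the 5 control characters maps to
     // one bit of the lane id, MSB first: '0' forces the bit to 0, '1' forces
     // it to 1, 'p' preserves it and 'i' inverts it. The masks are combined as
     // new_id = ((old_id & AndMask) | OrMask) ^ XorMask.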
5456 bool
5457 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5458   using namespace llvm::AMDGPU::Swizzle;
5459 
5460   if (!skipToken(AsmToken::Comma, "expected a comma")) {
5461     return false;
5462   }
5463 
5464   StringRef Ctl;
5465   SMLoc StrLoc = Parser.getTok().getLoc();
5466   if (!parseString(Ctl)) {
5467     return false;
5468   }
5469   if (Ctl.size() != BITMASK_WIDTH) {
5470     Error(StrLoc, "expected a 5-character mask");
5471     return false;
5472   }
5473 
5474   unsigned AndMask = 0;
5475   unsigned OrMask = 0;
5476   unsigned XorMask = 0;
5477 
5478   for (size_t i = 0; i < Ctl.size(); ++i) {
5479     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5480     switch(Ctl[i]) {
5481     default:
5482       Error(StrLoc, "invalid mask");
5483       return false;
5484     case '0':
5485       break;
5486     case '1':
5487       OrMask |= Mask;
5488       break;
5489     case 'p':
5490       AndMask |= Mask;
5491       break;
5492     case 'i':
5493       AndMask |= Mask;
5494       XorMask |= Mask;
5495       break;
5496     }
5497   }
5498 
5499   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
5500   return true;
5501 }
5502 
5503 bool
5504 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5505 
5506   SMLoc OffsetLoc = Parser.getTok().getLoc();
5507 
5508   if (!parseExpr(Imm)) {
5509     return false;
5510   }
5511   if (!isUInt<16>(Imm)) {
5512     Error(OffsetLoc, "expected a 16-bit offset");
5513     return false;
5514   }
5515   return true;
5516 }
5517 
5518 bool
5519 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
5520   using namespace llvm::AMDGPU::Swizzle;
5521 
5522   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
5523 
5524     SMLoc ModeLoc = Parser.getTok().getLoc();
5525     bool Ok = false;
5526 
5527     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
5528       Ok = parseSwizzleQuadPerm(Imm);
5529     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
5530       Ok = parseSwizzleBitmaskPerm(Imm);
5531     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
5532       Ok = parseSwizzleBroadcast(Imm);
5533     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
5534       Ok = parseSwizzleSwap(Imm);
5535     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
5536       Ok = parseSwizzleReverse(Imm);
5537     } else {
5538       Error(ModeLoc, "expected a swizzle mode");
5539     }
5540 
5541     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
5542   }
5543 
5544   return false;
5545 }
5546 
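     // Parse the swizzle operand of ds_swizzle_b32, either as a raw 16-bit
     // offset or as a swizzle macro, e.g. (illustrative)
     // "ds_swizzle_b32 v5, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)" or
     // "ds_swizzle_b32 v5, v1 offset:0xFFFF".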
5547 OperandMatchResultTy
5548 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
5549   SMLoc S = Parser.getTok().getLoc();
5550   int64_t Imm = 0;
5551 
5552   if (trySkipId("offset")) {
5553 
5554     bool Ok = false;
5555     if (skipToken(AsmToken::Colon, "expected a colon")) {
5556       if (trySkipId("swizzle")) {
5557         Ok = parseSwizzleMacro(Imm);
5558       } else {
5559         Ok = parseSwizzleOffset(Imm);
5560       }
5561     }
5562 
5563     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
5564 
5565     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
5566   } else {
5567     // Swizzle "offset" operand is optional.
5568     // If it is omitted, try parsing other optional operands.
5569     return parseOptionalOpr(Operands);
5570   }
5571 }
5572 
5573 bool
5574 AMDGPUOperand::isSwizzle() const {
5575   return isImmTy(ImmTySwizzle);
5576 }
5577 
5578 //===----------------------------------------------------------------------===//
5579 // VGPR Index Mode
5580 //===----------------------------------------------------------------------===//
5581 
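     // Parse the body of a gpr_idx mode operand, i.e. what follows "gpr_idx(",
     // e.g. (illustrative) "s_set_gpr_idx_on s0, gpr_idx(SRC0, DST)". The valid
     // modes are SRC0, SRC1, SRC2 and DST; each may appear at most once.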
5582 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
5583 
5584   using namespace llvm::AMDGPU::VGPRIndexMode;
5585 
5586   if (trySkipToken(AsmToken::RParen)) {
5587     return OFF;
5588   }
5589 
5590   int64_t Imm = 0;
5591 
5592   while (true) {
5593     unsigned Mode = 0;
5594     SMLoc S = Parser.getTok().getLoc();
5595 
5596     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
5597       if (trySkipId(IdSymbolic[ModeId])) {
5598         Mode = 1 << ModeId;
5599         break;
5600       }
5601     }
5602 
5603     if (Mode == 0) {
5604       Error(S, (Imm == 0)?
5605                "expected a VGPR index mode or a closing parenthesis" :
5606                "expected a VGPR index mode");
5607       break;
5608     }
5609 
5610     if (Imm & Mode) {
5611       Error(S, "duplicate VGPR index mode");
5612       break;
5613     }
5614     Imm |= Mode;
5615 
5616     if (trySkipToken(AsmToken::RParen))
5617       break;
5618     if (!skipToken(AsmToken::Comma,
5619                    "expected a comma or a closing parenthesis"))
5620       break;
5621   }
5622 
5623   return Imm;
5624 }
5625 
5626 OperandMatchResultTy
5627 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
5628 
5629   int64_t Imm = 0;
5630   SMLoc S = Parser.getTok().getLoc();
5631 
5632   if (getLexer().getKind() == AsmToken::Identifier &&
5633       Parser.getTok().getString() == "gpr_idx" &&
5634       getLexer().peekTok().is(AsmToken::LParen)) {
5635 
5636     Parser.Lex();
5637     Parser.Lex();
5638 
5639     // If parse failed, trigger an error but do not return error code
5640     // to avoid excessive error messages.
5641     Imm = parseGPRIdxMacro();
5642 
5643   } else {
5644     if (getParser().parseAbsoluteExpression(Imm))
5645       return MatchOperand_NoMatch;
5646     if (Imm < 0 || !isUInt<4>(Imm)) {
5647       Error(S, "invalid immediate: only 4-bit values are legal");
5648     }
5649   }
5650 
5651   Operands.push_back(
5652       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
5653   return MatchOperand_Success;
5654 }
5655 
5656 bool AMDGPUOperand::isGPRIdxMode() const {
5657   return isImmTy(ImmTyGprIdxMode);
5658 }
5659 
5660 //===----------------------------------------------------------------------===//
5661 // sopp branch targets
5662 //===----------------------------------------------------------------------===//
5663 
5664 OperandMatchResultTy
5665 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
5666 
5667   // Make sure we are not parsing something
5668   // that looks like a label or an expression but is not.
5669   // This will improve error messages.
5670   if (isRegister() || isModifier())
5671     return MatchOperand_NoMatch;
5672 
5673   if (parseExpr(Operands)) {
5674 
5675     AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
5676     assert(Opr.isImm() || Opr.isExpr());
5677     SMLoc Loc = Opr.getStartLoc();
5678 
5679     // Currently we do not support arbitrary expressions as branch targets.
5680     // Only labels and absolute expressions are accepted.
5681     if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
5682       Error(Loc, "expected an absolute expression or a label");
5683     } else if (Opr.isImm() && !Opr.isS16Imm()) {
5684       Error(Loc, "expected a 16-bit signed jump offset");
5685     }
5686   }
5687 
5688   return MatchOperand_Success; // avoid excessive error messages
5689 }
5690 
5691 //===----------------------------------------------------------------------===//
5692 // Boolean holding registers
5693 //===----------------------------------------------------------------------===//
5694 
5695 OperandMatchResultTy
5696 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
5697   return parseReg(Operands);
5698 }
5699 
5700 //===----------------------------------------------------------------------===//
5701 // mubuf
5702 //===----------------------------------------------------------------------===//
5703 
5704 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
5705   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
5706 }
5707 
5708 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
5709   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
5710 }
5711 
5712 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
5713   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
5714 }
5715 
5716 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
5717                                const OperandVector &Operands,
5718                                bool IsAtomic,
5719                                bool IsAtomicReturn,
5720                                bool IsLds) {
5721   bool IsLdsOpcode = IsLds;
5722   bool HasLdsModifier = false;
5723   OptionalImmIndexMap OptionalIdx;
5724   assert(IsAtomicReturn ? IsAtomic : true);
5725   unsigned FirstOperandIdx = 1;
5726 
5727   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
5728     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5729 
5730     // Add the register arguments
5731     if (Op.isReg()) {
5732       Op.addRegOperands(Inst, 1);
5733       // Insert a tied src for atomic return dst.
5734       // This cannot be postponed as subsequent calls to
5735       // addImmOperands rely on correct number of MC operands.
5736       if (IsAtomicReturn && i == FirstOperandIdx)
5737         Op.addRegOperands(Inst, 1);
5738       continue;
5739     }
5740 
5741     // Handle the case where soffset is an immediate
5742     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5743       Op.addImmOperands(Inst, 1);
5744       continue;
5745     }
5746 
5747     HasLdsModifier |= Op.isLDS();
5748 
5749     // Handle tokens like 'offen' which are sometimes hard-coded into the
5750     // asm string.  There are no MCInst operands for these.
5751     if (Op.isToken()) {
5752       continue;
5753     }
5754     assert(Op.isImm());
5755 
5756     // Handle optional arguments
5757     OptionalIdx[Op.getImmTy()] = i;
5758   }
5759 
5760   // This is a workaround for an llvm quirk which may result in an
5761   // incorrect instruction selection. Lds and non-lds versions of
5762   // MUBUF instructions are identical except that lds versions
5763   // have a mandatory 'lds' modifier. However, this modifier follows
5764   // optional modifiers, and the llvm asm matcher regards this 'lds'
5765   // modifier as an optional one. As a result, an lds version
5766   // of an opcode may be selected even if it has no 'lds' modifier.
5767   if (IsLdsOpcode && !HasLdsModifier) {
5768     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
5769     if (NoLdsOpcode != -1) { // Got lds version - correct it.
5770       Inst.setOpcode(NoLdsOpcode);
5771       IsLdsOpcode = false;
5772     }
5773   }
5774 
5775   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
5776   if (!IsAtomic) { // glc is hard-coded.
5777     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5778   }
5779   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5780 
5781   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
5782     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5783   }
5784 
5785   if (isGFX10())
5786     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5787 }
5788 
5789 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
5790   OptionalImmIndexMap OptionalIdx;
5791 
5792   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5793     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5794 
5795     // Add the register arguments
5796     if (Op.isReg()) {
5797       Op.addRegOperands(Inst, 1);
5798       continue;
5799     }
5800 
5801     // Handle the case where soffset is an immediate
5802     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5803       Op.addImmOperands(Inst, 1);
5804       continue;
5805     }
5806 
5807     // Handle tokens like 'offen' which are sometimes hard-coded into the
5808     // asm string.  There are no MCInst operands for these.
5809     if (Op.isToken()) {
5810       continue;
5811     }
5812     assert(Op.isImm());
5813 
5814     // Handle optional arguments
5815     OptionalIdx[Op.getImmTy()] = i;
5816   }
5817 
5818   addOptionalImmOperand(Inst, Operands, OptionalIdx,
5819                         AMDGPUOperand::ImmTyOffset);
5820   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
5821   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5822   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5823   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5824 
5825   if (isGFX10())
5826     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5827 }
5828 
5829 //===----------------------------------------------------------------------===//
5830 // mimg
5831 //===----------------------------------------------------------------------===//
5832 
5833 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
5834                               bool IsAtomic) {
5835   unsigned I = 1;
5836   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5837   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5838     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5839   }
5840 
5841   if (IsAtomic) {
5842     // Add src, same as dst
5843     assert(Desc.getNumDefs() == 1);
5844     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
5845   }
5846 
5847   OptionalImmIndexMap OptionalIdx;
5848 
5849   for (unsigned E = Operands.size(); I != E; ++I) {
5850     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5851 
5852     // Add the register arguments
5853     if (Op.isReg()) {
5854       Op.addRegOperands(Inst, 1);
5855     } else if (Op.isImmModifier()) {
5856       OptionalIdx[Op.getImmTy()] = I;
5857     } else if (!Op.isToken()) {
5858       llvm_unreachable("unexpected operand type");
5859     }
5860   }
5861 
5862   bool IsGFX10 = isGFX10();
5863 
5864   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
5865   if (IsGFX10)
5866     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
5867   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
5868   if (IsGFX10)
5869     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5870   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5871   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5872   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
5873   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5874   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
5875   if (!IsGFX10)
5876     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
5877   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
5878 }
5879 
5880 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
5881   cvtMIMG(Inst, Operands, true);
5882 }
5883 
5884 //===----------------------------------------------------------------------===//
5885 // smrd
5886 //===----------------------------------------------------------------------===//
5887 
5888 bool AMDGPUOperand::isSMRDOffset8() const {
5889   return isImm() && isUInt<8>(getImm());
5890 }
5891 
5892 bool AMDGPUOperand::isSMRDOffset20() const {
5893   return isImm() && isUInt<20>(getImm());
5894 }
5895 
5896 bool AMDGPUOperand::isSMRDLiteralOffset() const {
5897   // 32-bit literals are only supported on CI and we only want to use them
5898   // when the offset is wider than 8 bits.
5899   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
5900 }
5901 
5902 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
5903   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5904 }
5905 
5906 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
5907   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5908 }
5909 
5910 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
5911   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5912 }
5913 
5914 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
5915   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5916 }
5917 
5918 //===----------------------------------------------------------------------===//
5919 // vop3
5920 //===----------------------------------------------------------------------===//
5921 
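     // Convert an omod multiplier given in assembly (mul:1, mul:2 or mul:4)
     // to the 2-bit omod field encoding: 0 = none, 1 = *2, 2 = *4, 3 = /2.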
5922 static bool ConvertOmodMul(int64_t &Mul) {
5923   if (Mul != 1 && Mul != 2 && Mul != 4)
5924     return false;
5925 
5926   Mul >>= 1;
5927   return true;
5928 }
5929 
5930 static bool ConvertOmodDiv(int64_t &Div) {
5931   if (Div == 1) {
5932     Div = 0;
5933     return true;
5934   }
5935 
5936   if (Div == 2) {
5937     Div = 3;
5938     return true;
5939   }
5940 
5941   return false;
5942 }
5943 
5944 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
5945   if (BoundCtrl == 0) {
5946     BoundCtrl = 1;
5947     return true;
5948   }
5949 
5950   if (BoundCtrl == -1) {
5951     BoundCtrl = 0;
5952     return true;
5953   }
5954 
5955   return false;
5956 }
5957 
5958 // Note: the order in this table matches the order of operands in AsmString.
5959 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
5960   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
5961   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
5962   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
5963   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
5964   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
5965   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
5966   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
5967   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
5968   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
5969   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
5970   {"format",  AMDGPUOperand::ImmTyFORMAT, false, nullptr},
5971   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
5972   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
5973   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
5974   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
5975   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
5976   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
5977   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
5978   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
5979   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
5980   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
5981   {"a16",     AMDGPUOperand::ImmTyR128A16,  true, nullptr},
5982   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
5983   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
5984   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
5985   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
5986   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
5987   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
5988   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
5989   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
5990   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
5991   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
5992   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
5993   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
5994   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
5995   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
5996   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
5997   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
5998   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
5999   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
6000   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
6001   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
6002   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
6003 };
6004 
6005 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
6006   unsigned size = Operands.size();
6007   assert(size > 0);
6008 
6009   OperandMatchResultTy res = parseOptionalOpr(Operands);
6010 
6011   // This is a hack to enable parsing of hardcoded mandatory operands which
6012   // follow optional operands.
6013   //
6014   // The current design assumes that all operands after the first optional operand
6015   // are also optional. However, the implementation of some instructions violates
6016   // this rule (see e.g. flat/global atomics, which have hardcoded 'glc' operands).
6017   //
6018   // To alleviate this problem, we have to (implicitly) parse extra operands
6019   // to make sure the autogenerated parser of custom operands never hits hardcoded
6020   // mandatory operands.
6021 
6022   if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
6023 
6024     // We have parsed the first optional operand.
6025     // Parse as many operands as necessary to skip all mandatory operands.
6026 
6027     for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
6028       if (res != MatchOperand_Success ||
6029           getLexer().is(AsmToken::EndOfStatement)) break;
6030       if (getLexer().is(AsmToken::Comma)) Parser.Lex();
6031       res = parseOptionalOpr(Operands);
6032     }
6033   }
6034 
6035   return res;
6036 }
6037 
6038 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6039   OperandMatchResultTy res;
6040   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6041     // try to parse any optional operand here
6042     if (Op.IsBit) {
6043       res = parseNamedBit(Op.Name, Operands, Op.Type);
6044     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6045       res = parseOModOperand(Operands);
6046     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6047                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6048                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6049       res = parseSDWASel(Operands, Op.Name, Op.Type);
6050     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6051       res = parseSDWADstUnused(Operands);
6052     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6053                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6054                Op.Type == AMDGPUOperand::ImmTyNegLo ||
6055                Op.Type == AMDGPUOperand::ImmTyNegHi) {
6056       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6057                                         Op.ConvertResult);
6058     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6059       res = parseDim(Operands);
6060     } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
6061       res = parseDfmtNfmt(Operands);
6062     } else {
6063       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6064     }
6065     if (res != MatchOperand_NoMatch) {
6066       return res;
6067     }
6068   }
6069   return MatchOperand_NoMatch;
6070 }
6071 
6072 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6073   StringRef Name = Parser.getTok().getString();
6074   if (Name == "mul") {
6075     return parseIntWithPrefix("mul", Operands,
6076                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6077   }
6078 
6079   if (Name == "div") {
6080     return parseIntWithPrefix("div", Operands,
6081                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6082   }
6083 
6084   return MatchOperand_NoMatch;
6085 }
6086 
6087 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6088   cvtVOP3P(Inst, Operands);
6089 
6090   int Opc = Inst.getOpcode();
6091 
6092   int SrcNum;
6093   const int Ops[] = { AMDGPU::OpName::src0,
6094                       AMDGPU::OpName::src1,
6095                       AMDGPU::OpName::src2 };
6096   for (SrcNum = 0;
6097        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6098        ++SrcNum);
6099   assert(SrcNum > 0);
6100 
6101   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6102   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6103 
6104   if ((OpSel & (1 << SrcNum)) != 0) {
6105     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6106     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6107     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6108   }
6109 }
6110 
6111 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
6112       // 1. This operand is input modifiers
6113   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
6114       // 2. This is not last operand
6115       && Desc.NumOperands > (OpNum + 1)
6116       // 3. Next operand is register class
6117       && Desc.OpInfo[OpNum + 1].RegClass != -1
6118       // 4. Next register is not tied to any other operand
6119       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6120 }
6121 
6122 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6123 {
6124   OptionalImmIndexMap OptionalIdx;
6125   unsigned Opc = Inst.getOpcode();
6126 
6127   unsigned I = 1;
6128   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6129   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6130     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6131   }
6132 
6133   for (unsigned E = Operands.size(); I != E; ++I) {
6134     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6135     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6136       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6137     } else if (Op.isInterpSlot() ||
6138                Op.isInterpAttr() ||
6139                Op.isAttrChan()) {
6140       Inst.addOperand(MCOperand::createImm(Op.getImm()));
6141     } else if (Op.isImmModifier()) {
6142       OptionalIdx[Op.getImmTy()] = I;
6143     } else {
6144       llvm_unreachable("unhandled operand type");
6145     }
6146   }
6147 
6148   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6149     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6150   }
6151 
6152   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6153     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6154   }
6155 
6156   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6157     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6158   }
6159 }
6160 
6161 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
6162                               OptionalImmIndexMap &OptionalIdx) {
6163   unsigned Opc = Inst.getOpcode();
6164 
6165   unsigned I = 1;
6166   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6167   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6168     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6169   }
6170 
6171   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
6172     // This instruction has src modifiers
6173     for (unsigned E = Operands.size(); I != E; ++I) {
6174       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6175       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6176         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6177       } else if (Op.isImmModifier()) {
6178         OptionalIdx[Op.getImmTy()] = I;
6179       } else if (Op.isRegOrImm()) {
6180         Op.addRegOrImmOperands(Inst, 1);
6181       } else {
6182         llvm_unreachable("unhandled operand type");
6183       }
6184     }
6185   } else {
6186     // No src modifiers
6187     for (unsigned E = Operands.size(); I != E; ++I) {
6188       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6189       if (Op.isMod()) {
6190         OptionalIdx[Op.getImmTy()] = I;
6191       } else {
6192         Op.addRegOrImmOperands(Inst, 1);
6193       }
6194     }
6195   }
6196 
6197   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6198     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6199   }
6200 
6201   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6202     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6203   }
6204 
  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // they have a src2 register operand that is tied to the dst operand.
  // We do not allow modifiers for this operand in the assembler, so
  // src2_modifiers should be 0.
6209   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
6210       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
6211       Opc == AMDGPU::V_MAC_F32_e64_vi ||
6212       Opc == AMDGPU::V_MAC_F16_e64_vi ||
6213       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
6214       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
6215       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
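    // The tied src2 is not written in the assembly string, so insert a zero
    // src2_modifiers immediate and a copy of the dst register at the operand
    // positions the encoder expects.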
6216     auto it = Inst.begin();
6217     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
6218     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
6219     ++it;
6220     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6221   }
6222 }
6223 
6224 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
6225   OptionalImmIndexMap OptionalIdx;
6226   cvtVOP3(Inst, Operands, OptionalIdx);
6227 }
6228 
6229 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
6230                                const OperandVector &Operands) {
6231   OptionalImmIndexMap OptIdx;
6232   const int Opc = Inst.getOpcode();
6233   const MCInstrDesc &Desc = MII.get(Opc);
6234 
6235   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
6236 
6237   cvtVOP3(Inst, Operands, OptIdx);
6238 
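  // If the instruction has a vdst_in operand, it is tied to vdst, so supply a
  // copy of the dst register (operand 0) for it.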
6239   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
6240     assert(!IsPacked);
6241     Inst.addOperand(Inst.getOperand(0));
6242   }
6243 
  // FIXME: This is messy. Parse the modifiers as if this were a normal VOP3
  // instruction, and then figure out where to actually put the modifiers.
6246 
6247   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
6248 
6249   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
6250   if (OpSelHiIdx != -1) {
6251     int DefaultVal = IsPacked ? -1 : 0;
6252     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
6253                           DefaultVal);
6254   }
6255 
6256   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
6257   if (NegLoIdx != -1) {
6258     assert(IsPacked);
6259     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
6260     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
6261   }
6262 
6263   const int Ops[] = { AMDGPU::OpName::src0,
6264                       AMDGPU::OpName::src1,
6265                       AMDGPU::OpName::src2 };
6266   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
6267                          AMDGPU::OpName::src1_modifiers,
6268                          AMDGPU::OpName::src2_modifiers };
6269 
6270   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6271 
6272   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6273   unsigned OpSelHi = 0;
6274   unsigned NegLo = 0;
6275   unsigned NegHi = 0;
6276 
6277   if (OpSelHiIdx != -1) {
6278     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
6279   }
6280 
6281   if (NegLoIdx != -1) {
6282     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
6283     NegLo = Inst.getOperand(NegLoIdx).getImm();
6284     NegHi = Inst.getOperand(NegHiIdx).getImm();
6285   }
6286 
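  // Fold the op_sel / op_sel_hi / neg_lo / neg_hi bits for each source into
  // the corresponding src*_modifiers operand.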
6287   for (int J = 0; J < 3; ++J) {
6288     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
6289     if (OpIdx == -1)
6290       break;
6291 
6292     uint32_t ModVal = 0;
6293 
6294     if ((OpSel & (1 << J)) != 0)
6295       ModVal |= SISrcMods::OP_SEL_0;
6296 
6297     if ((OpSelHi & (1 << J)) != 0)
6298       ModVal |= SISrcMods::OP_SEL_1;
6299 
6300     if ((NegLo & (1 << J)) != 0)
6301       ModVal |= SISrcMods::NEG;
6302 
6303     if ((NegHi & (1 << J)) != 0)
6304       ModVal |= SISrcMods::NEG_HI;
6305 
6306     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
6307 
6308     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
6309   }
6310 }
6311 
6312 //===----------------------------------------------------------------------===//
6313 // dpp
6314 //===----------------------------------------------------------------------===//
6315 
6316 bool AMDGPUOperand::isDPP8() const {
6317   return isImmTy(ImmTyDPP8);
6318 }
6319 
6320 bool AMDGPUOperand::isDPPCtrl() const {
6321   using namespace AMDGPU::DPP;
6322 
6323   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
6324   if (result) {
6325     int64_t Imm = getImm();
6326     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
6327            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
6328            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
6329            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
6330            (Imm == DppCtrl::WAVE_SHL1) ||
6331            (Imm == DppCtrl::WAVE_ROL1) ||
6332            (Imm == DppCtrl::WAVE_SHR1) ||
6333            (Imm == DppCtrl::WAVE_ROR1) ||
6334            (Imm == DppCtrl::ROW_MIRROR) ||
6335            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
6336            (Imm == DppCtrl::BCAST15) ||
6337            (Imm == DppCtrl::BCAST31) ||
6338            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
6339            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
6340   }
6341   return false;
6342 }
6343 
6344 //===----------------------------------------------------------------------===//
6345 // mAI
6346 //===----------------------------------------------------------------------===//
6347 
6348 bool AMDGPUOperand::isBLGP() const {
6349   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
6350 }
6351 
6352 bool AMDGPUOperand::isCBSZ() const {
6353   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
6354 }
6355 
6356 bool AMDGPUOperand::isABID() const {
6357   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
6358 }
6359 
6360 bool AMDGPUOperand::isS16Imm() const {
6361   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
6362 }
6363 
6364 bool AMDGPUOperand::isU16Imm() const {
6365   return isImm() && isUInt<16>(getImm());
6366 }
6367 
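// Parse the MIMG dimension operand (gfx10 only), e.g. "dim:SQ_RSRC_IMG_2D" or
// the short form "dim:2D".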
6368 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
6369   if (!isGFX10())
6370     return MatchOperand_NoMatch;
6371 
6372   SMLoc S = Parser.getTok().getLoc();
6373 
6374   if (getLexer().isNot(AsmToken::Identifier))
6375     return MatchOperand_NoMatch;
6376   if (getLexer().getTok().getString() != "dim")
6377     return MatchOperand_NoMatch;
6378 
6379   Parser.Lex();
6380   if (getLexer().isNot(AsmToken::Colon))
6381     return MatchOperand_ParseFail;
6382 
6383   Parser.Lex();
6384 
6385   // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
6386   // integer.
6387   std::string Token;
6388   if (getLexer().is(AsmToken::Integer)) {
6389     SMLoc Loc = getLexer().getTok().getEndLoc();
6390     Token = getLexer().getTok().getString();
6391     Parser.Lex();
6392     if (getLexer().getTok().getLoc() != Loc)
6393       return MatchOperand_ParseFail;
6394   }
6395   if (getLexer().isNot(AsmToken::Identifier))
6396     return MatchOperand_ParseFail;
6397   Token += getLexer().getTok().getString();
6398 
6399   StringRef DimId = Token;
6400   if (DimId.startswith("SQ_RSRC_IMG_"))
6401     DimId = DimId.substr(12);
6402 
6403   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
6404   if (!DimInfo)
6405     return MatchOperand_ParseFail;
6406 
6407   Parser.Lex();
6408 
6409   Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
6410                                               AMDGPUOperand::ImmTyDim));
6411   return MatchOperand_Success;
6412 }
6413 
6414 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
6415   SMLoc S = Parser.getTok().getLoc();
6416   StringRef Prefix;
6417 
6418   if (getLexer().getKind() == AsmToken::Identifier) {
6419     Prefix = Parser.getTok().getString();
6420   } else {
6421     return MatchOperand_NoMatch;
6422   }
6423 
6424   if (Prefix != "dpp8")
6425     return parseDPPCtrl(Operands);
6426   if (!isGFX10())
6427     return MatchOperand_NoMatch;
6428 
6429   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
6430 
6431   int64_t Sels[8];
6432 
6433   Parser.Lex();
6434   if (getLexer().isNot(AsmToken::Colon))
6435     return MatchOperand_ParseFail;
6436 
6437   Parser.Lex();
6438   if (getLexer().isNot(AsmToken::LBrac))
6439     return MatchOperand_ParseFail;
6440 
6441   Parser.Lex();
6442   if (getParser().parseAbsoluteExpression(Sels[0]))
6443     return MatchOperand_ParseFail;
6444   if (0 > Sels[0] || 7 < Sels[0])
6445     return MatchOperand_ParseFail;
6446 
6447   for (size_t i = 1; i < 8; ++i) {
6448     if (getLexer().isNot(AsmToken::Comma))
6449       return MatchOperand_ParseFail;
6450 
6451     Parser.Lex();
6452     if (getParser().parseAbsoluteExpression(Sels[i]))
6453       return MatchOperand_ParseFail;
6454     if (0 > Sels[i] || 7 < Sels[i])
6455       return MatchOperand_ParseFail;
6456   }
6457 
6458   if (getLexer().isNot(AsmToken::RBrac))
6459     return MatchOperand_ParseFail;
6460   Parser.Lex();
6461 
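  // Pack the eight lane selects into a single immediate, 3 bits per select
  // (select i occupies bits [3*i+2 : 3*i]).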
6462   unsigned DPP8 = 0;
6463   for (size_t i = 0; i < 8; ++i)
6464     DPP8 |= (Sels[i] << (i * 3));
6465 
6466   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
6467   return MatchOperand_Success;
6468 }
6469 
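// Parse a dpp_ctrl operand, e.g. "quad_perm:[0,1,2,3]", "row_shl:1",
// "row_mirror", "wave_ror:1", "row_share:0" or "row_bcast:15".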
6470 OperandMatchResultTy
6471 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
6472   using namespace AMDGPU::DPP;
6473 
6474   SMLoc S = Parser.getTok().getLoc();
6475   StringRef Prefix;
6476   int64_t Int;
6477 
6478   if (getLexer().getKind() == AsmToken::Identifier) {
6479     Prefix = Parser.getTok().getString();
6480   } else {
6481     return MatchOperand_NoMatch;
6482   }
6483 
6484   if (Prefix == "row_mirror") {
6485     Int = DppCtrl::ROW_MIRROR;
6486     Parser.Lex();
6487   } else if (Prefix == "row_half_mirror") {
6488     Int = DppCtrl::ROW_HALF_MIRROR;
6489     Parser.Lex();
6490   } else {
    // Check to prevent parseDPPCtrl from eating invalid tokens
6492     if (Prefix != "quad_perm"
6493         && Prefix != "row_shl"
6494         && Prefix != "row_shr"
6495         && Prefix != "row_ror"
6496         && Prefix != "wave_shl"
6497         && Prefix != "wave_rol"
6498         && Prefix != "wave_shr"
6499         && Prefix != "wave_ror"
6500         && Prefix != "row_bcast"
6501         && Prefix != "row_share"
6502         && Prefix != "row_xmask") {
6503       return MatchOperand_NoMatch;
6504     }
6505 
6506     if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
6507       return MatchOperand_NoMatch;
6508 
6509     if (!isVI() && !isGFX9() &&
6510         (Prefix == "wave_shl" || Prefix == "wave_shr" ||
6511          Prefix == "wave_rol" || Prefix == "wave_ror" ||
6512          Prefix == "row_bcast"))
6513       return MatchOperand_NoMatch;
6514 
6515     Parser.Lex();
6516     if (getLexer().isNot(AsmToken::Colon))
6517       return MatchOperand_ParseFail;
6518 
6519     if (Prefix == "quad_perm") {
6520       // quad_perm:[%d,%d,%d,%d]
6521       Parser.Lex();
6522       if (getLexer().isNot(AsmToken::LBrac))
6523         return MatchOperand_ParseFail;
6524       Parser.Lex();
6525 
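      // quad_perm packs four 2-bit lane selects: select j occupies
      // bits [2*j+1 : 2*j] of the dpp_ctrl value.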
      if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <= 3))
6527         return MatchOperand_ParseFail;
6528 
6529       for (int i = 0; i < 3; ++i) {
6530         if (getLexer().isNot(AsmToken::Comma))
6531           return MatchOperand_ParseFail;
6532         Parser.Lex();
6533 
6534         int64_t Temp;
        if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <= 3))
          return MatchOperand_ParseFail;
        const int shift = i * 2 + 2;
6538         Int += (Temp << shift);
6539       }
6540 
6541       if (getLexer().isNot(AsmToken::RBrac))
6542         return MatchOperand_ParseFail;
6543       Parser.Lex();
6544     } else {
6545       // sel:%d
6546       Parser.Lex();
6547       if (getParser().parseAbsoluteExpression(Int))
6548         return MatchOperand_ParseFail;
6549 
6550       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
6551         Int |= DppCtrl::ROW_SHL0;
6552       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
6553         Int |= DppCtrl::ROW_SHR0;
6554       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
6555         Int |= DppCtrl::ROW_ROR0;
6556       } else if (Prefix == "wave_shl" && 1 == Int) {
6557         Int = DppCtrl::WAVE_SHL1;
6558       } else if (Prefix == "wave_rol" && 1 == Int) {
6559         Int = DppCtrl::WAVE_ROL1;
6560       } else if (Prefix == "wave_shr" && 1 == Int) {
6561         Int = DppCtrl::WAVE_SHR1;
6562       } else if (Prefix == "wave_ror" && 1 == Int) {
6563         Int = DppCtrl::WAVE_ROR1;
6564       } else if (Prefix == "row_bcast") {
6565         if (Int == 15) {
6566           Int = DppCtrl::BCAST15;
6567         } else if (Int == 31) {
6568           Int = DppCtrl::BCAST31;
6569         } else {
6570           return MatchOperand_ParseFail;
6571         }
6572       } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
6573         Int |= DppCtrl::ROW_SHARE_FIRST;
6574       } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
6575         Int |= DppCtrl::ROW_XMASK_FIRST;
6576       } else {
6577         return MatchOperand_ParseFail;
6578       }
6579     }
6580   }
6581 
6582   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
6583   return MatchOperand_Success;
6584 }
6585 
6586 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
6587   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
6588 }
6589 
6590 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
6591   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
6592 }
6593 
6594 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
6595   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
6596 }
6597 
6598 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
6599   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
6600 }
6601 
6602 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
6603   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
6604 }
6605 
6606 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
6607   OptionalImmIndexMap OptionalIdx;
6608 
6609   unsigned I = 1;
6610   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6611   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6612     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6613   }
6614 
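  // For dpp8 the optional fi modifier is recorded here and materialized as the
  // trailing immediate (DPP8_FI_0/DPP8_FI_1) after all other operands.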
6615   int Fi = 0;
6616   for (unsigned E = Operands.size(); I != E; ++I) {
6617     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
6618                                             MCOI::TIED_TO);
6619     if (TiedTo != -1) {
6620       assert((unsigned)TiedTo < Inst.getNumOperands());
      // Handle the tied 'old' or 'src2' operand for MAC instructions.
6622       Inst.addOperand(Inst.getOperand(TiedTo));
6623     }
6624     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6625     // Add the register arguments
6626     if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp uses the "vcc" token.
6628       // Skip it.
6629       continue;
6630     }
6631 
6632     if (IsDPP8) {
6633       if (Op.isDPP8()) {
6634         Op.addImmOperands(Inst, 1);
6635       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6636         Op.addRegWithFPInputModsOperands(Inst, 2);
6637       } else if (Op.isFI()) {
6638         Fi = Op.getImm();
6639       } else if (Op.isReg()) {
6640         Op.addRegOperands(Inst, 1);
6641       } else {
6642         llvm_unreachable("Invalid operand type");
6643       }
6644     } else {
6645       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6646         Op.addRegWithFPInputModsOperands(Inst, 2);
6647       } else if (Op.isDPPCtrl()) {
6648         Op.addImmOperands(Inst, 1);
6649       } else if (Op.isImm()) {
6650         // Handle optional arguments
6651         OptionalIdx[Op.getImmTy()] = I;
6652       } else {
6653         llvm_unreachable("Invalid operand type");
6654       }
6655     }
6656   }
6657 
6658   if (IsDPP8) {
6659     using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
6661   } else {
6662     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
6663     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
6664     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
6665     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
6666       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
6667     }
6668   }
6669 }
6670 
6671 //===----------------------------------------------------------------------===//
6672 // sdwa
6673 //===----------------------------------------------------------------------===//
6674 
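// Parse an SDWA select operand such as "dst_sel:BYTE_0" or "src0_sel:WORD_1";
// Prefix names the field being parsed and Type the resulting immediate kind.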
6675 OperandMatchResultTy
6676 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
6677                               AMDGPUOperand::ImmTy Type) {
6678   using namespace llvm::AMDGPU::SDWA;
6679 
6680   SMLoc S = Parser.getTok().getLoc();
6681   StringRef Value;
6682   OperandMatchResultTy res;
6683 
6684   res = parseStringWithPrefix(Prefix, Value);
6685   if (res != MatchOperand_Success) {
6686     return res;
6687   }
6688 
6689   int64_t Int;
6690   Int = StringSwitch<int64_t>(Value)
6691         .Case("BYTE_0", SdwaSel::BYTE_0)
6692         .Case("BYTE_1", SdwaSel::BYTE_1)
6693         .Case("BYTE_2", SdwaSel::BYTE_2)
6694         .Case("BYTE_3", SdwaSel::BYTE_3)
6695         .Case("WORD_0", SdwaSel::WORD_0)
6696         .Case("WORD_1", SdwaSel::WORD_1)
6697         .Case("DWORD", SdwaSel::DWORD)
6698         .Default(0xffffffff);
6699   Parser.Lex(); // eat last token
6700 
6701   if (Int == 0xffffffff) {
6702     return MatchOperand_ParseFail;
6703   }
6704 
6705   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
6706   return MatchOperand_Success;
6707 }
6708 
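// Parse the SDWA dst_unused operand, e.g. "dst_unused:UNUSED_PRESERVE".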
6709 OperandMatchResultTy
6710 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
6711   using namespace llvm::AMDGPU::SDWA;
6712 
6713   SMLoc S = Parser.getTok().getLoc();
6714   StringRef Value;
6715   OperandMatchResultTy res;
6716 
6717   res = parseStringWithPrefix("dst_unused", Value);
6718   if (res != MatchOperand_Success) {
6719     return res;
6720   }
6721 
6722   int64_t Int;
6723   Int = StringSwitch<int64_t>(Value)
6724         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
6725         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
6726         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
6727         .Default(0xffffffff);
6728   Parser.Lex(); // eat last token
6729 
6730   if (Int == 0xffffffff) {
6731     return MatchOperand_ParseFail;
6732   }
6733 
6734   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
6735   return MatchOperand_Success;
6736 }
6737 
6738 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
6739   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
6740 }
6741 
6742 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
6743   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
6744 }
6745 
6746 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
6747   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
6748 }
6749 
6750 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
6751   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
6752 }
6753 
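// Convert parsed SDWA operands into MCInst operands. BasicInstType selects
// which optional sdwa operands (dst_sel, dst_unused, src sels) the encoding
// expects, and skipVcc handles the implicit vcc of VOP2b/VOPC forms.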
6754 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
6755                               uint64_t BasicInstType, bool skipVcc) {
6756   using namespace llvm::AMDGPU::SDWA;
6757 
6758   OptionalImmIndexMap OptionalIdx;
6759   bool skippedVcc = false;
6760 
6761   unsigned I = 1;
6762   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6763   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6764     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6765   }
6766 
6767   for (unsigned E = Operands.size(); I != E; ++I) {
6768     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6769     if (skipVcc && !skippedVcc && Op.isReg() &&
6770         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa uses the "vcc" token as dst.
      // Skip it if it is the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on the previous iteration.
6775       if (BasicInstType == SIInstrFlags::VOP2 &&
6776           (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
6777         skippedVcc = true;
6778         continue;
6779       } else if (BasicInstType == SIInstrFlags::VOPC &&
6780                  Inst.getNumOperands() == 0) {
6781         skippedVcc = true;
6782         continue;
6783       }
6784     }
6785     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6786       Op.addRegOrImmWithInputModsOperands(Inst, 2);
6787     } else if (Op.isImm()) {
6788       // Handle optional arguments
6789       OptionalIdx[Op.getImmTy()] = I;
6790     } else {
6791       llvm_unreachable("Invalid operand type");
6792     }
6793     skippedVcc = false;
6794   }
6795 
6796   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
6797       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
6798       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // V_NOP_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments.
6800     switch (BasicInstType) {
6801     case SIInstrFlags::VOP1:
6802       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6803       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6804         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6805       }
6806       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6807       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6808       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6809       break;
6810 
6811     case SIInstrFlags::VOP2:
6812       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6813       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6814         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6815       }
6816       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6817       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6818       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6819       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6820       break;
6821 
6822     case SIInstrFlags::VOPC:
6823       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
6824         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6825       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6826       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6827       break;
6828 
6829     default:
6830       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
6831     }
6832   }
6833 
  // Special case v_mac_{f16, f32}:
  // they have a src2 register operand that is tied to the dst operand.
6836   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
6837       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
6838     auto it = Inst.begin();
6839     std::advance(
6840       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
6841     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6842   }
6843 }
6844 
6845 //===----------------------------------------------------------------------===//
6846 // mAI
6847 //===----------------------------------------------------------------------===//
6848 
6849 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
6850   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
6851 }
6852 
6853 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
6854   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
6855 }
6856 
6857 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
6858   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
6859 }
6860 
6861 /// Force static initialization.
6862 extern "C" void LLVMInitializeAMDGPUAsmParser() {
6863   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
6864   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
6865 }
6866 
6867 #define GET_REGISTER_MATCHER
6868 #define GET_MATCHER_IMPLEMENTATION
6869 #define GET_MNEMONIC_SPELL_CHECKER
6870 #include "AMDGPUGenAsmMatcher.inc"
6871 
// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined.
6874 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
6875                                                      unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet a token and fails to validate
  // the operand. This method checks if we are given an immediate operand but
  // expect to get the corresponding token instead.
6880   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
6881   switch (Kind) {
6882   case MCK_addr64:
6883     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
6884   case MCK_gds:
6885     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
6886   case MCK_lds:
6887     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
6888   case MCK_glc:
6889     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
6890   case MCK_idxen:
6891     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
6892   case MCK_offen:
6893     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
6894   case MCK_SSrcB32:
6895     // When operands have expression values, they will return true for isToken,
6896     // because it is not possible to distinguish between a token and an
6897     // expression at parse time. MatchInstructionImpl() will always try to
6898     // match an operand as a token, when isToken returns true, and when the
6899     // name of the expression is not a valid token, the match will fail,
6900     // so we need to handle it here.
6901     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
6902   case MCK_SSrcF32:
6903     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
6904   case MCK_SoppBrTarget:
6905     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
6906   case MCK_VReg32OrOff:
6907     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
6908   case MCK_InterpSlot:
6909     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
6910   case MCK_Attr:
6911     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
6912   case MCK_AttrChan:
6913     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
6914   default:
6915     return Match_InvalidOperand;
6916   }
6917 }
6918 
6919 //===----------------------------------------------------------------------===//
6920 // endpgm
6921 //===----------------------------------------------------------------------===//
6922 
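// Parse the optional immediate on s_endpgm-style instructions; it defaults to
// 0 when omitted and must fit in 16 bits.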
6923 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
6924   SMLoc S = Parser.getTok().getLoc();
6925   int64_t Imm = 0;
6926 
6927   if (!parseExpr(Imm)) {
6928     // The operand is optional, if not present default to 0
6929     Imm = 0;
6930   }
6931 
6932   if (!isUInt<16>(Imm)) {
6933     Error(S, "expected a 16-bit value");
6934     return MatchOperand_ParseFail;
6935   }
6936 
6937   Operands.push_back(
6938       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
6939   return MatchOperand_Success;
6940 }
6941 
6942 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
6943