1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/ErrorHandling.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
63 
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
67 
68 namespace {
69 
70 class AMDGPUAsmParser;
71 
72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
73 
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
77 
78 class AMDGPUOperand : public MCParsedAsmOperand {
79   enum KindTy {
80     Token,
81     Immediate,
82     Register,
83     Expression
84   } Kind;
85 
86   SMLoc StartLoc, EndLoc;
87   const AMDGPUAsmParser *AsmParser;
88 
89 public:
90   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
92 
93   using Ptr = std::unique_ptr<AMDGPUOperand>;
94 
95   struct Modifiers {
96     bool Abs = false;
97     bool Neg = false;
98     bool Sext = false;
99 
100     bool hasFPModifiers() const { return Abs || Neg; }
101     bool hasIntModifiers() const { return Sext; }
102     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103 
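    // Illustrative example: a source written as "-|v0|" sets both Abs and Neg,
    // so getFPModifiersOperand() below yields SISrcMods::ABS | SISrcMods::NEG,
    // while "sext(v0)" sets only Sext.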
104     int64_t getFPModifiersOperand() const {
105       int64_t Operand = 0;
106       Operand |= Abs ? SISrcMods::ABS : 0u;
107       Operand |= Neg ? SISrcMods::NEG : 0u;
108       return Operand;
109     }
110 
111     int64_t getIntModifiersOperand() const {
112       int64_t Operand = 0;
113       Operand |= Sext ? SISrcMods::SEXT : 0u;
114       return Operand;
115     }
116 
117     int64_t getModifiersOperand() const {
118       assert(!(hasFPModifiers() && hasIntModifiers())
119            && "fp and int modifiers should not be used simultaneously");
120       if (hasFPModifiers()) {
121         return getFPModifiersOperand();
122       } else if (hasIntModifiers()) {
123         return getIntModifiersOperand();
124       } else {
125         return 0;
126       }
127     }
128 
129     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
130   };
131 
132   enum ImmTy {
133     ImmTyNone,
134     ImmTyGDS,
135     ImmTyLDS,
136     ImmTyOffen,
137     ImmTyIdxen,
138     ImmTyAddr64,
139     ImmTyOffset,
140     ImmTyInstOffset,
141     ImmTyOffset0,
142     ImmTyOffset1,
143     ImmTyDLC,
144     ImmTyGLC,
145     ImmTySLC,
146     ImmTyTFE,
147     ImmTyD16,
148     ImmTyClampSI,
149     ImmTyOModSI,
150     ImmTyDPP8,
151     ImmTyDppCtrl,
152     ImmTyDppRowMask,
153     ImmTyDppBankMask,
154     ImmTyDppBoundCtrl,
155     ImmTyDppFi,
156     ImmTySdwaDstSel,
157     ImmTySdwaSrc0Sel,
158     ImmTySdwaSrc1Sel,
159     ImmTySdwaDstUnused,
160     ImmTyDMask,
161     ImmTyDim,
162     ImmTyUNorm,
163     ImmTyDA,
164     ImmTyR128A16,
165     ImmTyLWE,
166     ImmTyExpTgt,
167     ImmTyExpCompr,
168     ImmTyExpVM,
169     ImmTyFORMAT,
170     ImmTyHwreg,
171     ImmTyOff,
172     ImmTySendMsg,
173     ImmTyInterpSlot,
174     ImmTyInterpAttr,
175     ImmTyAttrChan,
176     ImmTyOpSel,
177     ImmTyOpSelHi,
178     ImmTyNegLo,
179     ImmTyNegHi,
180     ImmTySwizzle,
181     ImmTyGprIdxMode,
182     ImmTyHigh,
183     ImmTyBLGP,
184     ImmTyCBSZ,
185     ImmTyABID,
186     ImmTyEndpgm,
187   };
188 
189 private:
190   struct TokOp {
191     const char *Data;
192     unsigned Length;
193   };
194 
195   struct ImmOp {
196     int64_t Val;
197     ImmTy Type;
198     bool IsFPImm;
199     Modifiers Mods;
200   };
201 
202   struct RegOp {
203     unsigned RegNo;
204     Modifiers Mods;
205   };
206 
207   union {
208     TokOp Tok;
209     ImmOp Imm;
210     RegOp Reg;
211     const MCExpr *Expr;
212   };
213 
214 public:
215   bool isToken() const override {
216     if (Kind == Token)
217       return true;
218 
219     // When parsing operands, we can't always tell if something was meant to be
220     // a token, like 'gds', or an expression that references a global variable.
221     // In this case, we assume the string is an expression, and if we need to
222     // interpret it as a token, then we treat the symbol name as the token.
223     return isSymbolRefExpr();
224   }
225 
226   bool isSymbolRefExpr() const {
227     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
228   }
229 
230   bool isImm() const override {
231     return Kind == Immediate;
232   }
233 
234   bool isInlinableImm(MVT type) const;
235   bool isLiteralImm(MVT type) const;
236 
237   bool isRegKind() const {
238     return Kind == Register;
239   }
240 
241   bool isReg() const override {
242     return isRegKind() && !hasModifiers();
243   }
244 
245   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
246     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
247   }
248 
249   bool isRegOrImmWithInt16InputMods() const {
250     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
251   }
252 
253   bool isRegOrImmWithInt32InputMods() const {
254     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
255   }
256 
257   bool isRegOrImmWithInt64InputMods() const {
258     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
259   }
260 
261   bool isRegOrImmWithFP16InputMods() const {
262     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
263   }
264 
265   bool isRegOrImmWithFP32InputMods() const {
266     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
267   }
268 
269   bool isRegOrImmWithFP64InputMods() const {
270     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
271   }
272 
273   bool isVReg() const {
274     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
275            isRegClass(AMDGPU::VReg_64RegClassID) ||
276            isRegClass(AMDGPU::VReg_96RegClassID) ||
277            isRegClass(AMDGPU::VReg_128RegClassID) ||
278            isRegClass(AMDGPU::VReg_160RegClassID) ||
279            isRegClass(AMDGPU::VReg_256RegClassID) ||
280            isRegClass(AMDGPU::VReg_512RegClassID) ||
281            isRegClass(AMDGPU::VReg_1024RegClassID);
282   }
283 
284   bool isVReg32() const {
285     return isRegClass(AMDGPU::VGPR_32RegClassID);
286   }
287 
288   bool isVReg32OrOff() const {
289     return isOff() || isVReg32();
290   }
291 
292   bool isSDWAOperand(MVT type) const;
293   bool isSDWAFP16Operand() const;
294   bool isSDWAFP32Operand() const;
295   bool isSDWAInt16Operand() const;
296   bool isSDWAInt32Operand() const;
297 
298   bool isImmTy(ImmTy ImmT) const {
299     return isImm() && Imm.Type == ImmT;
300   }
301 
302   bool isImmModifier() const {
303     return isImm() && Imm.Type != ImmTyNone;
304   }
305 
306   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
307   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
308   bool isDMask() const { return isImmTy(ImmTyDMask); }
309   bool isDim() const { return isImmTy(ImmTyDim); }
310   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
311   bool isDA() const { return isImmTy(ImmTyDA); }
312   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
313   bool isLWE() const { return isImmTy(ImmTyLWE); }
314   bool isOff() const { return isImmTy(ImmTyOff); }
315   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
316   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
317   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
318   bool isOffen() const { return isImmTy(ImmTyOffen); }
319   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
320   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
321   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
322   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
323   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
324 
325   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
326   bool isGDS() const { return isImmTy(ImmTyGDS); }
327   bool isLDS() const { return isImmTy(ImmTyLDS); }
328   bool isDLC() const { return isImmTy(ImmTyDLC); }
329   bool isGLC() const { return isImmTy(ImmTyGLC); }
330   bool isSLC() const { return isImmTy(ImmTySLC); }
331   bool isTFE() const { return isImmTy(ImmTyTFE); }
332   bool isD16() const { return isImmTy(ImmTyD16); }
333   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
334   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
335   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
336   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
337   bool isFI() const { return isImmTy(ImmTyDppFi); }
338   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
339   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
340   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
341   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
342   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
343   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
344   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
345   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
346   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
347   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
348   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
349   bool isHigh() const { return isImmTy(ImmTyHigh); }
350 
351   bool isMod() const {
352     return isClampSI() || isOModSI();
353   }
354 
355   bool isRegOrImm() const {
356     return isReg() || isImm();
357   }
358 
359   bool isRegClass(unsigned RCID) const;
360 
361   bool isInlineValue() const;
362 
363   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
364     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
365   }
366 
367   bool isSCSrcB16() const {
368     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
369   }
370 
371   bool isSCSrcV2B16() const {
372     return isSCSrcB16();
373   }
374 
375   bool isSCSrcB32() const {
376     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
377   }
378 
379   bool isSCSrcB64() const {
380     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
381   }
382 
383   bool isBoolReg() const;
384 
385   bool isSCSrcF16() const {
386     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
387   }
388 
389   bool isSCSrcV2F16() const {
390     return isSCSrcF16();
391   }
392 
393   bool isSCSrcF32() const {
394     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
395   }
396 
397   bool isSCSrcF64() const {
398     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
399   }
400 
401   bool isSSrcB32() const {
402     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
403   }
404 
405   bool isSSrcB16() const {
406     return isSCSrcB16() || isLiteralImm(MVT::i16);
407   }
408 
409   bool isSSrcV2B16() const {
410     llvm_unreachable("cannot happen");
411     return isSSrcB16();
412   }
413 
414   bool isSSrcB64() const {
415     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
416     // See isVSrc64().
417     return isSCSrcB64() || isLiteralImm(MVT::i64);
418   }
419 
420   bool isSSrcF32() const {
421     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
422   }
423 
424   bool isSSrcF64() const {
425     return isSCSrcB64() || isLiteralImm(MVT::f64);
426   }
427 
428   bool isSSrcF16() const {
429     return isSCSrcB16() || isLiteralImm(MVT::f16);
430   }
431 
432   bool isSSrcV2F16() const {
433     llvm_unreachable("cannot happen");
434     return isSSrcF16();
435   }
436 
437   bool isSSrcOrLdsB32() const {
438     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
439            isLiteralImm(MVT::i32) || isExpr();
440   }
441 
442   bool isVCSrcB32() const {
443     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
444   }
445 
446   bool isVCSrcB64() const {
447     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
448   }
449 
450   bool isVCSrcB16() const {
451     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
452   }
453 
454   bool isVCSrcV2B16() const {
455     return isVCSrcB16();
456   }
457 
458   bool isVCSrcF32() const {
459     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
460   }
461 
462   bool isVCSrcF64() const {
463     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
464   }
465 
466   bool isVCSrcF16() const {
467     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
468   }
469 
470   bool isVCSrcV2F16() const {
471     return isVCSrcF16();
472   }
473 
474   bool isVSrcB32() const {
475     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
476   }
477 
478   bool isVSrcB64() const {
479     return isVCSrcF64() || isLiteralImm(MVT::i64);
480   }
481 
482   bool isVSrcB16() const {
483     return isVCSrcF16() || isLiteralImm(MVT::i16);
484   }
485 
486   bool isVSrcV2B16() const {
487     return isVSrcB16() || isLiteralImm(MVT::v2i16);
488   }
489 
490   bool isVSrcF32() const {
491     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
492   }
493 
494   bool isVSrcF64() const {
495     return isVCSrcF64() || isLiteralImm(MVT::f64);
496   }
497 
498   bool isVSrcF16() const {
499     return isVCSrcF16() || isLiteralImm(MVT::f16);
500   }
501 
502   bool isVSrcV2F16() const {
503     return isVSrcF16() || isLiteralImm(MVT::v2f16);
504   }
505 
506   bool isVISrcB32() const {
507     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
508   }
509 
510   bool isVISrcB16() const {
511     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
512   }
513 
514   bool isVISrcV2B16() const {
515     return isVISrcB16();
516   }
517 
518   bool isVISrcF32() const {
519     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
520   }
521 
522   bool isVISrcF16() const {
523     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
524   }
525 
526   bool isVISrcV2F16() const {
527     return isVISrcF16() || isVISrcB32();
528   }
529 
530   bool isAISrcB32() const {
531     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
532   }
533 
534   bool isAISrcB16() const {
535     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
536   }
537 
538   bool isAISrcV2B16() const {
539     return isAISrcB16();
540   }
541 
542   bool isAISrcF32() const {
543     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
544   }
545 
546   bool isAISrcF16() const {
547     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
548   }
549 
550   bool isAISrcV2F16() const {
551     return isAISrcF16() || isAISrcB32();
552   }
553 
554   bool isAISrc_128B32() const {
555     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
556   }
557 
558   bool isAISrc_128B16() const {
559     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
560   }
561 
562   bool isAISrc_128V2B16() const {
563     return isAISrc_128B16();
564   }
565 
566   bool isAISrc_128F32() const {
567     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
568   }
569 
570   bool isAISrc_128F16() const {
571     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
572   }
573 
574   bool isAISrc_128V2F16() const {
575     return isAISrc_128F16() || isAISrc_128B32();
576   }
577 
578   bool isAISrc_512B32() const {
579     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
580   }
581 
582   bool isAISrc_512B16() const {
583     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
584   }
585 
586   bool isAISrc_512V2B16() const {
587     return isAISrc_512B16();
588   }
589 
590   bool isAISrc_512F32() const {
591     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
592   }
593 
594   bool isAISrc_512F16() const {
595     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
596   }
597 
598   bool isAISrc_512V2F16() const {
599     return isAISrc_512F16() || isAISrc_512B32();
600   }
601 
602   bool isAISrc_1024B32() const {
603     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
604   }
605 
606   bool isAISrc_1024B16() const {
607     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
608   }
609 
610   bool isAISrc_1024V2B16() const {
611     return isAISrc_1024B16();
612   }
613 
614   bool isAISrc_1024F32() const {
615     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
616   }
617 
618   bool isAISrc_1024F16() const {
619     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
620   }
621 
622   bool isAISrc_1024V2F16() const {
623     return isAISrc_1024F16() || isAISrc_1024B32();
624   }
625 
626   bool isKImmFP32() const {
627     return isLiteralImm(MVT::f32);
628   }
629 
630   bool isKImmFP16() const {
631     return isLiteralImm(MVT::f16);
632   }
633 
634   bool isMem() const override {
635     return false;
636   }
637 
638   bool isExpr() const {
639     return Kind == Expression;
640   }
641 
642   bool isSoppBrTarget() const {
643     return isExpr() || isImm();
644   }
645 
646   bool isSWaitCnt() const;
647   bool isHwreg() const;
648   bool isSendMsg() const;
649   bool isSwizzle() const;
650   bool isSMRDOffset8() const;
651   bool isSMRDOffset20() const;
652   bool isSMRDLiteralOffset() const;
653   bool isDPP8() const;
654   bool isDPPCtrl() const;
655   bool isBLGP() const;
656   bool isCBSZ() const;
657   bool isABID() const;
658   bool isGPRIdxMode() const;
659   bool isS16Imm() const;
660   bool isU16Imm() const;
661   bool isEndpgm() const;
662 
663   StringRef getExpressionAsToken() const {
664     assert(isExpr());
665     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
666     return S->getSymbol().getName();
667   }
668 
669   StringRef getToken() const {
670     assert(isToken());
671 
672     if (Kind == Expression)
673       return getExpressionAsToken();
674 
675     return StringRef(Tok.Data, Tok.Length);
676   }
677 
678   int64_t getImm() const {
679     assert(isImm());
680     return Imm.Val;
681   }
682 
683   ImmTy getImmTy() const {
684     assert(isImm());
685     return Imm.Type;
686   }
687 
688   unsigned getReg() const override {
689     assert(isRegKind());
690     return Reg.RegNo;
691   }
692 
693   SMLoc getStartLoc() const override {
694     return StartLoc;
695   }
696 
697   SMLoc getEndLoc() const override {
698     return EndLoc;
699   }
700 
701   SMRange getLocRange() const {
702     return SMRange(StartLoc, EndLoc);
703   }
704 
705   Modifiers getModifiers() const {
706     assert(isRegKind() || isImmTy(ImmTyNone));
707     return isRegKind() ? Reg.Mods : Imm.Mods;
708   }
709 
710   void setModifiers(Modifiers Mods) {
711     assert(isRegKind() || isImmTy(ImmTyNone));
712     if (isRegKind())
713       Reg.Mods = Mods;
714     else
715       Imm.Mods = Mods;
716   }
717 
718   bool hasModifiers() const {
719     return getModifiers().hasModifiers();
720   }
721 
722   bool hasFPModifiers() const {
723     return getModifiers().hasFPModifiers();
724   }
725 
726   bool hasIntModifiers() const {
727     return getModifiers().hasIntModifiers();
728   }
729 
730   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
731 
732   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
733 
734   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
735 
736   template <unsigned Bitwidth>
737   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
738 
739   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
740     addKImmFPOperands<16>(Inst, N);
741   }
742 
743   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
744     addKImmFPOperands<32>(Inst, N);
745   }
746 
747   void addRegOperands(MCInst &Inst, unsigned N) const;
748 
749   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
750     addRegOperands(Inst, N);
751   }
752 
753   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
754     if (isRegKind())
755       addRegOperands(Inst, N);
756     else if (isExpr())
757       Inst.addOperand(MCOperand::createExpr(Expr));
758     else
759       addImmOperands(Inst, N);
760   }
761 
762   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
763     Modifiers Mods = getModifiers();
764     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
765     if (isRegKind()) {
766       addRegOperands(Inst, N);
767     } else {
768       addImmOperands(Inst, N, false);
769     }
770   }
771 
772   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
773     assert(!hasIntModifiers());
774     addRegOrImmWithInputModsOperands(Inst, N);
775   }
776 
777   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
778     assert(!hasFPModifiers());
779     addRegOrImmWithInputModsOperands(Inst, N);
780   }
781 
782   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
783     Modifiers Mods = getModifiers();
784     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
785     assert(isRegKind());
786     addRegOperands(Inst, N);
787   }
788 
789   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
790     assert(!hasIntModifiers());
791     addRegWithInputModsOperands(Inst, N);
792   }
793 
794   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
795     assert(!hasFPModifiers());
796     addRegWithInputModsOperands(Inst, N);
797   }
798 
799   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
800     if (isImm())
801       addImmOperands(Inst, N);
802     else {
803       assert(isExpr());
804       Inst.addOperand(MCOperand::createExpr(Expr));
805     }
806   }
807 
808   static void printImmTy(raw_ostream& OS, ImmTy Type) {
809     switch (Type) {
810     case ImmTyNone: OS << "None"; break;
811     case ImmTyGDS: OS << "GDS"; break;
812     case ImmTyLDS: OS << "LDS"; break;
813     case ImmTyOffen: OS << "Offen"; break;
814     case ImmTyIdxen: OS << "Idxen"; break;
815     case ImmTyAddr64: OS << "Addr64"; break;
816     case ImmTyOffset: OS << "Offset"; break;
817     case ImmTyInstOffset: OS << "InstOffset"; break;
818     case ImmTyOffset0: OS << "Offset0"; break;
819     case ImmTyOffset1: OS << "Offset1"; break;
820     case ImmTyDLC: OS << "DLC"; break;
821     case ImmTyGLC: OS << "GLC"; break;
822     case ImmTySLC: OS << "SLC"; break;
823     case ImmTyTFE: OS << "TFE"; break;
824     case ImmTyD16: OS << "D16"; break;
825     case ImmTyFORMAT: OS << "FORMAT"; break;
826     case ImmTyClampSI: OS << "ClampSI"; break;
827     case ImmTyOModSI: OS << "OModSI"; break;
828     case ImmTyDPP8: OS << "DPP8"; break;
829     case ImmTyDppCtrl: OS << "DppCtrl"; break;
830     case ImmTyDppRowMask: OS << "DppRowMask"; break;
831     case ImmTyDppBankMask: OS << "DppBankMask"; break;
832     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
833     case ImmTyDppFi: OS << "FI"; break;
834     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
835     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
836     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
837     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
838     case ImmTyDMask: OS << "DMask"; break;
839     case ImmTyDim: OS << "Dim"; break;
840     case ImmTyUNorm: OS << "UNorm"; break;
841     case ImmTyDA: OS << "DA"; break;
842     case ImmTyR128A16: OS << "R128A16"; break;
843     case ImmTyLWE: OS << "LWE"; break;
844     case ImmTyOff: OS << "Off"; break;
845     case ImmTyExpTgt: OS << "ExpTgt"; break;
846     case ImmTyExpCompr: OS << "ExpCompr"; break;
847     case ImmTyExpVM: OS << "ExpVM"; break;
848     case ImmTyHwreg: OS << "Hwreg"; break;
849     case ImmTySendMsg: OS << "SendMsg"; break;
850     case ImmTyInterpSlot: OS << "InterpSlot"; break;
851     case ImmTyInterpAttr: OS << "InterpAttr"; break;
852     case ImmTyAttrChan: OS << "AttrChan"; break;
853     case ImmTyOpSel: OS << "OpSel"; break;
854     case ImmTyOpSelHi: OS << "OpSelHi"; break;
855     case ImmTyNegLo: OS << "NegLo"; break;
856     case ImmTyNegHi: OS << "NegHi"; break;
857     case ImmTySwizzle: OS << "Swizzle"; break;
858     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
859     case ImmTyHigh: OS << "High"; break;
860     case ImmTyBLGP: OS << "BLGP"; break;
861     case ImmTyCBSZ: OS << "CBSZ"; break;
862     case ImmTyABID: OS << "ABID"; break;
863     case ImmTyEndpgm: OS << "Endpgm"; break;
864     }
865   }
866 
867   void print(raw_ostream &OS) const override {
868     switch (Kind) {
869     case Register:
870       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
871       break;
872     case Immediate:
873       OS << '<' << getImm();
874       if (getImmTy() != ImmTyNone) {
875         OS << " type: "; printImmTy(OS, getImmTy());
876       }
877       OS << " mods: " << Imm.Mods << '>';
878       break;
879     case Token:
880       OS << '\'' << getToken() << '\'';
881       break;
882     case Expression:
883       OS << "<expr " << *Expr << '>';
884       break;
885     }
886   }
887 
888   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
889                                       int64_t Val, SMLoc Loc,
890                                       ImmTy Type = ImmTyNone,
891                                       bool IsFPImm = false) {
892     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
893     Op->Imm.Val = Val;
894     Op->Imm.IsFPImm = IsFPImm;
895     Op->Imm.Type = Type;
896     Op->Imm.Mods = Modifiers();
897     Op->StartLoc = Loc;
898     Op->EndLoc = Loc;
899     return Op;
900   }
901 
902   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
903                                         StringRef Str, SMLoc Loc,
904                                         bool HasExplicitEncodingSize = true) {
905     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
906     Res->Tok.Data = Str.data();
907     Res->Tok.Length = Str.size();
908     Res->StartLoc = Loc;
909     Res->EndLoc = Loc;
910     return Res;
911   }
912 
913   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
914                                       unsigned RegNo, SMLoc S,
915                                       SMLoc E) {
916     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
917     Op->Reg.RegNo = RegNo;
918     Op->Reg.Mods = Modifiers();
919     Op->StartLoc = S;
920     Op->EndLoc = E;
921     return Op;
922   }
923 
924   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
925                                        const class MCExpr *Expr, SMLoc S) {
926     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
927     Op->Expr = Expr;
928     Op->StartLoc = S;
929     Op->EndLoc = S;
930     return Op;
931   }
932 };
933 
934 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
935   OS << "abs:" << Mods.Abs << " neg:" << Mods.Neg << " sext:" << Mods.Sext;
936   return OS;
937 }
938 
939 //===----------------------------------------------------------------------===//
940 // AsmParser
941 //===----------------------------------------------------------------------===//
942 
943 // Holds info related to the current kernel, e.g. the count of SGPRs used.
944 // Kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the next
945 // .amdgpu_hsa_kernel directive or at EOF.
946 class KernelScopeInfo {
947   int SgprIndexUnusedMin = -1;
948   int VgprIndexUnusedMin = -1;
949   MCContext *Ctx = nullptr;
950 
951   void usesSgprAt(int i) {
952     if (i >= SgprIndexUnusedMin) {
953       SgprIndexUnusedMin = ++i;
954       if (Ctx) {
955         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
956         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
957       }
958     }
959   }
960 
961   void usesVgprAt(int i) {
962     if (i >= VgprIndexUnusedMin) {
963       VgprIndexUnusedMin = ++i;
964       if (Ctx) {
965         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
966         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
967       }
968     }
969   }
970 
971 public:
972   KernelScopeInfo() = default;
973 
974   void initialize(MCContext &Context) {
975     Ctx = &Context;
976     usesSgprAt(SgprIndexUnusedMin = -1);
977     usesVgprAt(VgprIndexUnusedMin = -1);
978   }
979 
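  // Illustrative example: usesRegister(IS_VGPR, /*DwordRegIndex=*/3,
  // /*RegWidth=*/2) covers v[3:4], so usesVgprAt(4) raises
  // .kernel.vgpr_count to 5.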
980   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
981     switch (RegKind) {
982       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
983       case IS_AGPR: // fall through
984       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
985       default: break;
986     }
987   }
988 };
989 
990 class AMDGPUAsmParser : public MCTargetAsmParser {
991   MCAsmParser &Parser;
992 
993   // Number of extra operands parsed after the first optional operand.
994   // This may be necessary to skip hardcoded mandatory operands.
995   static const unsigned MAX_OPR_LOOKAHEAD = 8;
996 
997   unsigned ForcedEncodingSize = 0;
998   bool ForcedDPP = false;
999   bool ForcedSDWA = false;
1000   KernelScopeInfo KernelScope;
1001 
1002   /// @name Auto-generated Match Functions
1003   /// {
1004 
1005 #define GET_ASSEMBLER_HEADER
1006 #include "AMDGPUGenAsmMatcher.inc"
1007 
1008   /// }
1009 
1010 private:
1011   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1012   bool OutOfRangeError(SMRange Range);
1013   /// Calculate VGPR/SGPR blocks required for the given target, reserved
1014   /// registers, and user-specified NextFreeXGPR values.
1015   ///
1016   /// \param Features [in] Target features, used for bug corrections.
1017   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1018   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1019   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1020   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1021   /// descriptor field, if valid.
1022   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1023   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1024   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1025   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1026   /// \param VGPRBlocks [out] Result VGPR block count.
1027   /// \param SGPRBlocks [out] Result SGPR block count.
1028   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1029                           bool FlatScrUsed, bool XNACKUsed,
1030                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1031                           SMRange VGPRRange, unsigned NextFreeSGPR,
1032                           SMRange SGPRRange, unsigned &VGPRBlocks,
1033                           unsigned &SGPRBlocks);
1034   bool ParseDirectiveAMDGCNTarget();
1035   bool ParseDirectiveAMDHSAKernel();
1036   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1037   bool ParseDirectiveHSACodeObjectVersion();
1038   bool ParseDirectiveHSACodeObjectISA();
1039   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1040   bool ParseDirectiveAMDKernelCodeT();
1041   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1042   bool ParseDirectiveAMDGPUHsaKernel();
1043 
1044   bool ParseDirectiveISAVersion();
1045   bool ParseDirectiveHSAMetadata();
1046   bool ParseDirectivePALMetadataBegin();
1047   bool ParseDirectivePALMetadata();
1048   bool ParseDirectiveAMDGPULDS();
1049 
1050   /// Common code to parse out a block of text (typically YAML) between start and
1051   /// end directives.
1052   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1053                            const char *AssemblerDirectiveEnd,
1054                            std::string &CollectString);
1055 
1056   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1057                              RegisterKind RegKind, unsigned Reg1,
1058                              unsigned RegNum);
1059   bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
1060                            unsigned& RegNum, unsigned& RegWidth,
1061                            unsigned *DwordRegIndex);
1062   bool isRegister();
1063   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1064   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1065   void initializeGprCountSymbol(RegisterKind RegKind);
1066   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1067                              unsigned RegWidth);
1068   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1069                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1070   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1071                  bool IsGdsHardcoded);
1072 
1073 public:
1074   enum AMDGPUMatchResultTy {
1075     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1076   };
1077   enum OperandMode {
1078     OperandMode_Default,
1079     OperandMode_NSA,
1080   };
1081 
1082   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1083 
1084   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1085                const MCInstrInfo &MII,
1086                const MCTargetOptions &Options)
1087       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1088     MCAsmParserExtension::Initialize(Parser);
1089 
1090     if (getFeatureBits().none()) {
1091       // Set default features.
1092       copySTI().ToggleFeature("southern-islands");
1093     }
1094 
1095     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1096 
1097     {
1098       // TODO: make these pre-defined variables read-only.
1099       // Currently there is no suitable machinery in core llvm-mc for this.
1100       // MCSymbol::isRedefinable is intended for another purpose, and
1101       // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1102       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1103       MCContext &Ctx = getContext();
1104       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1105         MCSymbol *Sym =
1106             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1107         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1108         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1109         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1110         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1111         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1112       } else {
1113         MCSymbol *Sym =
1114             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1115         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1116         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1117         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1118         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1119         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1120       }
1121       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1122         initializeGprCountSymbol(IS_VGPR);
1123         initializeGprCountSymbol(IS_SGPR);
1124       } else
1125         KernelScope.initialize(getContext());
1126     }
1127   }
1128 
1129   bool hasXNACK() const {
1130     return AMDGPU::hasXNACK(getSTI());
1131   }
1132 
1133   bool hasMIMG_R128() const {
1134     return AMDGPU::hasMIMG_R128(getSTI());
1135   }
1136 
1137   bool hasPackedD16() const {
1138     return AMDGPU::hasPackedD16(getSTI());
1139   }
1140 
1141   bool isSI() const {
1142     return AMDGPU::isSI(getSTI());
1143   }
1144 
1145   bool isCI() const {
1146     return AMDGPU::isCI(getSTI());
1147   }
1148 
1149   bool isVI() const {
1150     return AMDGPU::isVI(getSTI());
1151   }
1152 
1153   bool isGFX9() const {
1154     return AMDGPU::isGFX9(getSTI());
1155   }
1156 
1157   bool isGFX10() const {
1158     return AMDGPU::isGFX10(getSTI());
1159   }
1160 
1161   bool hasInv2PiInlineImm() const {
1162     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1163   }
1164 
1165   bool hasFlatOffsets() const {
1166     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1167   }
1168 
1169   bool hasSGPR102_SGPR103() const {
1170     return !isVI() && !isGFX9();
1171   }
1172 
1173   bool hasSGPR104_SGPR105() const {
1174     return isGFX10();
1175   }
1176 
1177   bool hasIntClamp() const {
1178     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1179   }
1180 
1181   AMDGPUTargetStreamer &getTargetStreamer() {
1182     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1183     return static_cast<AMDGPUTargetStreamer &>(TS);
1184   }
1185 
1186   const MCRegisterInfo *getMRI() const {
1187     // We need this const_cast because for some reason getContext() is not const
1188     // in MCAsmParser.
1189     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1190   }
1191 
1192   const MCInstrInfo *getMII() const {
1193     return &MII;
1194   }
1195 
1196   const FeatureBitset &getFeatureBits() const {
1197     return getSTI().getFeatureBits();
1198   }
1199 
1200   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1201   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1202   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1203 
1204   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1205   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1206   bool isForcedDPP() const { return ForcedDPP; }
1207   bool isForcedSDWA() const { return ForcedSDWA; }
1208   ArrayRef<unsigned> getMatchedVariants() const;
1209 
1210   std::unique_ptr<AMDGPUOperand> parseRegister();
1211   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1212   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1213   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1214                                       unsigned Kind) override;
1215   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1216                                OperandVector &Operands, MCStreamer &Out,
1217                                uint64_t &ErrorInfo,
1218                                bool MatchingInlineAsm) override;
1219   bool ParseDirective(AsmToken DirectiveID) override;
1220   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1221                                     OperandMode Mode = OperandMode_Default);
1222   StringRef parseMnemonicSuffix(StringRef Name);
1223   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1224                         SMLoc NameLoc, OperandVector &Operands) override;
1225   //bool ProcessInstruction(MCInst &Inst);
1226 
1227   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1228 
1229   OperandMatchResultTy
1230   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1231                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1232                      bool (*ConvertResult)(int64_t &) = nullptr);
1233 
1234   OperandMatchResultTy
1235   parseOperandArrayWithPrefix(const char *Prefix,
1236                               OperandVector &Operands,
1237                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1238                               bool (*ConvertResult)(int64_t&) = nullptr);
1239 
1240   OperandMatchResultTy
1241   parseNamedBit(const char *Name, OperandVector &Operands,
1242                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1243   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1244                                              StringRef &Value);
1245 
1246   bool isModifier();
1247   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1248   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1249   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1250   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1251   bool parseSP3NegModifier();
1252   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1253   OperandMatchResultTy parseReg(OperandVector &Operands);
1254   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1255   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1256   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1257   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1258   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1259   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1260   OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1261 
1262   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1263   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1264   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1265   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1266 
1267   bool parseCnt(int64_t &IntVal);
1268   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1269   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1270 
1271 private:
1272   struct OperandInfoTy {
1273     int64_t Id;
1274     bool IsSymbolic = false;
1275     bool IsDefined = false;
1276 
1277     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1278   };
1279 
1280   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1281   bool validateSendMsg(const OperandInfoTy &Msg,
1282                        const OperandInfoTy &Op,
1283                        const OperandInfoTy &Stream,
1284                        const SMLoc Loc);
1285 
1286   bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1287   bool validateHwreg(const OperandInfoTy &HwReg,
1288                      const int64_t Offset,
1289                      const int64_t Width,
1290                      const SMLoc Loc);
1291 
1292   void errorExpTgt();
1293   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1294   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1295 
1296   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1297   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1298   bool validateSOPLiteral(const MCInst &Inst) const;
1299   bool validateConstantBusLimitations(const MCInst &Inst);
1300   bool validateEarlyClobberLimitations(const MCInst &Inst);
1301   bool validateIntClampSupported(const MCInst &Inst);
1302   bool validateMIMGAtomicDMask(const MCInst &Inst);
1303   bool validateMIMGGatherDMask(const MCInst &Inst);
1304   bool validateMIMGDataSize(const MCInst &Inst);
1305   bool validateMIMGAddrSize(const MCInst &Inst);
1306   bool validateMIMGD16(const MCInst &Inst);
1307   bool validateMIMGDim(const MCInst &Inst);
1308   bool validateLdsDirect(const MCInst &Inst);
1309   bool validateOpSel(const MCInst &Inst);
1310   bool validateVccOperand(unsigned Reg) const;
1311   bool validateVOP3Literal(const MCInst &Inst) const;
1312   unsigned getConstantBusLimit(unsigned Opcode) const;
1313   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1314   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1315   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1316 
1317   bool isId(const StringRef Id) const;
1318   bool isId(const AsmToken &Token, const StringRef Id) const;
1319   bool isToken(const AsmToken::TokenKind Kind) const;
1320   bool trySkipId(const StringRef Id);
1321   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1322   bool trySkipToken(const AsmToken::TokenKind Kind);
1323   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1324   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1325   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1326   AsmToken::TokenKind getTokenKind() const;
1327   bool parseExpr(int64_t &Imm);
1328   bool parseExpr(OperandVector &Operands);
1329   StringRef getTokenStr() const;
1330   AsmToken peekToken();
1331   AsmToken getToken() const;
1332   SMLoc getLoc() const;
1333   void lex();
1334 
1335 public:
1336   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1337   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1338 
1339   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1340   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1341   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1342   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1343   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1344   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1345 
1346   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1347                             const unsigned MinVal,
1348                             const unsigned MaxVal,
1349                             const StringRef ErrMsg);
1350   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1351   bool parseSwizzleOffset(int64_t &Imm);
1352   bool parseSwizzleMacro(int64_t &Imm);
1353   bool parseSwizzleQuadPerm(int64_t &Imm);
1354   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1355   bool parseSwizzleBroadcast(int64_t &Imm);
1356   bool parseSwizzleSwap(int64_t &Imm);
1357   bool parseSwizzleReverse(int64_t &Imm);
1358 
1359   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1360   int64_t parseGPRIdxMacro();
1361 
1362   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1363   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1364   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1365   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1366   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1367 
1368   AMDGPUOperand::Ptr defaultDLC() const;
1369   AMDGPUOperand::Ptr defaultGLC() const;
1370   AMDGPUOperand::Ptr defaultSLC() const;
1371 
1372   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1373   AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1374   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1375   AMDGPUOperand::Ptr defaultFlatOffset() const;
1376 
1377   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1378 
1379   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1380                OptionalImmIndexMap &OptionalIdx);
1381   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1382   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1383   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1384 
1385   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1386 
1387   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1388                bool IsAtomic = false);
1389   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1390 
1391   OperandMatchResultTy parseDim(OperandVector &Operands);
1392   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1393   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1394   AMDGPUOperand::Ptr defaultRowMask() const;
1395   AMDGPUOperand::Ptr defaultBankMask() const;
1396   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1397   AMDGPUOperand::Ptr defaultFI() const;
1398   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1399   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1400 
1401   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1402                                     AMDGPUOperand::ImmTy Type);
1403   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1404   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1405   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1406   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1407   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1408   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1409                 uint64_t BasicInstType, bool skipVcc = false);
1410 
1411   AMDGPUOperand::Ptr defaultBLGP() const;
1412   AMDGPUOperand::Ptr defaultCBSZ() const;
1413   AMDGPUOperand::Ptr defaultABID() const;
1414 
1415   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1416   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1417 };
1418 
1419 struct OptionalOperand {
1420   const char *Name;
1421   AMDGPUOperand::ImmTy Type;
1422   bool IsBit;
1423   bool (*ConvertResult)(int64_t&);
1424 };
1425 
1426 } // end anonymous namespace
1427 
1428 // May be called with integer type with equivalent bitwidth.
1429 static const fltSemantics *getFltSemantics(unsigned Size) {
1430   switch (Size) {
1431   case 4:
1432     return &APFloat::IEEEsingle();
1433   case 8:
1434     return &APFloat::IEEEdouble();
1435   case 2:
1436     return &APFloat::IEEEhalf();
1437   default:
1438     llvm_unreachable("unsupported fp type");
1439   }
1440 }
1441 
1442 static const fltSemantics *getFltSemantics(MVT VT) {
1443   return getFltSemantics(VT.getSizeInBits() / 8);
1444 }
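// For example, getFltSemantics(MVT::f16) maps 16 bits to 2 bytes and returns
// APFloat::IEEEhalf(); an unsupported width hits the llvm_unreachable above.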
1445 
1446 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1447   switch (OperandType) {
1448   case AMDGPU::OPERAND_REG_IMM_INT32:
1449   case AMDGPU::OPERAND_REG_IMM_FP32:
1450   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1451   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1452   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1453   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1454     return &APFloat::IEEEsingle();
1455   case AMDGPU::OPERAND_REG_IMM_INT64:
1456   case AMDGPU::OPERAND_REG_IMM_FP64:
1457   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1458   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1459     return &APFloat::IEEEdouble();
1460   case AMDGPU::OPERAND_REG_IMM_INT16:
1461   case AMDGPU::OPERAND_REG_IMM_FP16:
1462   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1463   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1464   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1465   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1466   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1467   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1468   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1469   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1470   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1471   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1472     return &APFloat::IEEEhalf();
1473   default:
1474     llvm_unreachable("unsupported fp type");
1475   }
1476 }
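// Note that the packed 16-bit operand types (V2INT16/V2FP16) share IEEEhalf()
// here; isLiteralImm() below describes how such packed literals are handled.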
1477 
1478 //===----------------------------------------------------------------------===//
1479 // Operand
1480 //===----------------------------------------------------------------------===//
1481 
1482 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1483   bool Lost;
1484 
1485   // Convert the literal to the semantics of the requested type.
1486   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1487                                                APFloat::rmNearestTiesToEven,
1488                                                &Lost);
1489   // We allow precision loss but not overflow or underflow.
1490   if (Status != APFloat::opOK &&
1491       Lost &&
1492       ((Status & APFloat::opOverflow)  != 0 ||
1493        (Status & APFloat::opUnderflow) != 0)) {
1494     return false;
1495   }
1496 
1497   return true;
1498 }
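// For instance, 1.0 converts to f16 exactly and 1.1 converts with only
// precision loss, so both are accepted; 1.0e10 overflows f16 and is rejected.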
1499 
1500 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1501   return isUIntN(Size, Val) || isIntN(Size, Val);
1502 }
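// e.g. isSafeTruncation(0xFFFF, 16) and isSafeTruncation(-1, 16) both hold,
// while isSafeTruncation(0x10000, 16) does not.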
1503 
1504 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1505 
1506   // This is a hack to enable named inline values like
1507   // shared_base with both 32-bit and 64-bit operands.
1508   // Note that these values are defined as
1509   // 32-bit operands only.
1510   if (isInlineValue()) {
1511     return true;
1512   }
1513 
1514   if (!isImmTy(ImmTyNone)) {
1515     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1516     return false;
1517   }
1518   // TODO: We should avoid using host floats here. It would be better to
1519   // check the float bit values, which is what a few other places do.
1520   // We've had bot failures before due to weird NaN support on MIPS hosts.
1521 
1522   APInt Literal(64, Imm.Val);
1523 
1524   if (Imm.IsFPImm) { // We got fp literal token
1525     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1526       return AMDGPU::isInlinableLiteral64(Imm.Val,
1527                                           AsmParser->hasInv2PiInlineImm());
1528     }
1529 
1530     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1531     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1532       return false;
1533 
1534     if (type.getScalarSizeInBits() == 16) {
1535       return AMDGPU::isInlinableLiteral16(
1536         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1537         AsmParser->hasInv2PiInlineImm());
1538     }
1539 
1540     // Check if the single-precision literal is inlinable.
1541     return AMDGPU::isInlinableLiteral32(
1542       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1543       AsmParser->hasInv2PiInlineImm());
1544   }
1545 
1546   // We got int literal token.
1547   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1548     return AMDGPU::isInlinableLiteral64(Imm.Val,
1549                                         AsmParser->hasInv2PiInlineImm());
1550   }
1551 
1552   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1553     return false;
1554   }
1555 
1556   if (type.getScalarSizeInBits() == 16) {
1557     return AMDGPU::isInlinableLiteral16(
1558       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1559       AsmParser->hasInv2PiInlineImm());
1560   }
1561 
1562   return AMDGPU::isInlinableLiteral32(
1563     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1564     AsmParser->hasInv2PiInlineImm());
1565 }
1566 
1567 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1568   // Check that this immediate can be added as literal
1569   if (!isImmTy(ImmTyNone)) {
1570     return false;
1571   }
1572 
1573   if (!Imm.IsFPImm) {
1574     // We got int literal token.
1575 
1576     if (type == MVT::f64 && hasFPModifiers()) {
1577       // FP modifiers cannot be applied to int literals while preserving the same
1578       // semantics for VOP1/2/C and VOP3 because of integer truncation. To avoid
1579       // ambiguity, these cases are disallowed.
1580       return false;
1581     }
1582 
1583     unsigned Size = type.getSizeInBits();
1584     if (Size == 64)
1585       Size = 32;
1586 
1587     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1588     // types.
1589     return isSafeTruncation(Imm.Val, Size);
1590   }
1591 
1592   // We got fp literal token
1593   if (type == MVT::f64) { // Expected 64-bit fp operand
1594     // The low 32 bits of the literal will be set to zero, but we accept such literals
1595     return true;
1596   }
1597 
1598   if (type == MVT::i64) { // Expected 64-bit int operand
1599     // We don't allow fp literals in 64-bit integer instructions. It is
1600     // unclear how we should encode them.
1601     return false;
1602   }
1603 
1604   // We allow fp literals with f16x2 operands assuming that the specified
1605   // literal goes into the lower half and the upper half is zero. We also
1606   // require that the literal can be losslessly converted to f16.
1607   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1608                      (type == MVT::v2i16)? MVT::i16 : type;
1609 
1610   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1611   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1612 }
1613 
1614 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1615   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1616 }
1617 
1618 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1619   if (AsmParser->isVI())
1620     return isVReg32();
1621   else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1622     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1623   else
1624     return false;
1625 }
1626 
1627 bool AMDGPUOperand::isSDWAFP16Operand() const {
1628   return isSDWAOperand(MVT::f16);
1629 }
1630 
1631 bool AMDGPUOperand::isSDWAFP32Operand() const {
1632   return isSDWAOperand(MVT::f32);
1633 }
1634 
1635 bool AMDGPUOperand::isSDWAInt16Operand() const {
1636   return isSDWAOperand(MVT::i16);
1637 }
1638 
1639 bool AMDGPUOperand::isSDWAInt32Operand() const {
1640   return isSDWAOperand(MVT::i32);
1641 }
1642 
1643 bool AMDGPUOperand::isBoolReg() const {
1644   return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1645          (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1646 }
1647 
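// Apply the 'abs' and 'neg' input modifiers directly to the bit pattern of an
// FP literal of the given size in bytes: 'abs' clears the sign bit, 'neg'
// flips it.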
1648 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1649 {
1650   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1651   assert(Size == 2 || Size == 4 || Size == 8);
1652 
1653   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1654 
1655   if (Imm.Mods.Abs) {
1656     Val &= ~FpSignMask;
1657   }
1658   if (Imm.Mods.Neg) {
1659     Val ^= FpSignMask;
1660   }
1661 
1662   return Val;
1663 }
1664 
1665 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1666   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1667                              Inst.getNumOperands())) {
1668     addLiteralImmOperand(Inst, Imm.Val,
1669                          ApplyModifiers &&
1670                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1671   } else {
1672     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1673     Inst.addOperand(MCOperand::createImm(Imm.Val));
1674   }
1675 }
1676 
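// Encode a literal value as an immediate operand of Inst. Inline constants are
// added as-is; other values are truncated or converted to the operand's width
// (for non-inlinable 64-bit FP operands only the high 32 bits are encoded).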
1677 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1678   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1679   auto OpNum = Inst.getNumOperands();
1680   // Check that this operand accepts literals
1681   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1682 
1683   if (ApplyModifiers) {
1684     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1685     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1686     Val = applyInputFPModifiers(Val, Size);
1687   }
1688 
1689   APInt Literal(64, Val);
1690   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1691 
1692   if (Imm.IsFPImm) { // We got fp literal token
1693     switch (OpTy) {
1694     case AMDGPU::OPERAND_REG_IMM_INT64:
1695     case AMDGPU::OPERAND_REG_IMM_FP64:
1696     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1697     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1698       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1699                                        AsmParser->hasInv2PiInlineImm())) {
1700         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1701         return;
1702       }
1703 
1704       // Non-inlineable
1705       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1706         // For fp operands we check if low 32 bits are zeros
1707         if (Literal.getLoBits(32) != 0) {
1708           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1709           "Can't encode literal as exact 64-bit floating-point operand. "
1710           "Low 32-bits will be set to zero");
1711         }
1712 
1713         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1714         return;
1715       }
1716 
1717       // We don't allow fp literals in 64-bit integer instructions. It is
1718       // unclear how we should encode them. This case should be checked earlier
1719       // in predicate methods (isLiteralImm())
1720       llvm_unreachable("fp literal in 64-bit integer instruction.");
1721 
1722     case AMDGPU::OPERAND_REG_IMM_INT32:
1723     case AMDGPU::OPERAND_REG_IMM_FP32:
1724     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1725     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1726     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1727     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1728     case AMDGPU::OPERAND_REG_IMM_INT16:
1729     case AMDGPU::OPERAND_REG_IMM_FP16:
1730     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1731     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1732     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1733     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1734     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1735     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1736     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1737     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1738     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1739     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1740       bool lost;
1741       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1742       // Convert the literal to the semantics of the operand type
1743       FPLiteral.convert(*getOpFltSemantics(OpTy),
1744                         APFloat::rmNearestTiesToEven, &lost);
1745       // We allow precision loss but not overflow or underflow. This should be
1746       // checked earlier in isLiteralImm()
1747 
1748       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1749       Inst.addOperand(MCOperand::createImm(ImmVal));
1750       return;
1751     }
1752     default:
1753       llvm_unreachable("invalid operand size");
1754     }
1755 
1756     return;
1757   }
1758 
1759   // We got int literal token.
1760   // Only sign extend inline immediates.
1761   switch (OpTy) {
1762   case AMDGPU::OPERAND_REG_IMM_INT32:
1763   case AMDGPU::OPERAND_REG_IMM_FP32:
1764   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1765   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1766   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1767   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1768   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1769   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1770     if (isSafeTruncation(Val, 32) &&
1771         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1772                                      AsmParser->hasInv2PiInlineImm())) {
1773       Inst.addOperand(MCOperand::createImm(Val));
1774       return;
1775     }
1776 
1777     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1778     return;
1779 
1780   case AMDGPU::OPERAND_REG_IMM_INT64:
1781   case AMDGPU::OPERAND_REG_IMM_FP64:
1782   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1783   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1784     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1785       Inst.addOperand(MCOperand::createImm(Val));
1786       return;
1787     }
1788 
1789     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1790     return;
1791 
1792   case AMDGPU::OPERAND_REG_IMM_INT16:
1793   case AMDGPU::OPERAND_REG_IMM_FP16:
1794   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1795   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1796   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1797   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1798     if (isSafeTruncation(Val, 16) &&
1799         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1800                                      AsmParser->hasInv2PiInlineImm())) {
1801       Inst.addOperand(MCOperand::createImm(Val));
1802       return;
1803     }
1804 
1805     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1806     return;
1807 
1808   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1809   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1810   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1811   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
1812     assert(isSafeTruncation(Val, 16));
1813     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1814                                         AsmParser->hasInv2PiInlineImm()));
1815 
1816     Inst.addOperand(MCOperand::createImm(Val));
1817     return;
1818   }
1819   default:
1820     llvm_unreachable("invalid operand size");
1821   }
1822 }
1823 
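// Add a KImm FP operand of the given bit width. Integer tokens are truncated
// to Bitwidth bits; FP tokens are converted from double to the corresponding
// FP format and added as raw bits.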
1824 template <unsigned Bitwidth>
1825 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1826   APInt Literal(64, Imm.Val);
1827 
1828   if (!Imm.IsFPImm) {
1829     // We got int literal token.
1830     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1831     return;
1832   }
1833 
1834   bool Lost;
1835   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1836   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1837                     APFloat::rmNearestTiesToEven, &Lost);
1838   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1839 }
1840 
1841 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1842   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1843 }
1844 
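// Registers which denote named inline constant values (e.g. shared_base, scc
// or null) rather than ordinary registers.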
1845 static bool isInlineValue(unsigned Reg) {
1846   switch (Reg) {
1847   case AMDGPU::SRC_SHARED_BASE:
1848   case AMDGPU::SRC_SHARED_LIMIT:
1849   case AMDGPU::SRC_PRIVATE_BASE:
1850   case AMDGPU::SRC_PRIVATE_LIMIT:
1851   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1852     return true;
1853   case AMDGPU::SRC_VCCZ:
1854   case AMDGPU::SRC_EXECZ:
1855   case AMDGPU::SRC_SCC:
1856     return true;
1857   case AMDGPU::SGPR_NULL:
1858     return true;
1859   default:
1860     return false;
1861   }
1862 }
1863 
1864 bool AMDGPUOperand::isInlineValue() const {
1865   return isRegKind() && ::isInlineValue(getReg());
1866 }
1867 
1868 //===----------------------------------------------------------------------===//
1869 // AsmParser
1870 //===----------------------------------------------------------------------===//
1871 
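// Map a register kind and a width in 32-bit registers to the corresponding
// register class ID, or return -1 if no such class exists.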
1872 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1873   if (Is == IS_VGPR) {
1874     switch (RegWidth) {
1875       default: return -1;
1876       case 1: return AMDGPU::VGPR_32RegClassID;
1877       case 2: return AMDGPU::VReg_64RegClassID;
1878       case 3: return AMDGPU::VReg_96RegClassID;
1879       case 4: return AMDGPU::VReg_128RegClassID;
1880       case 5: return AMDGPU::VReg_160RegClassID;
1881       case 8: return AMDGPU::VReg_256RegClassID;
1882       case 16: return AMDGPU::VReg_512RegClassID;
1883       case 32: return AMDGPU::VReg_1024RegClassID;
1884     }
1885   } else if (Is == IS_TTMP) {
1886     switch (RegWidth) {
1887       default: return -1;
1888       case 1: return AMDGPU::TTMP_32RegClassID;
1889       case 2: return AMDGPU::TTMP_64RegClassID;
1890       case 4: return AMDGPU::TTMP_128RegClassID;
1891       case 8: return AMDGPU::TTMP_256RegClassID;
1892       case 16: return AMDGPU::TTMP_512RegClassID;
1893     }
1894   } else if (Is == IS_SGPR) {
1895     switch (RegWidth) {
1896       default: return -1;
1897       case 1: return AMDGPU::SGPR_32RegClassID;
1898       case 2: return AMDGPU::SGPR_64RegClassID;
1899       case 4: return AMDGPU::SGPR_128RegClassID;
1900       case 8: return AMDGPU::SGPR_256RegClassID;
1901       case 16: return AMDGPU::SGPR_512RegClassID;
1902     }
1903   } else if (Is == IS_AGPR) {
1904     switch (RegWidth) {
1905       default: return -1;
1906       case 1: return AMDGPU::AGPR_32RegClassID;
1907       case 2: return AMDGPU::AReg_64RegClassID;
1908       case 4: return AMDGPU::AReg_128RegClassID;
1909       case 16: return AMDGPU::AReg_512RegClassID;
1910       case 32: return AMDGPU::AReg_1024RegClassID;
1911     }
1912   }
1913   return -1;
1914 }
1915 
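// Map a special register name (many are also accepted with a "src_" prefix)
// to its register number, or return 0 if the name does not denote a special
// register.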
1916 static unsigned getSpecialRegForName(StringRef RegName) {
1917   return StringSwitch<unsigned>(RegName)
1918     .Case("exec", AMDGPU::EXEC)
1919     .Case("vcc", AMDGPU::VCC)
1920     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1921     .Case("xnack_mask", AMDGPU::XNACK_MASK)
1922     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
1923     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
1924     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1925     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1926     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
1927     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
1928     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1929     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1930     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1931     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1932     .Case("lds_direct", AMDGPU::LDS_DIRECT)
1933     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1934     .Case("m0", AMDGPU::M0)
1935     .Case("vccz", AMDGPU::SRC_VCCZ)
1936     .Case("src_vccz", AMDGPU::SRC_VCCZ)
1937     .Case("execz", AMDGPU::SRC_EXECZ)
1938     .Case("src_execz", AMDGPU::SRC_EXECZ)
1939     .Case("scc", AMDGPU::SRC_SCC)
1940     .Case("src_scc", AMDGPU::SRC_SCC)
1941     .Case("tba", AMDGPU::TBA)
1942     .Case("tma", AMDGPU::TMA)
1943     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1944     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1945     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1946     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1947     .Case("vcc_lo", AMDGPU::VCC_LO)
1948     .Case("vcc_hi", AMDGPU::VCC_HI)
1949     .Case("exec_lo", AMDGPU::EXEC_LO)
1950     .Case("exec_hi", AMDGPU::EXEC_HI)
1951     .Case("tma_lo", AMDGPU::TMA_LO)
1952     .Case("tma_hi", AMDGPU::TMA_HI)
1953     .Case("tba_lo", AMDGPU::TBA_LO)
1954     .Case("tba_hi", AMDGPU::TBA_HI)
1955     .Case("null", AMDGPU::SGPR_NULL)
1956     .Default(0);
1957 }
1958 
1959 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1960                                     SMLoc &EndLoc) {
1961   auto R = parseRegister();
1962   if (!R) return true;
1963   assert(R->isReg());
1964   RegNo = R->getReg();
1965   StartLoc = R->getStartLoc();
1966   EndLoc = R->getEndLoc();
1967   return false;
1968 }
1969 
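// Try to append register Reg1 to a register list that currently ends at
// Reg + RegWidth - 1. Special register halves (e.g. exec_lo followed by
// exec_hi) are merged into the combined register; VGPR/SGPR/AGPR/TTMP
// registers must be consecutive. Returns false if Reg1 cannot be appended.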
1970 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1971                                             RegisterKind RegKind, unsigned Reg1,
1972                                             unsigned RegNum) {
1973   switch (RegKind) {
1974   case IS_SPECIAL:
1975     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1976       Reg = AMDGPU::EXEC;
1977       RegWidth = 2;
1978       return true;
1979     }
1980     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1981       Reg = AMDGPU::FLAT_SCR;
1982       RegWidth = 2;
1983       return true;
1984     }
1985     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
1986       Reg = AMDGPU::XNACK_MASK;
1987       RegWidth = 2;
1988       return true;
1989     }
1990     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1991       Reg = AMDGPU::VCC;
1992       RegWidth = 2;
1993       return true;
1994     }
1995     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1996       Reg = AMDGPU::TBA;
1997       RegWidth = 2;
1998       return true;
1999     }
2000     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2001       Reg = AMDGPU::TMA;
2002       RegWidth = 2;
2003       return true;
2004     }
2005     return false;
2006   case IS_VGPR:
2007   case IS_SGPR:
2008   case IS_AGPR:
2009   case IS_TTMP:
2010     if (Reg1 != Reg + RegWidth) {
2011       return false;
2012     }
2013     RegWidth++;
2014     return true;
2015   default:
2016     llvm_unreachable("unexpected register kind");
2017   }
2018 }
2019 
2020 static constexpr StringLiteral Registers[] = {"v", "s", "ttmp", "acc", "a"};
2021 
2022 bool
2023 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2024                             const AsmToken &NextToken) const {
2025 
2026   // A list of consecutive registers: [s0,s1,s2,s3]
2027   if (Token.is(AsmToken::LBrac))
2028     return true;
2029 
2030   if (!Token.is(AsmToken::Identifier))
2031     return false;
2032 
2033   // A single register like s0 or a range of registers like s[0:1]
2034 
2035   StringRef RegName = Token.getString();
2036 
2037   for (StringRef Reg : Registers) {
2038     if (RegName.startswith(Reg)) {
2039       if (Reg.size() < RegName.size()) {
2040         unsigned RegNum;
2041         // A single register with an index: rXX
2042         if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum))
2043           return true;
2044       } else {
2045         // A range of registers: r[XX:YY].
2046         if (NextToken.is(AsmToken::LBrac))
2047           return true;
2048       }
2049     }
2050   }
2051 
2052   return getSpecialRegForName(RegName) != 0;
2053 }
2054 
2055 bool
2056 AMDGPUAsmParser::isRegister()
2057 {
2058   return isRegister(getToken(), peekToken());
2059 }
2060 
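// Parse a register reference. Accepted forms include special register names
// (e.g. vcc, m0), single registers such as v0 or s7, register ranges such as
// v[0:3], and lists of consecutive registers such as [s0,s1,s2,s3].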
2061 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2062                                           unsigned &RegNum, unsigned &RegWidth,
2063                                           unsigned *DwordRegIndex) {
2064   if (DwordRegIndex) { *DwordRegIndex = 0; }
2065   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2066   if (getLexer().is(AsmToken::Identifier)) {
2067     StringRef RegName = Parser.getTok().getString();
2068     if ((Reg = getSpecialRegForName(RegName))) {
2069       Parser.Lex();
2070       RegKind = IS_SPECIAL;
2071     } else {
2072       unsigned RegNumIndex = 0;
2073       if (RegName[0] == 'v') {
2074         RegNumIndex = 1;
2075         RegKind = IS_VGPR;
2076       } else if (RegName[0] == 's') {
2077         RegNumIndex = 1;
2078         RegKind = IS_SGPR;
2079       } else if (RegName[0] == 'a') {
2080         RegNumIndex = RegName.startswith("acc") ? 3 : 1;
2081         RegKind = IS_AGPR;
2082       } else if (RegName.startswith("ttmp")) {
2083         RegNumIndex = strlen("ttmp");
2084         RegKind = IS_TTMP;
2085       } else {
2086         return false;
2087       }
2088       if (RegName.size() > RegNumIndex) {
2089         // Single 32-bit register: vXX.
2090         if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
2091           return false;
2092         Parser.Lex();
2093         RegWidth = 1;
2094       } else {
2095         // Range of registers: v[XX:YY]. ":YY" is optional.
2096         Parser.Lex();
2097         int64_t RegLo, RegHi;
2098         if (getLexer().isNot(AsmToken::LBrac))
2099           return false;
2100         Parser.Lex();
2101 
2102         if (getParser().parseAbsoluteExpression(RegLo))
2103           return false;
2104 
2105         const bool isRBrace = getLexer().is(AsmToken::RBrac);
2106         if (!isRBrace && getLexer().isNot(AsmToken::Colon))
2107           return false;
2108         Parser.Lex();
2109 
2110         if (isRBrace) {
2111           RegHi = RegLo;
2112         } else {
2113           if (getParser().parseAbsoluteExpression(RegHi))
2114             return false;
2115 
2116           if (getLexer().isNot(AsmToken::RBrac))
2117             return false;
2118           Parser.Lex();
2119         }
2120         RegNum = (unsigned) RegLo;
2121         RegWidth = (RegHi - RegLo) + 1;
2122       }
2123     }
2124   } else if (getLexer().is(AsmToken::LBrac)) {
2125     // List of consecutive registers: [s0,s1,s2,s3]
2126     Parser.Lex();
2127     if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
2128       return false;
2129     if (RegWidth != 1)
2130       return false;
2131     RegisterKind RegKind1;
2132     unsigned Reg1, RegNum1, RegWidth1;
2133     do {
2134       if (getLexer().is(AsmToken::Comma)) {
2135         Parser.Lex();
2136       } else if (getLexer().is(AsmToken::RBrac)) {
2137         Parser.Lex();
2138         break;
2139       } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
2140         if (RegWidth1 != 1) {
2141           return false;
2142         }
2143         if (RegKind1 != RegKind) {
2144           return false;
2145         }
2146         if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
2147           return false;
2148         }
2149       } else {
2150         return false;
2151       }
2152     } while (true);
2153   } else {
2154     return false;
2155   }
2156   switch (RegKind) {
2157   case IS_SPECIAL:
2158     RegNum = 0;
2159     RegWidth = 1;
2160     break;
2161   case IS_VGPR:
2162   case IS_SGPR:
2163   case IS_AGPR:
2164   case IS_TTMP:
2165   {
2166     unsigned Size = 1;
2167     if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2168       // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
2169       Size = std::min(RegWidth, 4u);
2170     }
2171     if (RegNum % Size != 0)
2172       return false;
2173     if (DwordRegIndex) { *DwordRegIndex = RegNum; }
2174     RegNum = RegNum / Size;
2175     int RCID = getRegClass(RegKind, RegWidth);
2176     if (RCID == -1)
2177       return false;
2178     const MCRegisterClass RC = TRI->getRegClass(RCID);
2179     if (RegNum >= RC.getNumRegs())
2180       return false;
2181     Reg = RC.getRegister(RegNum);
2182     break;
2183   }
2184 
2185   default:
2186     llvm_unreachable("unexpected register kind");
2187   }
2188 
2189   if (!subtargetHasRegister(*TRI, Reg))
2190     return false;
2191   return true;
2192 }
2193 
2194 Optional<StringRef>
2195 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2196   switch (RegKind) {
2197   case IS_VGPR:
2198     return StringRef(".amdgcn.next_free_vgpr");
2199   case IS_SGPR:
2200     return StringRef(".amdgcn.next_free_sgpr");
2201   default:
2202     return None;
2203   }
2204 }
2205 
2206 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2207   auto SymbolName = getGprCountSymbolName(RegKind);
2208   assert(SymbolName && "initializing invalid register kind");
2209   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2210   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2211 }
2212 
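// Raise the .amdgcn.next_free_{v,s}gpr symbol so that it stays one past the
// highest register index used so far. Returns false (after emitting an error)
// if the symbol exists but is not an absolute variable.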
2213 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2214                                             unsigned DwordRegIndex,
2215                                             unsigned RegWidth) {
2216   // Symbols are only defined for GCN targets
2217   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2218     return true;
2219 
2220   auto SymbolName = getGprCountSymbolName(RegKind);
2221   if (!SymbolName)
2222     return true;
2223   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2224 
2225   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2226   int64_t OldCount;
2227 
2228   if (!Sym->isVariable())
2229     return !Error(getParser().getTok().getLoc(),
2230                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2231   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2232     return !Error(
2233         getParser().getTok().getLoc(),
2234         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2235 
2236   if (OldCount <= NewMax)
2237     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2238 
2239   return true;
2240 }
2241 
2242 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
2243   const auto &Tok = Parser.getTok();
2244   SMLoc StartLoc = Tok.getLoc();
2245   SMLoc EndLoc = Tok.getEndLoc();
2246   RegisterKind RegKind;
2247   unsigned Reg, RegNum, RegWidth, DwordRegIndex;
2248 
2249   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
2250     // FIXME: improve error messages (bug 41303).
2251     Error(StartLoc, "not a valid operand.");
2252     return nullptr;
2253   }
2254   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2255     if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
2256       return nullptr;
2257   } else
2258     KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
2259   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2260 }
2261 
2262 OperandMatchResultTy
2263 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2264   // TODO: add syntactic sugar for 1/(2*PI)
2265 
2266   assert(!isRegister());
2267   assert(!isModifier());
2268 
2269   const auto& Tok = getToken();
2270   const auto& NextTok = peekToken();
2271   bool IsReal = Tok.is(AsmToken::Real);
2272   SMLoc S = getLoc();
2273   bool Negate = false;
2274 
2275   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2276     lex();
2277     IsReal = true;
2278     Negate = true;
2279   }
2280 
2281   if (IsReal) {
2282     // Floating-point expressions are not supported.
2283     // Can only allow floating-point literals with an
2284     // optional sign.
2285 
2286     StringRef Num = getTokenStr();
2287     lex();
2288 
2289     APFloat RealVal(APFloat::IEEEdouble());
2290     auto roundMode = APFloat::rmNearestTiesToEven;
2291     if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) {
2292       return MatchOperand_ParseFail;
2293     }
2294     if (Negate)
2295       RealVal.changeSign();
2296 
2297     Operands.push_back(
2298       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2299                                AMDGPUOperand::ImmTyNone, true));
2300 
2301     return MatchOperand_Success;
2302 
2303   } else {
2304     int64_t IntVal;
2305     const MCExpr *Expr;
2306     SMLoc S = getLoc();
2307 
2308     if (HasSP3AbsModifier) {
2309       // This is a workaround for handling expressions
2310       // as arguments of SP3 'abs' modifier, for example:
2311       //     |1.0|
2312       //     |-1|
2313       //     |1+x|
2314       // This syntax is not compatible with syntax of standard
2315       // MC expressions (due to the trailing '|').
2316       SMLoc EndLoc;
2317       if (getParser().parsePrimaryExpr(Expr, EndLoc))
2318         return MatchOperand_ParseFail;
2319     } else {
2320       if (Parser.parseExpression(Expr))
2321         return MatchOperand_ParseFail;
2322     }
2323 
2324     if (Expr->evaluateAsAbsolute(IntVal)) {
2325       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2326     } else {
2327       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2328     }
2329 
2330     return MatchOperand_Success;
2331   }
2332 
2333   return MatchOperand_NoMatch;
2334 }
2335 
2336 OperandMatchResultTy
2337 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2338   if (!isRegister())
2339     return MatchOperand_NoMatch;
2340 
2341   if (auto R = parseRegister()) {
2342     assert(R->isReg());
2343     Operands.push_back(std::move(R));
2344     return MatchOperand_Success;
2345   }
2346   return MatchOperand_ParseFail;
2347 }
2348 
2349 OperandMatchResultTy
2350 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2351   auto res = parseReg(Operands);
2352   if (res != MatchOperand_NoMatch) {
2353     return res;
2354   } else if (isModifier()) {
2355     return MatchOperand_NoMatch;
2356   } else {
2357     return parseImm(Operands, HasSP3AbsMod);
2358   }
2359 }
2360 
2361 bool
2362 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2363   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2364     const auto &str = Token.getString();
2365     return str == "abs" || str == "neg" || str == "sext";
2366   }
2367   return false;
2368 }
2369 
2370 bool
2371 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2372   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2373 }
2374 
2375 bool
2376 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2377   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2378 }
2379 
2380 bool
2381 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2382   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2383 }
2384 
2385 // Check if this is an operand modifier or an opcode modifier
2386 // which may look like an expression but is not. We should
2387 // avoid parsing these modifiers as expressions. Currently
2388 // recognized sequences are:
2389 //   |...|
2390 //   abs(...)
2391 //   neg(...)
2392 //   sext(...)
2393 //   -reg
2394 //   -|...|
2395 //   -abs(...)
2396 //   name:...
2397 // Note that simple opcode modifiers like 'gds' may be parsed as
2398 // expressions; this is a special case. See getExpressionAsToken.
2399 //
2400 bool
2401 AMDGPUAsmParser::isModifier() {
2402 
2403   AsmToken Tok = getToken();
2404   AsmToken NextToken[2];
2405   peekTokens(NextToken);
2406 
2407   return isOperandModifier(Tok, NextToken[0]) ||
2408          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2409          isOpcodeModifierWithVal(Tok, NextToken[0]);
2410 }
2411 
2412 // Check if the current token is an SP3 'neg' modifier.
2413 // Currently this modifier is allowed in the following context:
2414 //
2415 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2416 // 2. Before an 'abs' modifier: -abs(...)
2417 // 3. Before an SP3 'abs' modifier: -|...|
2418 //
2419 // In all other cases "-" is handled as a part
2420 // of an expression that follows the sign.
2421 //
2422 // Note: When "-" is followed by an integer literal,
2423 // this is interpreted as integer negation rather
2424 // than a floating-point NEG modifier applied to N.
2425 // Besides being counter-intuitive, such use of a floating-point
2426 // NEG modifier would have resulted in a different meaning
2427 // of integer literals used with VOP1/2/C and VOP3,
2428 // for example:
2429 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2430 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2431 // Negative fp literals with a preceding "-" are
2432 // handled likewise for uniformity.
2433 //
2434 bool
2435 AMDGPUAsmParser::parseSP3NegModifier() {
2436 
2437   AsmToken NextToken[2];
2438   peekTokens(NextToken);
2439 
2440   if (isToken(AsmToken::Minus) &&
2441       (isRegister(NextToken[0], NextToken[1]) ||
2442        NextToken[0].is(AsmToken::Pipe) ||
2443        isId(NextToken[0], "abs"))) {
2444     lex();
2445     return true;
2446   }
2447 
2448   return false;
2449 }
2450 
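// Parse a register or immediate together with optional FP input modifiers.
// Both the named forms neg(...) and abs(...) and the SP3 forms "-..." and
// "|...|" are accepted (e.g. -v0, neg(v1), |v2|, -abs(v3)); mixing the named
// and SP3 variants of the same modifier is rejected.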
2451 OperandMatchResultTy
2452 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2453                                               bool AllowImm) {
2454   bool Neg, SP3Neg;
2455   bool Abs, SP3Abs;
2456   SMLoc Loc;
2457 
2458   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2459   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2460     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2461     return MatchOperand_ParseFail;
2462   }
2463 
2464   SP3Neg = parseSP3NegModifier();
2465 
2466   Loc = getLoc();
2467   Neg = trySkipId("neg");
2468   if (Neg && SP3Neg) {
2469     Error(Loc, "expected register or immediate");
2470     return MatchOperand_ParseFail;
2471   }
2472   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2473     return MatchOperand_ParseFail;
2474 
2475   Abs = trySkipId("abs");
2476   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2477     return MatchOperand_ParseFail;
2478 
2479   Loc = getLoc();
2480   SP3Abs = trySkipToken(AsmToken::Pipe);
2481   if (Abs && SP3Abs) {
2482     Error(Loc, "expected register or immediate");
2483     return MatchOperand_ParseFail;
2484   }
2485 
2486   OperandMatchResultTy Res;
2487   if (AllowImm) {
2488     Res = parseRegOrImm(Operands, SP3Abs);
2489   } else {
2490     Res = parseReg(Operands);
2491   }
2492   if (Res != MatchOperand_Success) {
2493     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2494   }
2495 
2496   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2497     return MatchOperand_ParseFail;
2498   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2499     return MatchOperand_ParseFail;
2500   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2501     return MatchOperand_ParseFail;
2502 
2503   AMDGPUOperand::Modifiers Mods;
2504   Mods.Abs = Abs || SP3Abs;
2505   Mods.Neg = Neg || SP3Neg;
2506 
2507   if (Mods.hasFPModifiers()) {
2508     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2509     if (Op.isExpr()) {
2510       Error(Op.getStartLoc(), "expected an absolute expression");
2511       return MatchOperand_ParseFail;
2512     }
2513     Op.setModifiers(Mods);
2514   }
2515   return MatchOperand_Success;
2516 }
2517 
2518 OperandMatchResultTy
2519 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2520                                                bool AllowImm) {
2521   bool Sext = trySkipId("sext");
2522   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2523     return MatchOperand_ParseFail;
2524 
2525   OperandMatchResultTy Res;
2526   if (AllowImm) {
2527     Res = parseRegOrImm(Operands);
2528   } else {
2529     Res = parseReg(Operands);
2530   }
2531   if (Res != MatchOperand_Success) {
2532     return Sext? MatchOperand_ParseFail : Res;
2533   }
2534 
2535   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2536     return MatchOperand_ParseFail;
2537 
2538   AMDGPUOperand::Modifiers Mods;
2539   Mods.Sext = Sext;
2540 
2541   if (Mods.hasIntModifiers()) {
2542     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2543     if (Op.isExpr()) {
2544       Error(Op.getStartLoc(), "expected an absolute expression");
2545       return MatchOperand_ParseFail;
2546     }
2547     Op.setModifiers(Mods);
2548   }
2549 
2550   return MatchOperand_Success;
2551 }
2552 
2553 OperandMatchResultTy
2554 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2555   return parseRegOrImmWithFPInputMods(Operands, false);
2556 }
2557 
2558 OperandMatchResultTy
2559 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2560   return parseRegOrImmWithIntInputMods(Operands, false);
2561 }
2562 
2563 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2564   auto Loc = getLoc();
2565   if (trySkipId("off")) {
2566     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2567                                                 AMDGPUOperand::ImmTyOff, false));
2568     return MatchOperand_Success;
2569   }
2570 
2571   if (!isRegister())
2572     return MatchOperand_NoMatch;
2573 
2574   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2575   if (Reg) {
2576     Operands.push_back(std::move(Reg));
2577     return MatchOperand_Success;
2578   }
2579 
2580   return MatchOperand_ParseFail;
2581 
2582 }
2583 
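// Reject matches that do not agree with the encoding forced by the user
// (e.g. via an _e32/_e64/_sdwa/_dpp mnemonic suffix), and handle special
// cases such as v_mac_f32/f16 SDWA requiring dst_sel == DWORD.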
2584 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2585   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2586 
2587   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2588       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2589       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2590       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2591     return Match_InvalidOperand;
2592 
2593   if ((TSFlags & SIInstrFlags::VOP3) &&
2594       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2595       getForcedEncodingSize() != 64)
2596     return Match_PreferE32;
2597 
2598   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2599       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2600     // v_mac_f32/16 allow only dst_sel == DWORD.
2601     auto OpNum =
2602         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2603     const auto &Op = Inst.getOperand(OpNum);
2604     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2605       return Match_InvalidOperand;
2606     }
2607   }
2608 
2609   return Match_Success;
2610 }
2611 
2612 // What asm variants we should check
2613 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2614   if (getForcedEncodingSize() == 32) {
2615     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2616     return makeArrayRef(Variants);
2617   }
2618 
2619   if (isForcedVOP3()) {
2620     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2621     return makeArrayRef(Variants);
2622   }
2623 
2624   if (isForcedSDWA()) {
2625     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2626                                         AMDGPUAsmVariants::SDWA9};
2627     return makeArrayRef(Variants);
2628   }
2629 
2630   if (isForcedDPP()) {
2631     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2632     return makeArrayRef(Variants);
2633   }
2634 
2635   static const unsigned Variants[] = {
2636     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2637     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2638   };
2639 
2640   return makeArrayRef(Variants);
2641 }
2642 
2643 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2644   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2645   const unsigned Num = Desc.getNumImplicitUses();
2646   for (unsigned i = 0; i < Num; ++i) {
2647     unsigned Reg = Desc.ImplicitUses[i];
2648     switch (Reg) {
2649     case AMDGPU::FLAT_SCR:
2650     case AMDGPU::VCC:
2651     case AMDGPU::VCC_LO:
2652     case AMDGPU::VCC_HI:
2653     case AMDGPU::M0:
2654       return Reg;
2655     default:
2656       break;
2657     }
2658   }
2659   return AMDGPU::NoRegister;
2660 }
2661 
2662 // NB: This code is correct only when used to check constant
2663 // bus limitations because GFX7 has no f16 inline constants.
2664 // Note that there are no cases when a GFX7 opcode violates
2665 // constant bus limitations due to the use of an f16 constant.
2666 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2667                                        unsigned OpIdx) const {
2668   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2669 
2670   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2671     return false;
2672   }
2673 
2674   const MCOperand &MO = Inst.getOperand(OpIdx);
2675 
2676   int64_t Val = MO.getImm();
2677   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2678 
2679   switch (OpSize) { // expected operand size
2680   case 8:
2681     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2682   case 4:
2683     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2684   case 2: {
2685     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2686     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2687         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2688         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2689         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2690         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
2691         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
2692       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2693     } else {
2694       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2695     }
2696   }
2697   default:
2698     llvm_unreachable("invalid operand size");
2699   }
2700 }
2701 
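// Return how many scalar (constant bus) values an instruction may read.
// Pre-GFX10 targets allow one; GFX10 allows two, except for 64-bit shifts
// which still allow only one.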
2702 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
2703   if (!isGFX10())
2704     return 1;
2705 
2706   switch (Opcode) {
2707   // 64-bit shift instructions can use only one scalar value input
2708   case AMDGPU::V_LSHLREV_B64:
2709   case AMDGPU::V_LSHLREV_B64_gfx10:
2710   case AMDGPU::V_LSHL_B64:
2711   case AMDGPU::V_LSHRREV_B64:
2712   case AMDGPU::V_LSHRREV_B64_gfx10:
2713   case AMDGPU::V_LSHR_B64:
2714   case AMDGPU::V_ASHRREV_I64:
2715   case AMDGPU::V_ASHRREV_I64_gfx10:
2716   case AMDGPU::V_ASHR_I64:
2717     return 1;
2718   default:
2719     return 2;
2720   }
2721 }
2722 
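// Return true if operand OpIdx reads the constant bus: an SGPR other than
// null, a literal that is not an inline constant, or an expression.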
2723 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2724   const MCOperand &MO = Inst.getOperand(OpIdx);
2725   if (MO.isImm()) {
2726     return !isInlineConstant(Inst, OpIdx);
2727   } else if (MO.isReg()) {
2728     auto Reg = MO.getReg();
2729     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2730     return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL;
2731   } else {
2732     return true;
2733   }
2734 }
2735 
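// Verify that the instruction does not read more scalar values (implicit and
// explicit SGPRs plus literals) than the constant bus limit for this opcode.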
2736 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2737   const unsigned Opcode = Inst.getOpcode();
2738   const MCInstrDesc &Desc = MII.get(Opcode);
2739   unsigned ConstantBusUseCount = 0;
2740   unsigned NumLiterals = 0;
2741   unsigned LiteralSize;
2742 
2743   if (Desc.TSFlags &
2744       (SIInstrFlags::VOPC |
2745        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2746        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2747        SIInstrFlags::SDWA)) {
2748     // Check special imm operands (used by madmk, etc)
2749     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2750       ++ConstantBusUseCount;
2751     }
2752 
2753     SmallDenseSet<unsigned> SGPRsUsed;
2754     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2755     if (SGPRUsed != AMDGPU::NoRegister) {
2756       SGPRsUsed.insert(SGPRUsed);
2757       ++ConstantBusUseCount;
2758     }
2759 
2760     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2761     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2762     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2763 
2764     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2765 
2766     for (int OpIdx : OpIndices) {
2767       if (OpIdx == -1) break;
2768 
2769       const MCOperand &MO = Inst.getOperand(OpIdx);
2770       if (usesConstantBus(Inst, OpIdx)) {
2771         if (MO.isReg()) {
2772           const unsigned Reg = mc2PseudoReg(MO.getReg());
2773           // Pairs of registers with a partial intersection like these
2774           //   s0, s[0:1]
2775           //   flat_scratch_lo, flat_scratch
2776           //   flat_scratch_lo, flat_scratch_hi
2777           // are theoretically valid but they are disabled anyway.
2778           // Note that this code mimics SIInstrInfo::verifyInstruction
2779           if (!SGPRsUsed.count(Reg)) {
2780             SGPRsUsed.insert(Reg);
2781             ++ConstantBusUseCount;
2782           }
2783         } else { // Expression or a literal
2784 
2785           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
2786             continue; // special operand like VINTERP attr_chan
2787 
2788           // An instruction may use only one literal.
2789           // This has been validated in a previous step.
2790           // See validateVOP3Literal.
2791           // This literal may be used as more than one operand.
2792           // If all these operands are of the same size,
2793           // this literal counts as one scalar value.
2794           // Otherwise it counts as 2 scalar values.
2795           // See "GFX10 Shader Programming", section 3.6.2.3.
2796 
2797           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
2798           if (Size < 4) Size = 4;
2799 
2800           if (NumLiterals == 0) {
2801             NumLiterals = 1;
2802             LiteralSize = Size;
2803           } else if (LiteralSize != Size) {
2804             NumLiterals = 2;
2805           }
2806         }
2807       }
2808     }
2809   }
2810   ConstantBusUseCount += NumLiterals;
2811 
2812   return ConstantBusUseCount <= getConstantBusLimit(Opcode);
2813 }
2814 
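// For instructions whose vdst is marked early-clobber, verify that no source
// register overlaps the destination register.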
2815 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2816   const unsigned Opcode = Inst.getOpcode();
2817   const MCInstrDesc &Desc = MII.get(Opcode);
2818 
2819   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2820   if (DstIdx == -1 ||
2821       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2822     return true;
2823   }
2824 
2825   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2826 
2827   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2828   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2829   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2830 
2831   assert(DstIdx != -1);
2832   const MCOperand &Dst = Inst.getOperand(DstIdx);
2833   assert(Dst.isReg());
2834   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2835 
2836   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2837 
2838   for (int SrcIdx : SrcIndices) {
2839     if (SrcIdx == -1) break;
2840     const MCOperand &Src = Inst.getOperand(SrcIdx);
2841     if (Src.isReg()) {
2842       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2843       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2844         return false;
2845       }
2846     }
2847   }
2848 
2849   return true;
2850 }
2851 
2852 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2853 
2854   const unsigned Opc = Inst.getOpcode();
2855   const MCInstrDesc &Desc = MII.get(Opc);
2856 
2857   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2858     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2859     assert(ClampIdx != -1);
2860     return Inst.getOperand(ClampIdx).getImm() == 0;
2861   }
2862 
2863   return true;
2864 }
2865 
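// Verify that the vdata register size of a MIMG instruction matches the number
// of channels enabled in dmask, plus one extra register if TFE is set, with
// the data size halved for packed D16 forms.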
2866 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2867 
2868   const unsigned Opc = Inst.getOpcode();
2869   const MCInstrDesc &Desc = MII.get(Opc);
2870 
2871   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2872     return true;
2873 
2874   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2875   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2876   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2877 
2878   assert(VDataIdx != -1);
2879   assert(DMaskIdx != -1);
2880   assert(TFEIdx != -1);
2881 
2882   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2883   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2884   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2885   if (DMask == 0)
2886     DMask = 1;
2887 
2888   unsigned DataSize =
2889     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2890   if (hasPackedD16()) {
2891     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2892     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2893       DataSize = (DataSize + 1) / 2;
2894   }
2895 
2896   return (VDataSize / 4) == DataSize + TFESize;
2897 }
2898 
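// Verify that the number of address VGPRs of a GFX10 MIMG instruction matches
// what the dim and the gradient/coordinate/LOD arguments require, for both the
// packed and the NSA (separate vaddr operands) forms.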
2899 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
2900   const unsigned Opc = Inst.getOpcode();
2901   const MCInstrDesc &Desc = MII.get(Opc);
2902 
2903   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
2904     return true;
2905 
2906   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
2907   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
2908       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
2909   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
2910   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
2911   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2912 
2913   assert(VAddr0Idx != -1);
2914   assert(SrsrcIdx != -1);
2915   assert(DimIdx != -1);
2916   assert(SrsrcIdx > VAddr0Idx);
2917 
2918   unsigned Dim = Inst.getOperand(DimIdx).getImm();
2919   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
2920   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
2921   unsigned VAddrSize =
2922       IsNSA ? SrsrcIdx - VAddr0Idx
2923             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
2924 
2925   unsigned AddrSize = BaseOpcode->NumExtraArgs +
2926                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
2927                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
2928                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
2929   if (!IsNSA) {
2930     if (AddrSize > 8)
2931       AddrSize = 16;
2932     else if (AddrSize > 4)
2933       AddrSize = 8;
2934   }
2935 
2936   return VAddrSize == AddrSize;
2937 }
2938 
2939 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
2940 
2941   const unsigned Opc = Inst.getOpcode();
2942   const MCInstrDesc &Desc = MII.get(Opc);
2943 
2944   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2945     return true;
2946   if (!Desc.mayLoad() || !Desc.mayStore())
2947     return true; // Not atomic
2948 
2949   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2950   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2951 
2952   // This is an incomplete check because image_atomic_cmpswap
2953   // may only use 0x3 and 0xf while other atomic operations
2954   // may use 0x1 and 0x3. However, these limitations are
2955   // verified when we check that dmask matches dst size.
2956   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
2957 }
2958 
2959 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
2960 
2961   const unsigned Opc = Inst.getOpcode();
2962   const MCInstrDesc &Desc = MII.get(Opc);
2963 
2964   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
2965     return true;
2966 
2967   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2968   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2969 
2970   // GATHER4 instructions use dmask in a different fashion compared to
2971   // other MIMG instructions. The only useful DMASK values are
2972   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
2973   // (red,red,red,red) etc.) The ISA document doesn't mention
2974   // this.
2975   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
2976 }
2977 
2978 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
2979 
2980   const unsigned Opc = Inst.getOpcode();
2981   const MCInstrDesc &Desc = MII.get(Opc);
2982 
2983   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2984     return true;
2985 
2986   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2987   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
2988     if (isCI() || isSI())
2989       return false;
2990   }
2991 
2992   return true;
2993 }
2994 
2995 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
2996   const unsigned Opc = Inst.getOpcode();
2997   const MCInstrDesc &Desc = MII.get(Opc);
2998 
2999   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3000     return true;
3001 
3002   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3003   if (DimIdx < 0)
3004     return true;
3005 
3006   long Imm = Inst.getOperand(DimIdx).getImm();
3007   if (Imm < 0 || Imm >= 8)
3008     return false;
3009 
3010   return true;
3011 }
3012 
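// Opcodes whose source operands are reversed relative to the corresponding
// non-rev instruction (src0 and src1 are swapped). Used by validateLdsDirect
// below to restrict where lds_direct may appear.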
3013 static bool IsRevOpcode(const unsigned Opcode)
3014 {
3015   switch (Opcode) {
3016   case AMDGPU::V_SUBREV_F32_e32:
3017   case AMDGPU::V_SUBREV_F32_e64:
3018   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3019   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3020   case AMDGPU::V_SUBREV_F32_e32_vi:
3021   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3022   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3023   case AMDGPU::V_SUBREV_F32_e64_vi:
3024 
3025   case AMDGPU::V_SUBREV_I32_e32:
3026   case AMDGPU::V_SUBREV_I32_e64:
3027   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3028   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3029 
3030   case AMDGPU::V_SUBBREV_U32_e32:
3031   case AMDGPU::V_SUBBREV_U32_e64:
3032   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3033   case AMDGPU::V_SUBBREV_U32_e32_vi:
3034   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3035   case AMDGPU::V_SUBBREV_U32_e64_vi:
3036 
3037   case AMDGPU::V_SUBREV_U32_e32:
3038   case AMDGPU::V_SUBREV_U32_e64:
3039   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3040   case AMDGPU::V_SUBREV_U32_e32_vi:
3041   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3042   case AMDGPU::V_SUBREV_U32_e64_vi:
3043 
3044   case AMDGPU::V_SUBREV_F16_e32:
3045   case AMDGPU::V_SUBREV_F16_e64:
3046   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3047   case AMDGPU::V_SUBREV_F16_e32_vi:
3048   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3049   case AMDGPU::V_SUBREV_F16_e64_vi:
3050 
3051   case AMDGPU::V_SUBREV_U16_e32:
3052   case AMDGPU::V_SUBREV_U16_e64:
3053   case AMDGPU::V_SUBREV_U16_e32_vi:
3054   case AMDGPU::V_SUBREV_U16_e64_vi:
3055 
3056   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3057   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3058   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3059 
3060   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3061   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3062 
3063   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3064   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3065 
3066   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3067   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3068 
3069   case AMDGPU::V_LSHRREV_B32_e32:
3070   case AMDGPU::V_LSHRREV_B32_e64:
3071   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3072   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3073   case AMDGPU::V_LSHRREV_B32_e32_vi:
3074   case AMDGPU::V_LSHRREV_B32_e64_vi:
3075   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3076   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3077 
3078   case AMDGPU::V_ASHRREV_I32_e32:
3079   case AMDGPU::V_ASHRREV_I32_e64:
3080   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3081   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3082   case AMDGPU::V_ASHRREV_I32_e32_vi:
3083   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3084   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3085   case AMDGPU::V_ASHRREV_I32_e64_vi:
3086 
3087   case AMDGPU::V_LSHLREV_B32_e32:
3088   case AMDGPU::V_LSHLREV_B32_e64:
3089   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3090   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3091   case AMDGPU::V_LSHLREV_B32_e32_vi:
3092   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3093   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3094   case AMDGPU::V_LSHLREV_B32_e64_vi:
3095 
3096   case AMDGPU::V_LSHLREV_B16_e32:
3097   case AMDGPU::V_LSHLREV_B16_e64:
3098   case AMDGPU::V_LSHLREV_B16_e32_vi:
3099   case AMDGPU::V_LSHLREV_B16_e64_vi:
3100   case AMDGPU::V_LSHLREV_B16_gfx10:
3101 
3102   case AMDGPU::V_LSHRREV_B16_e32:
3103   case AMDGPU::V_LSHRREV_B16_e64:
3104   case AMDGPU::V_LSHRREV_B16_e32_vi:
3105   case AMDGPU::V_LSHRREV_B16_e64_vi:
3106   case AMDGPU::V_LSHRREV_B16_gfx10:
3107 
3108   case AMDGPU::V_ASHRREV_I16_e32:
3109   case AMDGPU::V_ASHRREV_I16_e64:
3110   case AMDGPU::V_ASHRREV_I16_e32_vi:
3111   case AMDGPU::V_ASHRREV_I16_e64_vi:
3112   case AMDGPU::V_ASHRREV_I16_gfx10:
3113 
3114   case AMDGPU::V_LSHLREV_B64:
3115   case AMDGPU::V_LSHLREV_B64_gfx10:
3116   case AMDGPU::V_LSHLREV_B64_vi:
3117 
3118   case AMDGPU::V_LSHRREV_B64:
3119   case AMDGPU::V_LSHRREV_B64_gfx10:
3120   case AMDGPU::V_LSHRREV_B64_vi:
3121 
3122   case AMDGPU::V_ASHRREV_I64:
3123   case AMDGPU::V_ASHRREV_I64_gfx10:
3124   case AMDGPU::V_ASHRREV_I64_vi:
3125 
3126   case AMDGPU::V_PK_LSHLREV_B16:
3127   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3128   case AMDGPU::V_PK_LSHLREV_B16_vi:
3129 
3130   case AMDGPU::V_PK_LSHRREV_B16:
3131   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3132   case AMDGPU::V_PK_LSHRREV_B16_vi:
3133   case AMDGPU::V_PK_ASHRREV_I16:
3134   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3135   case AMDGPU::V_PK_ASHRREV_I16_vi:
3136     return true;
3137   default:
3138     return false;
3139   }
3140 }
3141 
3142 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3143 
3144   using namespace SIInstrFlags;
3145   const unsigned Opcode = Inst.getOpcode();
3146   const MCInstrDesc &Desc = MII.get(Opcode);
3147 
  // The lds_direct register is defined so that it can be used
  // with 9-bit source operands only. Ignore encodings which do not accept them.
3150   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3151     return true;
3152 
3153   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3154   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3155   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3156 
3157   const int SrcIndices[] = { Src1Idx, Src2Idx };
3158 
3159   // lds_direct cannot be specified as either src1 or src2.
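  // For example (illustrative asm), "v_add_f32 v0, lds_direct, v1" places
  // lds_direct in src0 and may be accepted, whereas "v_add_f32 v0, v1,
  // lds_direct" places it in src1 and is rejected here.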
3160   for (int SrcIdx : SrcIndices) {
3161     if (SrcIdx == -1) break;
3162     const MCOperand &Src = Inst.getOperand(SrcIdx);
3163     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3164       return false;
3165     }
3166   }
3167 
3168   if (Src0Idx == -1)
3169     return true;
3170 
3171   const MCOperand &Src = Inst.getOperand(Src0Idx);
3172   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3173     return true;
3174 
3175   // lds_direct is specified as src0. Check additional limitations.
3176   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3177 }
3178 
3179 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3180   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3181     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3182     if (Op.isFlatOffset())
3183       return Op.getStartLoc();
3184   }
3185   return getLoc();
3186 }
3187 
3188 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3189                                          const OperandVector &Operands) {
3190   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3191   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3192     return true;
3193 
3194   auto Opcode = Inst.getOpcode();
3195   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3196   assert(OpNum != -1);
3197 
3198   const auto &Op = Inst.getOperand(OpNum);
3199   if (!hasFlatOffsets() && Op.getImm() != 0) {
3200     Error(getFlatOffsetLoc(Operands),
3201           "flat offset modifier is not supported on this GPU");
3202     return false;
3203   }
3204 
  // The address offset is 13-bit signed for GFX9 and 12-bit signed for GFX10.
  // For the FLAT segment the offset must be positive;
  // the MSB is ignored and forced to zero.
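  // E.g. on GFX9 a global/scratch (non-FLAT-segment) offset must fit in the
  // signed range [-4096, 4095] and a plain flat offset in [0, 4095];
  // the corresponding GFX10 ranges are [-2048, 2047] and [0, 2047].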
3208   unsigned OffsetSize = isGFX9() ? 13 : 12;
3209   if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
3210     if (!isIntN(OffsetSize, Op.getImm())) {
3211       Error(getFlatOffsetLoc(Operands),
3212             isGFX9() ? "expected a 13-bit signed offset" :
3213                        "expected a 12-bit signed offset");
3214       return false;
3215     }
3216   } else {
3217     if (!isUIntN(OffsetSize - 1, Op.getImm())) {
3218       Error(getFlatOffsetLoc(Operands),
3219             isGFX9() ? "expected a 12-bit unsigned offset" :
3220                        "expected an 11-bit unsigned offset");
3221       return false;
3222     }
3223   }
3224 
3225   return true;
3226 }
3227 
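// SOP2/SOPC instructions can encode at most one 32-bit literal. Repeating the
// same value in both sources still counts as a single literal, so e.g.
// (illustrative) "s_add_u32 s0, 0xdeadbeef, 0xdeadbeef" would be accepted.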
3228 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3229   unsigned Opcode = Inst.getOpcode();
3230   const MCInstrDesc &Desc = MII.get(Opcode);
3231   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3232     return true;
3233 
3234   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3235   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3236 
3237   const int OpIndices[] = { Src0Idx, Src1Idx };
3238 
3239   unsigned NumExprs = 0;
3240   unsigned NumLiterals = 0;
3241   uint32_t LiteralValue;
3242 
3243   for (int OpIdx : OpIndices) {
3244     if (OpIdx == -1) break;
3245 
3246     const MCOperand &MO = Inst.getOperand(OpIdx);
3247     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3248     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3249       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3250         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3251         if (NumLiterals == 0 || LiteralValue != Value) {
3252           LiteralValue = Value;
3253           ++NumLiterals;
3254         }
3255       } else if (MO.isExpr()) {
3256         ++NumExprs;
3257       }
3258     }
3259   }
3260 
3261   return NumLiterals + NumExprs <= 1;
3262 }
3263 
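// For v_permlane16/v_permlanex16 the op_sel field encodes the fi/bc bits,
// so only the two low bits may be set.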
3264 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3265   const unsigned Opc = Inst.getOpcode();
3266   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3267       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3268     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3269     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3270 
3271     if (OpSel & ~3)
3272       return false;
3273   }
3274   return true;
3275 }
3276 
3277 // Check if VCC register matches wavefront size
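// (wave64 code must use the full 64-bit vcc register, wave32 code uses vcc_lo).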
3278 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3279   auto FB = getFeatureBits();
3280   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3281     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3282 }
3283 
// A VOP3 literal is only allowed on GFX10+, and only one can be used.
3285 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3286   unsigned Opcode = Inst.getOpcode();
3287   const MCInstrDesc &Desc = MII.get(Opcode);
3288   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3289     return true;
3290 
3291   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3292   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3293   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3294 
3295   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3296 
3297   unsigned NumExprs = 0;
3298   unsigned NumLiterals = 0;
3299   uint32_t LiteralValue;
3300 
3301   for (int OpIdx : OpIndices) {
3302     if (OpIdx == -1) break;
3303 
3304     const MCOperand &MO = Inst.getOperand(OpIdx);
3305     if (!MO.isImm() && !MO.isExpr())
3306       continue;
3307     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3308       continue;
3309 
3310     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3311         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug])
3312       return false;
3313 
3314     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3315       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3316       if (NumLiterals == 0 || LiteralValue != Value) {
3317         LiteralValue = Value;
3318         ++NumLiterals;
3319       }
3320     } else if (MO.isExpr()) {
3321       ++NumExprs;
3322     }
3323   }
3324   NumLiterals += NumExprs;
3325 
3326   return !NumLiterals ||
3327          (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3328 }
3329 
3330 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3331                                           const SMLoc &IDLoc,
3332                                           const OperandVector &Operands) {
3333   if (!validateLdsDirect(Inst)) {
3334     Error(IDLoc,
3335       "invalid use of lds_direct");
3336     return false;
3337   }
3338   if (!validateSOPLiteral(Inst)) {
3339     Error(IDLoc,
3340       "only one literal operand is allowed");
3341     return false;
3342   }
3343   if (!validateVOP3Literal(Inst)) {
3344     Error(IDLoc,
3345       "invalid literal operand");
3346     return false;
3347   }
3348   if (!validateConstantBusLimitations(Inst)) {
3349     Error(IDLoc,
3350       "invalid operand (violates constant bus restrictions)");
3351     return false;
3352   }
3353   if (!validateEarlyClobberLimitations(Inst)) {
3354     Error(IDLoc,
3355       "destination must be different than all sources");
3356     return false;
3357   }
3358   if (!validateIntClampSupported(Inst)) {
3359     Error(IDLoc,
3360       "integer clamping is not supported on this GPU");
3361     return false;
3362   }
3363   if (!validateOpSel(Inst)) {
3364     Error(IDLoc,
3365       "invalid op_sel operand");
3366     return false;
3367   }
  // For MUBUF/MTBUF, d16 is part of the opcode, so there is nothing to
  // validate.
3369   if (!validateMIMGD16(Inst)) {
3370     Error(IDLoc,
3371       "d16 modifier is not supported on this GPU");
3372     return false;
3373   }
3374   if (!validateMIMGDim(Inst)) {
3375     Error(IDLoc, "dim modifier is required on this GPU");
3376     return false;
3377   }
3378   if (!validateMIMGDataSize(Inst)) {
3379     Error(IDLoc,
3380       "image data size does not match dmask and tfe");
3381     return false;
3382   }
3383   if (!validateMIMGAddrSize(Inst)) {
3384     Error(IDLoc,
3385       "image address size does not match dim and a16");
3386     return false;
3387   }
3388   if (!validateMIMGAtomicDMask(Inst)) {
3389     Error(IDLoc,
3390       "invalid atomic image dmask");
3391     return false;
3392   }
3393   if (!validateMIMGGatherDMask(Inst)) {
3394     Error(IDLoc,
3395       "invalid image_gather dmask: only one bit must be set");
3396     return false;
3397   }
3398   if (!validateFlatOffset(Inst, Operands)) {
3399     return false;
3400   }
3401 
3402   return true;
3403 }
3404 
3405 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3406                                             const FeatureBitset &FBS,
3407                                             unsigned VariantID = 0);
3408 
3409 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3410                                               OperandVector &Operands,
3411                                               MCStreamer &Out,
3412                                               uint64_t &ErrorInfo,
3413                                               bool MatchingInlineAsm) {
3414   MCInst Inst;
3415   unsigned Result = Match_Success;
3416   for (auto Variant : getMatchedVariants()) {
3417     uint64_t EI;
3418     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3419                                   Variant);
    // We order match statuses from least to most specific and use the most
    // specific status as the result:
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3423     if ((R == Match_Success) ||
3424         (R == Match_PreferE32) ||
3425         (R == Match_MissingFeature && Result != Match_PreferE32) ||
3426         (R == Match_InvalidOperand && Result != Match_MissingFeature
3427                                    && Result != Match_PreferE32) ||
3428         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
3429                                    && Result != Match_MissingFeature
3430                                    && Result != Match_PreferE32)) {
3431       Result = R;
3432       ErrorInfo = EI;
3433     }
3434     if (R == Match_Success)
3435       break;
3436   }
3437 
3438   switch (Result) {
3439   default: break;
3440   case Match_Success:
3441     if (!validateInstruction(Inst, IDLoc, Operands)) {
3442       return true;
3443     }
3444     Inst.setLoc(IDLoc);
3445     Out.EmitInstruction(Inst, getSTI());
3446     return false;
3447 
3448   case Match_MissingFeature:
3449     return Error(IDLoc, "instruction not supported on this GPU");
3450 
3451   case Match_MnemonicFail: {
3452     FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3453     std::string Suggestion = AMDGPUMnemonicSpellCheck(
3454         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
3455     return Error(IDLoc, "invalid instruction" + Suggestion,
3456                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
3457   }
3458 
3459   case Match_InvalidOperand: {
3460     SMLoc ErrorLoc = IDLoc;
3461     if (ErrorInfo != ~0ULL) {
3462       if (ErrorInfo >= Operands.size()) {
3463         return Error(IDLoc, "too few operands for instruction");
3464       }
3465       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3466       if (ErrorLoc == SMLoc())
3467         ErrorLoc = IDLoc;
3468     }
3469     return Error(ErrorLoc, "invalid operand for instruction");
3470   }
3471 
3472   case Match_PreferE32:
3473     return Error(IDLoc, "internal error: instruction without _e64 suffix "
3474                         "should be encoded as e32");
3475   }
3476   llvm_unreachable("Implement any new match types added!");
3477 }
3478 
3479 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3480   int64_t Tmp = -1;
3481   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3482     return true;
3483   }
3484   if (getParser().parseAbsoluteExpression(Tmp)) {
3485     return true;
3486   }
3487   Ret = static_cast<uint32_t>(Tmp);
3488   return false;
3489 }
3490 
3491 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3492                                                uint32_t &Minor) {
3493   if (ParseAsAbsoluteExpression(Major))
3494     return TokError("invalid major version");
3495 
3496   if (getLexer().isNot(AsmToken::Comma))
3497     return TokError("minor version number required, comma expected");
3498   Lex();
3499 
3500   if (ParseAsAbsoluteExpression(Minor))
3501     return TokError("invalid minor version");
3502 
3503   return false;
3504 }
3505 
3506 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3507   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3508     return TokError("directive only supported for amdgcn architecture");
3509 
3510   std::string Target;
3511 
3512   SMLoc TargetStart = getTok().getLoc();
3513   if (getParser().parseEscapedString(Target))
3514     return true;
3515   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3516 
3517   std::string ExpectedTarget;
3518   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3519   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3520 
3521   if (Target != ExpectedTargetOS.str())
3522     return getParser().Error(TargetRange.Start, "target must match options",
3523                              TargetRange);
3524 
3525   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3526   return false;
3527 }
3528 
3529 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3530   return getParser().Error(Range.Start, "value out of range", Range);
3531 }
3532 
3533 bool AMDGPUAsmParser::calculateGPRBlocks(
3534     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3535     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
3536     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
3537     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
3538   // TODO(scott.linder): These calculations are duplicated from
3539   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3540   IsaVersion Version = getIsaVersion(getSTI().getCPU());
3541 
3542   unsigned NumVGPRs = NextFreeVGPR;
3543   unsigned NumSGPRs = NextFreeSGPR;
3544 
3545   if (Version.Major >= 10)
3546     NumSGPRs = 0;
3547   else {
3548     unsigned MaxAddressableNumSGPRs =
3549         IsaInfo::getAddressableNumSGPRs(&getSTI());
3550 
3551     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3552         NumSGPRs > MaxAddressableNumSGPRs)
3553       return OutOfRangeError(SGPRRange);
3554 
3555     NumSGPRs +=
3556         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3557 
3558     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3559         NumSGPRs > MaxAddressableNumSGPRs)
3560       return OutOfRangeError(SGPRRange);
3561 
3562     if (Features.test(FeatureSGPRInitBug))
3563       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3564   }
3565 
3566   VGPRBlocks =
3567       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
3568   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3569 
3570   return false;
3571 }
3572 
3573 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3574   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3575     return TokError("directive only supported for amdgcn architecture");
3576 
3577   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3578     return TokError("directive only supported for amdhsa OS");
3579 
3580   StringRef KernelName;
3581   if (getParser().parseIdentifier(KernelName))
3582     return true;
3583 
3584   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3585 
3586   StringSet<> Seen;
3587 
3588   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3589 
3590   SMRange VGPRRange;
3591   uint64_t NextFreeVGPR = 0;
3592   SMRange SGPRRange;
3593   uint64_t NextFreeSGPR = 0;
3594   unsigned UserSGPRCount = 0;
3595   bool ReserveVCC = true;
3596   bool ReserveFlatScr = true;
3597   bool ReserveXNACK = hasXNACK();
3598   Optional<bool> EnableWavefrontSize32;
3599 
3600   while (true) {
3601     while (getLexer().is(AsmToken::EndOfStatement))
3602       Lex();
3603 
3604     if (getLexer().isNot(AsmToken::Identifier))
3605       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3606 
3607     StringRef ID = getTok().getIdentifier();
3608     SMRange IDRange = getTok().getLocRange();
3609     Lex();
3610 
3611     if (ID == ".end_amdhsa_kernel")
3612       break;
3613 
3614     if (Seen.find(ID) != Seen.end())
3615       return TokError(".amdhsa_ directives cannot be repeated");
3616     Seen.insert(ID);
3617 
3618     SMLoc ValStart = getTok().getLoc();
3619     int64_t IVal;
3620     if (getParser().parseAbsoluteExpression(IVal))
3621       return true;
3622     SMLoc ValEnd = getTok().getLoc();
3623     SMRange ValRange = SMRange(ValStart, ValEnd);
3624 
3625     if (IVal < 0)
3626       return OutOfRangeError(ValRange);
3627 
3628     uint64_t Val = IVal;
3629 
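// Range-check VALUE against the width of the ENTRY bit field, then pack it
// into FIELD via AMDHSA_BITS_SET. Used by the .amdhsa_* handlers below.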
3630 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
3631   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
3632     return OutOfRangeError(RANGE);                                             \
3633   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3634 
3635     if (ID == ".amdhsa_group_segment_fixed_size") {
3636       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3637         return OutOfRangeError(ValRange);
3638       KD.group_segment_fixed_size = Val;
3639     } else if (ID == ".amdhsa_private_segment_fixed_size") {
3640       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3641         return OutOfRangeError(ValRange);
3642       KD.private_segment_fixed_size = Val;
3643     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3644       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3645                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3646                        Val, ValRange);
3647       if (Val)
3648         UserSGPRCount += 4;
3649     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3650       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3651                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3652                        ValRange);
3653       if (Val)
3654         UserSGPRCount += 2;
3655     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3656       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3657                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3658                        ValRange);
3659       if (Val)
3660         UserSGPRCount += 2;
3661     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3662       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3663                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3664                        Val, ValRange);
3665       if (Val)
3666         UserSGPRCount += 2;
3667     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3668       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3669                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3670                        ValRange);
3671       if (Val)
3672         UserSGPRCount += 2;
3673     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3674       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3675                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3676                        ValRange);
3677       if (Val)
3678         UserSGPRCount += 2;
3679     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3680       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3681                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3682                        Val, ValRange);
3683       if (Val)
3684         UserSGPRCount += 1;
3685     } else if (ID == ".amdhsa_wavefront_size32") {
3686       if (IVersion.Major < 10)
3687         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3688                                  IDRange);
3689       EnableWavefrontSize32 = Val;
3690       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3691                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
3692                        Val, ValRange);
3693     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3694       PARSE_BITS_ENTRY(
3695           KD.compute_pgm_rsrc2,
3696           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3697           ValRange);
3698     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3699       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3700                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3701                        ValRange);
3702     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3703       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3704                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3705                        ValRange);
3706     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3707       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3708                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3709                        ValRange);
3710     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3711       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3712                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3713                        ValRange);
3714     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3715       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3716                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3717                        ValRange);
3718     } else if (ID == ".amdhsa_next_free_vgpr") {
3719       VGPRRange = ValRange;
3720       NextFreeVGPR = Val;
3721     } else if (ID == ".amdhsa_next_free_sgpr") {
3722       SGPRRange = ValRange;
3723       NextFreeSGPR = Val;
3724     } else if (ID == ".amdhsa_reserve_vcc") {
3725       if (!isUInt<1>(Val))
3726         return OutOfRangeError(ValRange);
3727       ReserveVCC = Val;
3728     } else if (ID == ".amdhsa_reserve_flat_scratch") {
3729       if (IVersion.Major < 7)
3730         return getParser().Error(IDRange.Start, "directive requires gfx7+",
3731                                  IDRange);
3732       if (!isUInt<1>(Val))
3733         return OutOfRangeError(ValRange);
3734       ReserveFlatScr = Val;
3735     } else if (ID == ".amdhsa_reserve_xnack_mask") {
3736       if (IVersion.Major < 8)
3737         return getParser().Error(IDRange.Start, "directive requires gfx8+",
3738                                  IDRange);
3739       if (!isUInt<1>(Val))
3740         return OutOfRangeError(ValRange);
3741       ReserveXNACK = Val;
3742     } else if (ID == ".amdhsa_float_round_mode_32") {
3743       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3744                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
3745     } else if (ID == ".amdhsa_float_round_mode_16_64") {
3746       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3747                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
3748     } else if (ID == ".amdhsa_float_denorm_mode_32") {
3749       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3750                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
3751     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
3752       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3753                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
3754                        ValRange);
3755     } else if (ID == ".amdhsa_dx10_clamp") {
3756       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3757                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
3758     } else if (ID == ".amdhsa_ieee_mode") {
3759       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
3760                        Val, ValRange);
3761     } else if (ID == ".amdhsa_fp16_overflow") {
3762       if (IVersion.Major < 9)
3763         return getParser().Error(IDRange.Start, "directive requires gfx9+",
3764                                  IDRange);
3765       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
3766                        ValRange);
3767     } else if (ID == ".amdhsa_workgroup_processor_mode") {
3768       if (IVersion.Major < 10)
3769         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3770                                  IDRange);
3771       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
3772                        ValRange);
3773     } else if (ID == ".amdhsa_memory_ordered") {
3774       if (IVersion.Major < 10)
3775         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3776                                  IDRange);
3777       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
3778                        ValRange);
3779     } else if (ID == ".amdhsa_forward_progress") {
3780       if (IVersion.Major < 10)
3781         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3782                                  IDRange);
3783       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
3784                        ValRange);
3785     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
3786       PARSE_BITS_ENTRY(
3787           KD.compute_pgm_rsrc2,
3788           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
3789           ValRange);
3790     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
3791       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3792                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
3793                        Val, ValRange);
3794     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
3795       PARSE_BITS_ENTRY(
3796           KD.compute_pgm_rsrc2,
3797           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
3798           ValRange);
3799     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
3800       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3801                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
3802                        Val, ValRange);
3803     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
3804       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3805                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
3806                        Val, ValRange);
3807     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
3808       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3809                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
3810                        Val, ValRange);
3811     } else if (ID == ".amdhsa_exception_int_div_zero") {
3812       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3813                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
3814                        Val, ValRange);
3815     } else {
3816       return getParser().Error(IDRange.Start,
3817                                "unknown .amdhsa_kernel directive", IDRange);
3818     }
3819 
3820 #undef PARSE_BITS_ENTRY
3821   }
3822 
3823   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
3824     return TokError(".amdhsa_next_free_vgpr directive is required");
3825 
3826   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
3827     return TokError(".amdhsa_next_free_sgpr directive is required");
3828 
3829   unsigned VGPRBlocks;
3830   unsigned SGPRBlocks;
3831   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
3832                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
3833                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
3834                          SGPRBlocks))
3835     return true;
3836 
3837   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
3838           VGPRBlocks))
3839     return OutOfRangeError(VGPRRange);
3840   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3841                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
3842 
3843   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
3844           SGPRBlocks))
3845     return OutOfRangeError(SGPRRange);
3846   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3847                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
3848                   SGPRBlocks);
3849 
3850   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
3851     return TokError("too many user SGPRs enabled");
3852   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
3853                   UserSGPRCount);
3854 
3855   getTargetStreamer().EmitAmdhsaKernelDescriptor(
3856       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
3857       ReserveFlatScr, ReserveXNACK);
3858   return false;
3859 }
3860 
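// Example (illustrative): ".hsa_code_object_version 2,1" emits the directive
// with Major = 2 and Minor = 1.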
3861 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
3862   uint32_t Major;
3863   uint32_t Minor;
3864 
3865   if (ParseDirectiveMajorMinor(Major, Minor))
3866     return true;
3867 
3868   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
3869   return false;
3870 }
3871 
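// Example (illustrative): .hsa_code_object_isa 9,0,0,"AMD","AMDGPU"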
3872 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
3873   uint32_t Major;
3874   uint32_t Minor;
3875   uint32_t Stepping;
3876   StringRef VendorName;
3877   StringRef ArchName;
3878 
3879   // If this directive has no arguments, then use the ISA version for the
3880   // targeted GPU.
3881   if (getLexer().is(AsmToken::EndOfStatement)) {
3882     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3883     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
3884                                                       ISA.Stepping,
3885                                                       "AMD", "AMDGPU");
3886     return false;
3887   }
3888 
3889   if (ParseDirectiveMajorMinor(Major, Minor))
3890     return true;
3891 
3892   if (getLexer().isNot(AsmToken::Comma))
3893     return TokError("stepping version number required, comma expected");
3894   Lex();
3895 
3896   if (ParseAsAbsoluteExpression(Stepping))
3897     return TokError("invalid stepping version");
3898 
3899   if (getLexer().isNot(AsmToken::Comma))
3900     return TokError("vendor name required, comma expected");
3901   Lex();
3902 
3903   if (getLexer().isNot(AsmToken::String))
3904     return TokError("invalid vendor name");
3905 
3906   VendorName = getLexer().getTok().getStringContents();
3907   Lex();
3908 
3909   if (getLexer().isNot(AsmToken::Comma))
3910     return TokError("arch name required, comma expected");
3911   Lex();
3912 
3913   if (getLexer().isNot(AsmToken::String))
3914     return TokError("invalid arch name");
3915 
3916   ArchName = getLexer().getTok().getStringContents();
3917   Lex();
3918 
3919   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
3920                                                     VendorName, ArchName);
3921   return false;
3922 }
3923 
3924 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
3925                                                amd_kernel_code_t &Header) {
3926   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
3927   // assembly for backwards compatibility.
3928   if (ID == "max_scratch_backing_memory_byte_size") {
3929     Parser.eatToEndOfStatement();
3930     return false;
3931   }
3932 
3933   SmallString<40> ErrStr;
3934   raw_svector_ostream Err(ErrStr);
3935   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
3936     return TokError(Err.str());
3937   }
3938   Lex();
3939 
3940   if (ID == "enable_wavefront_size32") {
3941     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
3942       if (!isGFX10())
3943         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
3944       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
3945         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
3946     } else {
3947       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
3948         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
3949     }
3950   }
3951 
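  // Note: wavefront_size is encoded as log2, so 5 means wave32 and 6 wave64.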
3952   if (ID == "wavefront_size") {
3953     if (Header.wavefront_size == 5) {
3954       if (!isGFX10())
3955         return TokError("wavefront_size=5 is only allowed on GFX10+");
3956       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
3957         return TokError("wavefront_size=5 requires +WavefrontSize32");
3958     } else if (Header.wavefront_size == 6) {
3959       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
3960         return TokError("wavefront_size=6 requires +WavefrontSize64");
3961     }
3962   }
3963 
3964   if (ID == "enable_wgp_mode") {
3965     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
3966       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
3967   }
3968 
3969   if (ID == "enable_mem_ordered") {
3970     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
3971       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
3972   }
3973 
3974   if (ID == "enable_fwd_progress") {
3975     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
3976       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
3977   }
3978 
3979   return false;
3980 }
3981 
3982 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
3983   amd_kernel_code_t Header;
3984   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
3985 
3986   while (true) {
3987     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
3988     // will set the current token to EndOfStatement.
3989     while(getLexer().is(AsmToken::EndOfStatement))
3990       Lex();
3991 
3992     if (getLexer().isNot(AsmToken::Identifier))
3993       return TokError("expected value identifier or .end_amd_kernel_code_t");
3994 
3995     StringRef ID = getLexer().getTok().getIdentifier();
3996     Lex();
3997 
3998     if (ID == ".end_amd_kernel_code_t")
3999       break;
4000 
4001     if (ParseAMDKernelCodeTValue(ID, Header))
4002       return true;
4003   }
4004 
4005   getTargetStreamer().EmitAMDKernelCodeT(Header);
4006 
4007   return false;
4008 }
4009 
4010 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4011   if (getLexer().isNot(AsmToken::Identifier))
4012     return TokError("expected symbol name");
4013 
4014   StringRef KernelName = Parser.getTok().getString();
4015 
4016   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4017                                            ELF::STT_AMDGPU_HSA_KERNEL);
4018   Lex();
4019   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
4020     KernelScope.initialize(getContext());
4021   return false;
4022 }
4023 
4024 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4025   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4026     return Error(getParser().getTok().getLoc(),
4027                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4028                  "architectures");
4029   }
4030 
4031   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
4032 
4033   std::string ISAVersionStringFromSTI;
4034   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4035   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4036 
4037   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4038     return Error(getParser().getTok().getLoc(),
4039                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4040                  "arguments specified through the command line");
4041   }
4042 
4043   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4044   Lex();
4045 
4046   return false;
4047 }
4048 
4049 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4050   const char *AssemblerDirectiveBegin;
4051   const char *AssemblerDirectiveEnd;
4052   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4053       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
4054           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4055                             HSAMD::V3::AssemblerDirectiveEnd)
4056           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4057                             HSAMD::AssemblerDirectiveEnd);
4058 
4059   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4060     return Error(getParser().getTok().getLoc(),
4061                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4062                  "not available on non-amdhsa OSes")).str());
4063   }
4064 
4065   std::string HSAMetadataString;
4066   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4067                           HSAMetadataString))
4068     return true;
4069 
4070   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
4071     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4072       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4073   } else {
4074     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4075       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4076   }
4077 
4078   return false;
4079 }
4080 
4081 /// Common code to parse out a block of text (typically YAML) between start and
4082 /// end directives.
4083 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4084                                           const char *AssemblerDirectiveEnd,
4085                                           std::string &CollectString) {
4086 
4087   raw_string_ostream CollectStream(CollectString);
4088 
4089   getLexer().setSkipSpace(false);
4090 
4091   bool FoundEnd = false;
4092   while (!getLexer().is(AsmToken::Eof)) {
4093     while (getLexer().is(AsmToken::Space)) {
4094       CollectStream << getLexer().getTok().getString();
4095       Lex();
4096     }
4097 
4098     if (getLexer().is(AsmToken::Identifier)) {
4099       StringRef ID = getLexer().getTok().getIdentifier();
4100       if (ID == AssemblerDirectiveEnd) {
4101         Lex();
4102         FoundEnd = true;
4103         break;
4104       }
4105     }
4106 
4107     CollectStream << Parser.parseStringToEndOfStatement()
4108                   << getContext().getAsmInfo()->getSeparatorString();
4109 
4110     Parser.eatToEndOfStatement();
4111   }
4112 
4113   getLexer().setSkipSpace(true);
4114 
4115   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
4116     return TokError(Twine("expected directive ") +
4117                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4118   }
4119 
4120   CollectStream.flush();
4121   return false;
4122 }
4123 
4124 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4125 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4126   std::string String;
4127   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4128                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4129     return true;
4130 
4131   auto PALMetadata = getTargetStreamer().getPALMetadata();
4132   if (!PALMetadata->setFromString(String))
4133     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4134   return false;
4135 }
4136 
4137 /// Parse the assembler directive for old linear-format PAL metadata.
4138 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4139   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4140     return Error(getParser().getTok().getLoc(),
4141                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4142                  "not available on non-amdpal OSes")).str());
4143   }
4144 
4145   auto PALMetadata = getTargetStreamer().getPALMetadata();
4146   PALMetadata->setLegacy();
4147   for (;;) {
4148     uint32_t Key, Value;
4149     if (ParseAsAbsoluteExpression(Key)) {
4150       return TokError(Twine("invalid value in ") +
4151                       Twine(PALMD::AssemblerDirective));
4152     }
4153     if (getLexer().isNot(AsmToken::Comma)) {
4154       return TokError(Twine("expected an even number of values in ") +
4155                       Twine(PALMD::AssemblerDirective));
4156     }
4157     Lex();
4158     if (ParseAsAbsoluteExpression(Value)) {
4159       return TokError(Twine("invalid value in ") +
4160                       Twine(PALMD::AssemblerDirective));
4161     }
4162     PALMetadata->setRegister(Key, Value);
4163     if (getLexer().isNot(AsmToken::Comma))
4164       break;
4165     Lex();
4166   }
4167   return false;
4168 }
4169 
4170 /// ParseDirectiveAMDGPULDS
4171 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
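///  Example (illustrative): ".amdgpu_lds lds_buf, 1024, 16" reserves 1024
///  bytes of LDS for lds_buf with 16-byte alignment; the alignment defaults
///  to 4 when omitted.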
4172 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4173   if (getParser().checkForValidSection())
4174     return true;
4175 
4176   StringRef Name;
4177   SMLoc NameLoc = getLexer().getLoc();
4178   if (getParser().parseIdentifier(Name))
4179     return TokError("expected identifier in directive");
4180 
4181   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4182   if (parseToken(AsmToken::Comma, "expected ','"))
4183     return true;
4184 
4185   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4186 
4187   int64_t Size;
4188   SMLoc SizeLoc = getLexer().getLoc();
4189   if (getParser().parseAbsoluteExpression(Size))
4190     return true;
4191   if (Size < 0)
4192     return Error(SizeLoc, "size must be non-negative");
4193   if (Size > LocalMemorySize)
4194     return Error(SizeLoc, "size is too large");
4195 
4196   int64_t Align = 4;
4197   if (getLexer().is(AsmToken::Comma)) {
4198     Lex();
4199     SMLoc AlignLoc = getLexer().getLoc();
4200     if (getParser().parseAbsoluteExpression(Align))
4201       return true;
4202     if (Align < 0 || !isPowerOf2_64(Align))
4203       return Error(AlignLoc, "alignment must be a power of two");
4204 
    // Alignment larger than the size of LDS is possible in theory, as long
    // as the linker manages to place the symbol at address 0, but we do want
    // to make sure the alignment fits nicely into a 32-bit integer.
4208     if (Align >= 1u << 31)
4209       return Error(AlignLoc, "alignment is too large");
4210   }
4211 
4212   if (parseToken(AsmToken::EndOfStatement,
4213                  "unexpected token in '.amdgpu_lds' directive"))
4214     return true;
4215 
4216   Symbol->redefineIfPossible();
4217   if (!Symbol->isUndefined())
4218     return Error(NameLoc, "invalid symbol redefinition");
4219 
4220   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align);
4221   return false;
4222 }
4223 
4224 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4225   StringRef IDVal = DirectiveID.getString();
4226 
4227   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
4228     if (IDVal == ".amdgcn_target")
4229       return ParseDirectiveAMDGCNTarget();
4230 
4231     if (IDVal == ".amdhsa_kernel")
4232       return ParseDirectiveAMDHSAKernel();
4233 
4234     // TODO: Restructure/combine with PAL metadata directive.
4235     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4236       return ParseDirectiveHSAMetadata();
4237   } else {
4238     if (IDVal == ".hsa_code_object_version")
4239       return ParseDirectiveHSACodeObjectVersion();
4240 
4241     if (IDVal == ".hsa_code_object_isa")
4242       return ParseDirectiveHSACodeObjectISA();
4243 
4244     if (IDVal == ".amd_kernel_code_t")
4245       return ParseDirectiveAMDKernelCodeT();
4246 
4247     if (IDVal == ".amdgpu_hsa_kernel")
4248       return ParseDirectiveAMDGPUHsaKernel();
4249 
4250     if (IDVal == ".amd_amdgpu_isa")
4251       return ParseDirectiveISAVersion();
4252 
4253     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4254       return ParseDirectiveHSAMetadata();
4255   }
4256 
4257   if (IDVal == ".amdgpu_lds")
4258     return ParseDirectiveAMDGPULDS();
4259 
4260   if (IDVal == PALMD::AssemblerDirectiveBegin)
4261     return ParseDirectivePALMetadataBegin();
4262 
4263   if (IDVal == PALMD::AssemblerDirective)
4264     return ParseDirectivePALMetadata();
4265 
4266   return true;
4267 }
4268 
4269 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4270                                            unsigned RegNo) const {
4271 
4272   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4273        R.isValid(); ++R) {
4274     if (*R == RegNo)
4275       return isGFX9() || isGFX10();
4276   }
4277 
4278   // GFX10 has 2 more SGPRs 104 and 105.
4279   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4280        R.isValid(); ++R) {
4281     if (*R == RegNo)
4282       return hasSGPR104_SGPR105();
4283   }
4284 
4285   switch (RegNo) {
4286   case AMDGPU::SRC_SHARED_BASE:
4287   case AMDGPU::SRC_SHARED_LIMIT:
4288   case AMDGPU::SRC_PRIVATE_BASE:
4289   case AMDGPU::SRC_PRIVATE_LIMIT:
4290   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4291     return !isCI() && !isSI() && !isVI();
4292   case AMDGPU::TBA:
4293   case AMDGPU::TBA_LO:
4294   case AMDGPU::TBA_HI:
4295   case AMDGPU::TMA:
4296   case AMDGPU::TMA_LO:
4297   case AMDGPU::TMA_HI:
4298     return !isGFX9() && !isGFX10();
4299   case AMDGPU::XNACK_MASK:
4300   case AMDGPU::XNACK_MASK_LO:
4301   case AMDGPU::XNACK_MASK_HI:
4302     return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4303   case AMDGPU::SGPR_NULL:
4304     return isGFX10();
4305   default:
4306     break;
4307   }
4308 
4309   if (isCI())
4310     return true;
4311 
4312   if (isSI() || isGFX10()) {
4313     // No flat_scr on SI.
4314     // On GFX10 flat scratch is not a valid register operand and can only be
4315     // accessed with s_setreg/s_getreg.
4316     switch (RegNo) {
4317     case AMDGPU::FLAT_SCR:
4318     case AMDGPU::FLAT_SCR_LO:
4319     case AMDGPU::FLAT_SCR_HI:
4320       return false;
4321     default:
4322       return true;
4323     }
4324   }
4325 
4326   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4327   // SI/CI have.
4328   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4329        R.isValid(); ++R) {
4330     if (*R == RegNo)
4331       return hasSGPR102_SGPR103();
4332   }
4333 
4334   return true;
4335 }
4336 
4337 OperandMatchResultTy
4338 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4339                               OperandMode Mode) {
4340   // Try to parse with a custom parser
4341   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4342 
  // If we successfully parsed the operand or if there was an error parsing,
  // we are done.
4345   //
4346   // If we are parsing after we reach EndOfStatement then this means we
4347   // are appending default values to the Operands list.  This is only done
4348   // by custom parser, so we shouldn't continue on to the generic parsing.
4349   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4350       getLexer().is(AsmToken::EndOfStatement))
4351     return ResTy;
4352 
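  // MIMG instructions in NSA (non-sequential address) form take a bracketed
  // register list, e.g. (illustrative) "[v0, v2, v5]". The surrounding '[' and
  // ']' tokens are kept only when more than one register was parsed.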
4353   if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4354     unsigned Prefix = Operands.size();
4355     SMLoc LBraceLoc = getTok().getLoc();
4356     Parser.Lex(); // eat the '['
4357 
4358     for (;;) {
4359       ResTy = parseReg(Operands);
4360       if (ResTy != MatchOperand_Success)
4361         return ResTy;
4362 
4363       if (getLexer().is(AsmToken::RBrac))
4364         break;
4365 
4366       if (getLexer().isNot(AsmToken::Comma))
4367         return MatchOperand_ParseFail;
4368       Parser.Lex();
4369     }
4370 
4371     if (Operands.size() - Prefix > 1) {
4372       Operands.insert(Operands.begin() + Prefix,
4373                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4374       Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4375                                                     getTok().getLoc()));
4376     }
4377 
4378     Parser.Lex(); // eat the ']'
4379     return MatchOperand_Success;
4380   }
4381 
4382   return parseRegOrImm(Operands);
4383 }
4384 
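// Strip a forced-encoding suffix from the mnemonic and record it. For example
// (illustrative), "v_add_f32_e64" forces the 64-bit encoding and yields the
// mnemonic "v_add_f32"; "_e32", "_dpp" and "_sdwa" behave analogously.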
4385 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4386   // Clear any forced encodings from the previous instruction.
4387   setForcedEncodingSize(0);
4388   setForcedDPP(false);
4389   setForcedSDWA(false);
4390 
4391   if (Name.endswith("_e64")) {
4392     setForcedEncodingSize(64);
4393     return Name.substr(0, Name.size() - 4);
4394   } else if (Name.endswith("_e32")) {
4395     setForcedEncodingSize(32);
4396     return Name.substr(0, Name.size() - 4);
4397   } else if (Name.endswith("_dpp")) {
4398     setForcedDPP(true);
4399     return Name.substr(0, Name.size() - 4);
4400   } else if (Name.endswith("_sdwa")) {
4401     setForcedSDWA(true);
4402     return Name.substr(0, Name.size() - 5);
4403   }
4404   return Name;
4405 }
4406 
4407 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4408                                        StringRef Name,
4409                                        SMLoc NameLoc, OperandVector &Operands) {
4410   // Add the instruction mnemonic
4411   Name = parseMnemonicSuffix(Name);
4412   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4413 
4414   bool IsMIMG = Name.startswith("image_");
4415 
4416   while (!getLexer().is(AsmToken::EndOfStatement)) {
4417     OperandMode Mode = OperandMode_Default;
4418     if (IsMIMG && isGFX10() && Operands.size() == 2)
4419       Mode = OperandMode_NSA;
4420     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4421 
4422     // Eat the comma or space if there is one.
4423     if (getLexer().is(AsmToken::Comma))
4424       Parser.Lex();
4425 
4426     switch (Res) {
4427       case MatchOperand_Success: break;
4428       case MatchOperand_ParseFail:
4429         // FIXME: use real operand location rather than the current location.
4430         Error(getLexer().getLoc(), "failed parsing operand.");
4431         while (!getLexer().is(AsmToken::EndOfStatement)) {
4432           Parser.Lex();
4433         }
4434         return true;
4435       case MatchOperand_NoMatch:
4436         // FIXME: use real operand location rather than the current location.
4437         Error(getLexer().getLoc(), "not a valid operand.");
4438         while (!getLexer().is(AsmToken::EndOfStatement)) {
4439           Parser.Lex();
4440         }
4441         return true;
4442     }
4443   }
4444 
4445   return false;
4446 }
4447 
4448 //===----------------------------------------------------------------------===//
4449 // Utility functions
4450 //===----------------------------------------------------------------------===//
4451 
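// Parse an integer attached to a named prefix. For example (illustrative),
// with Prefix == "offset" the input "offset:16" yields IntVal == 16.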
4452 OperandMatchResultTy
4453 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4454 
4455   if (!trySkipId(Prefix, AsmToken::Colon))
4456     return MatchOperand_NoMatch;
4457 
4458   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4459 }
4460 
4461 OperandMatchResultTy
4462 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4463                                     AMDGPUOperand::ImmTy ImmTy,
4464                                     bool (*ConvertResult)(int64_t&)) {
4465   SMLoc S = getLoc();
4466   int64_t Value = 0;
4467 
4468   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4469   if (Res != MatchOperand_Success)
4470     return Res;
4471 
4472   if (ConvertResult && !ConvertResult(Value)) {
4473     Error(S, "invalid " + StringRef(Prefix) + " value.");
4474   }
4475 
4476   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4477   return MatchOperand_Success;
4478 }
4479 
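// Parse a small array of 0/1 values attached to a named prefix, packing
// element I into bit I. For example (illustrative), "op_sel:[0,1]" produces
// the immediate value 0b10.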
4480 OperandMatchResultTy
4481 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4482                                              OperandVector &Operands,
4483                                              AMDGPUOperand::ImmTy ImmTy,
4484                                              bool (*ConvertResult)(int64_t&)) {
4485   SMLoc S = getLoc();
4486   if (!trySkipId(Prefix, AsmToken::Colon))
4487     return MatchOperand_NoMatch;
4488 
4489   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4490     return MatchOperand_ParseFail;
4491 
4492   unsigned Val = 0;
4493   const unsigned MaxSize = 4;
4494 
4495   // FIXME: How to verify the number of elements matches the number of src
4496   // operands?
4497   for (int I = 0; ; ++I) {
4498     int64_t Op;
4499     SMLoc Loc = getLoc();
4500     if (!parseExpr(Op))
4501       return MatchOperand_ParseFail;
4502 
4503     if (Op != 0 && Op != 1) {
4504       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4505       return MatchOperand_ParseFail;
4506     }
4507 
4508     Val |= (Op << I);
4509 
4510     if (trySkipToken(AsmToken::RBrac))
4511       break;
4512 
4513     if (I + 1 == MaxSize) {
4514       Error(getLoc(), "expected a closing square bracket");
4515       return MatchOperand_ParseFail;
4516     }
4517 
4518     if (!skipToken(AsmToken::Comma, "expected a comma"))
4519       return MatchOperand_ParseFail;
4520   }
4521 
4522   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4523   return MatchOperand_Success;
4524 }
4525 
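// Parse a named boolean modifier: the bare name sets the bit and a
// "no"-prefixed form clears it, e.g. (illustrative) "glc" -> 1, "noglc" -> 0.
// If the modifier is absent, the default value 0 is used.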
4526 OperandMatchResultTy
4527 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4528                                AMDGPUOperand::ImmTy ImmTy) {
4529   int64_t Bit = 0;
4530   SMLoc S = Parser.getTok().getLoc();
4531 
  // If we are already at the end of the statement, this is a default argument,
  // so keep the default value.
4534   if (getLexer().isNot(AsmToken::EndOfStatement)) {
4535     switch(getLexer().getKind()) {
4536       case AsmToken::Identifier: {
4537         StringRef Tok = Parser.getTok().getString();
4538         if (Tok == Name) {
4539           if (Tok == "r128" && isGFX9())
4540             Error(S, "r128 modifier is not supported on this GPU");
4541           if (Tok == "a16" && !isGFX9() && !isGFX10())
4542             Error(S, "a16 modifier is not supported on this GPU");
4543           Bit = 1;
4544           Parser.Lex();
4545         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4546           Bit = 0;
4547           Parser.Lex();
4548         } else {
4549           return MatchOperand_NoMatch;
4550         }
4551         break;
4552       }
4553       default:
4554         return MatchOperand_NoMatch;
4555     }
4556   }
4557 
4558   if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4559     return MatchOperand_ParseFail;
4560 
4561   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4562   return MatchOperand_Success;
4563 }
4564 
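     // Add an optional immediate operand to Inst: use the parsed value if its
     // index was recorded in OptionalIdx, otherwise use Default.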
4565 static void addOptionalImmOperand(
4566   MCInst& Inst, const OperandVector& Operands,
4567   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4568   AMDGPUOperand::ImmTy ImmT,
4569   int64_t Default = 0) {
4570   auto i = OptionalIdx.find(ImmT);
4571   if (i != OptionalIdx.end()) {
4572     unsigned Idx = i->second;
4573     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4574   } else {
4575     Inst.addOperand(MCOperand::createImm(Default));
4576   }
4577 }
4578 
4579 OperandMatchResultTy
4580 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4581   if (getLexer().isNot(AsmToken::Identifier)) {
4582     return MatchOperand_NoMatch;
4583   }
4584   StringRef Tok = Parser.getTok().getString();
4585   if (Tok != Prefix) {
4586     return MatchOperand_NoMatch;
4587   }
4588 
4589   Parser.Lex();
4590   if (getLexer().isNot(AsmToken::Colon)) {
4591     return MatchOperand_ParseFail;
4592   }
4593 
4594   Parser.Lex();
4595   if (getLexer().isNot(AsmToken::Identifier)) {
4596     return MatchOperand_ParseFail;
4597   }
4598 
4599   Value = Parser.getTok().getString();
4600   return MatchOperand_Success;
4601 }
4602 
4603 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
4604 // values to live in a joint format operand in the MCInst encoding.
4605 OperandMatchResultTy
4606 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
4607   SMLoc S = Parser.getTok().getLoc();
4608   int64_t Dfmt = 0, Nfmt = 0;
4609   // dfmt and nfmt can appear in either order, and each is optional.
4610   bool GotDfmt = false, GotNfmt = false;
4611   while (!GotDfmt || !GotNfmt) {
4612     if (!GotDfmt) {
4613       auto Res = parseIntWithPrefix("dfmt", Dfmt);
4614       if (Res != MatchOperand_NoMatch) {
4615         if (Res != MatchOperand_Success)
4616           return Res;
4617         if (Dfmt >= 16) {
4618           Error(Parser.getTok().getLoc(), "out of range dfmt");
4619           return MatchOperand_ParseFail;
4620         }
4621         GotDfmt = true;
4622         Parser.Lex();
4623         continue;
4624       }
4625     }
4626     if (!GotNfmt) {
4627       auto Res = parseIntWithPrefix("nfmt", Nfmt);
4628       if (Res != MatchOperand_NoMatch) {
4629         if (Res != MatchOperand_Success)
4630           return Res;
4631         if (Nfmt >= 8) {
4632           Error(Parser.getTok().getLoc(), "out of range nfmt");
4633           return MatchOperand_ParseFail;
4634         }
4635         GotNfmt = true;
4636         Parser.Lex();
4637         continue;
4638       }
4639     }
4640     break;
4641   }
4642   if (!GotDfmt && !GotNfmt)
4643     return MatchOperand_NoMatch;
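       // Pack dfmt into bits [3:0] and nfmt into bits [6:4] of the combined
       // format operand, matching the range checks above.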
4644   auto Format = Dfmt | Nfmt << 4;
4645   Operands.push_back(
4646       AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
4647   return MatchOperand_Success;
4648 }
4649 
4650 //===----------------------------------------------------------------------===//
4651 // ds
4652 //===----------------------------------------------------------------------===//
4653 
4654 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
4655                                     const OperandVector &Operands) {
4656   OptionalImmIndexMap OptionalIdx;
4657 
4658   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4659     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4660 
4661     // Add the register arguments
4662     if (Op.isReg()) {
4663       Op.addRegOperands(Inst, 1);
4664       continue;
4665     }
4666 
4667     // Handle optional arguments
4668     OptionalIdx[Op.getImmTy()] = i;
4669   }
4670 
4671   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
4672   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
4673   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4674 
4675   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4676 }
4677 
4678 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
4679                                 bool IsGdsHardcoded) {
4680   OptionalImmIndexMap OptionalIdx;
4681 
4682   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4683     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4684 
4685     // Add the register arguments
4686     if (Op.isReg()) {
4687       Op.addRegOperands(Inst, 1);
4688       continue;
4689     }
4690 
4691     if (Op.isToken() && Op.getToken() == "gds") {
4692       IsGdsHardcoded = true;
4693       continue;
4694     }
4695 
4696     // Handle optional arguments
4697     OptionalIdx[Op.getImmTy()] = i;
4698   }
4699 
4700   AMDGPUOperand::ImmTy OffsetType =
4701     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
4702      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
4703      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
4704                                                       AMDGPUOperand::ImmTyOffset;
4705 
4706   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
4707 
4708   if (!IsGdsHardcoded) {
4709     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4710   }
4711   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4712 }
4713 
4714 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
4715   OptionalImmIndexMap OptionalIdx;
4716 
4717   unsigned OperandIdx[4];
4718   unsigned EnMask = 0;
4719   int SrcIdx = 0;
4720 
4721   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4722     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4723 
4724     // Add the register arguments
4725     if (Op.isReg()) {
4726       assert(SrcIdx < 4);
4727       OperandIdx[SrcIdx] = Inst.size();
4728       Op.addRegOperands(Inst, 1);
4729       ++SrcIdx;
4730       continue;
4731     }
4732 
4733     if (Op.isOff()) {
4734       assert(SrcIdx < 4);
4735       OperandIdx[SrcIdx] = Inst.size();
4736       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
4737       ++SrcIdx;
4738       continue;
4739     }
4740 
4741     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
4742       Op.addImmOperands(Inst, 1);
4743       continue;
4744     }
4745 
4746     if (Op.isToken() && Op.getToken() == "done")
4747       continue;
4748 
4749     // Handle optional arguments
4750     OptionalIdx[Op.getImmTy()] = i;
4751   }
4752 
4753   assert(SrcIdx == 4);
4754 
4755   bool Compr = false;
4756   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
4757     Compr = true;
4758     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
4759     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
4760     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
4761   }
4762 
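       // Build the 'en' mask: one bit per enabled 32-bit source, or two bits
       // per enabled register when the sources are compressed (compr).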
4763   for (auto i = 0; i < SrcIdx; ++i) {
4764     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
4765       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
4766     }
4767   }
4768 
4769   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
4770   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
4771 
4772   Inst.addOperand(MCOperand::createImm(EnMask));
4773 }
4774 
4775 //===----------------------------------------------------------------------===//
4776 // s_waitcnt
4777 //===----------------------------------------------------------------------===//
4778 
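     // Encode CntVal into the counter field of IntVal. Return true on failure,
     // i.e. when the value does not round-trip through encode/decode. If
     // Saturate is set, clamp the field to its maximum value instead.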
4779 static bool
4780 encodeCnt(
4781   const AMDGPU::IsaVersion ISA,
4782   int64_t &IntVal,
4783   int64_t CntVal,
4784   bool Saturate,
4785   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
4786   unsigned (*decode)(const IsaVersion &Version, unsigned))
4787 {
4788   bool Failed = false;
4789 
4790   IntVal = encode(ISA, IntVal, CntVal);
4791   if (CntVal != decode(ISA, IntVal)) {
4792     if (Saturate) {
4793       IntVal = encode(ISA, IntVal, -1);
4794     } else {
4795       Failed = true;
4796     }
4797   }
4798   return Failed;
4799 }
4800 
4801 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
4802 
4803   SMLoc CntLoc = getLoc();
4804   StringRef CntName = getTokenStr();
4805 
4806   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
4807       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
4808     return false;
4809 
4810   int64_t CntVal;
4811   SMLoc ValLoc = getLoc();
4812   if (!parseExpr(CntVal))
4813     return false;
4814 
4815   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4816 
4817   bool Failed = true;
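       // A counter name with a "_sat" suffix requests saturation: an
       // out-of-range value is clamped to the field maximum instead of being
       // reported as an error (see encodeCnt).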
4818   bool Sat = CntName.endswith("_sat");
4819 
4820   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
4821     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
4822   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
4823     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
4824   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
4825     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
4826   } else {
4827     Error(CntLoc, "invalid counter name " + CntName);
4828     return false;
4829   }
4830 
4831   if (Failed) {
4832     Error(ValLoc, "too large value for " + CntName);
4833     return false;
4834   }
4835 
4836   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
4837     return false;
4838 
4839   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
4840     if (isToken(AsmToken::EndOfStatement)) {
4841       Error(getLoc(), "expected a counter name");
4842       return false;
4843     }
4844   }
4845 
4846   return true;
4847 }
4848 
4849 OperandMatchResultTy
4850 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
4851   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
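       // Start with every counter field at its maximum ("do not wait") value;
       // parseCnt overwrites only the counters that are explicitly named.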
4852   int64_t Waitcnt = getWaitcntBitMask(ISA);
4853   SMLoc S = getLoc();
4854 
4855   // If the parse failed, do not return an error code
4856   // to avoid excessive error messages.
4857   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
4858     while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
4859   } else {
4860     parseExpr(Waitcnt);
4861   }
4862 
4863   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
4864   return MatchOperand_Success;
4865 }
4866 
4867 bool
4868 AMDGPUOperand::isSWaitCnt() const {
4869   return isImm();
4870 }
4871 
4872 //===----------------------------------------------------------------------===//
4873 // hwreg
4874 //===----------------------------------------------------------------------===//
4875 
4876 bool
4877 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
4878                                 int64_t &Offset,
4879                                 int64_t &Width) {
4880   using namespace llvm::AMDGPU::Hwreg;
4881 
4882   // The register may be specified by name or using a numeric code
4883   if (isToken(AsmToken::Identifier) &&
4884       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
4885     HwReg.IsSymbolic = true;
4886     lex(); // skip message name
4887   } else if (!parseExpr(HwReg.Id)) {
4888     return false;
4889   }
4890 
4891   if (trySkipToken(AsmToken::RParen))
4892     return true;
4893 
4894   // parse optional params
4895   return
4896     skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
4897     parseExpr(Offset) &&
4898     skipToken(AsmToken::Comma, "expected a comma") &&
4899     parseExpr(Width) &&
4900     skipToken(AsmToken::RParen, "expected a closing parenthesis");
4901 }
4902 
4903 bool
4904 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
4905                                const int64_t Offset,
4906                                const int64_t Width,
4907                                const SMLoc Loc) {
4908 
4909   using namespace llvm::AMDGPU::Hwreg;
4910 
4911   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
4912     Error(Loc, "specified hardware register is not supported on this GPU");
4913     return false;
4914   } else if (!isValidHwreg(HwReg.Id)) {
4915     Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
4916     return false;
4917   } else if (!isValidHwregOffset(Offset)) {
4918     Error(Loc, "invalid bit offset: only 5-bit values are legal");
4919     return false;
4920   } else if (!isValidHwregWidth(Width)) {
4921     Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
4922     return false;
4923   }
4924   return true;
4925 }
4926 
4927 OperandMatchResultTy
4928 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
4929   using namespace llvm::AMDGPU::Hwreg;
4930 
4931   int64_t ImmVal = 0;
4932   SMLoc Loc = getLoc();
4933 
4934   // If the parse failed, do not return an error code
4935   // to avoid excessive error messages.
4936   if (trySkipId("hwreg", AsmToken::LParen)) {
4937     OperandInfoTy HwReg(ID_UNKNOWN_);
4938     int64_t Offset = OFFSET_DEFAULT_;
4939     int64_t Width = WIDTH_DEFAULT_;
4940     if (parseHwregBody(HwReg, Offset, Width) &&
4941         validateHwreg(HwReg, Offset, Width, Loc)) {
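           // Pack the register id, bit offset and bit width into the 16-bit
           // simm16 operand used by s_getreg/s_setreg.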
4942       ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
4943     }
4944   } else if (parseExpr(ImmVal)) {
4945     if (ImmVal < 0 || !isUInt<16>(ImmVal))
4946       Error(Loc, "invalid immediate: only 16-bit values are legal");
4947   }
4948 
4949   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
4950   return MatchOperand_Success;
4951 }
4952 
4953 bool AMDGPUOperand::isHwreg() const {
4954   return isImmTy(ImmTyHwreg);
4955 }
4956 
4957 //===----------------------------------------------------------------------===//
4958 // sendmsg
4959 //===----------------------------------------------------------------------===//
4960 
4961 bool
4962 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
4963                                   OperandInfoTy &Op,
4964                                   OperandInfoTy &Stream) {
4965   using namespace llvm::AMDGPU::SendMsg;
4966 
4967   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
4968     Msg.IsSymbolic = true;
4969     lex(); // skip message name
4970   } else if (!parseExpr(Msg.Id)) {
4971     return false;
4972   }
4973 
4974   if (trySkipToken(AsmToken::Comma)) {
4975     Op.IsDefined = true;
4976     if (isToken(AsmToken::Identifier) &&
4977         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
4978       lex(); // skip operation name
4979     } else if (!parseExpr(Op.Id)) {
4980       return false;
4981     }
4982 
4983     if (trySkipToken(AsmToken::Comma)) {
4984       Stream.IsDefined = true;
4985       if (!parseExpr(Stream.Id))
4986         return false;
4987     }
4988   }
4989 
4990   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
4991 }
4992 
4993 bool
4994 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
4995                                  const OperandInfoTy &Op,
4996                                  const OperandInfoTy &Stream,
4997                                  const SMLoc S) {
4998   using namespace llvm::AMDGPU::SendMsg;
4999 
5000   // Validation strictness depends on whether the message is specified
5001   // in a symbolic or in a numeric form. In the latter case
5002   // only the possibility of encoding is checked.
5003   bool Strict = Msg.IsSymbolic;
5004 
5005   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
5006     Error(S, "invalid message id");
5007     return false;
5008   } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
5009     Error(S, Op.IsDefined ?
5010              "message does not support operations" :
5011              "missing message operation");
5012     return false;
5013   } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
5014     Error(S, "invalid operation id");
5015     return false;
5016   } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
5017     Error(S, "message operation does not support streams");
5018     return false;
5019   } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
5020     Error(S, "invalid message stream id");
5021     return false;
5022   }
5023   return true;
5024 }
5025 
5026 OperandMatchResultTy
5027 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
5028   using namespace llvm::AMDGPU::SendMsg;
5029 
5030   int64_t ImmVal = 0;
5031   SMLoc Loc = getLoc();
5032 
5033   // If the parse failed, do not return an error code
5034   // to avoid excessive error messages.
5035   if (trySkipId("sendmsg", AsmToken::LParen)) {
5036     OperandInfoTy Msg(ID_UNKNOWN_);
5037     OperandInfoTy Op(OP_NONE_);
5038     OperandInfoTy Stream(STREAM_ID_NONE_);
5039     if (parseSendMsgBody(Msg, Op, Stream) &&
5040         validateSendMsg(Msg, Op, Stream, Loc)) {
5041       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5042     }
5043   } else if (parseExpr(ImmVal)) {
5044     if (ImmVal < 0 || !isUInt<16>(ImmVal))
5045       Error(Loc, "invalid immediate: only 16-bit values are legal");
5046   }
5047 
5048   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5049   return MatchOperand_Success;
5050 }
5051 
5052 bool AMDGPUOperand::isSendMsg() const {
5053   return isImmTy(ImmTySendMsg);
5054 }
5055 
5056 //===----------------------------------------------------------------------===//
5057 // v_interp
5058 //===----------------------------------------------------------------------===//
5059 
5060 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5061   if (getLexer().getKind() != AsmToken::Identifier)
5062     return MatchOperand_NoMatch;
5063 
5064   StringRef Str = Parser.getTok().getString();
5065   int Slot = StringSwitch<int>(Str)
5066     .Case("p10", 0)
5067     .Case("p20", 1)
5068     .Case("p0", 2)
5069     .Default(-1);
5070 
5071   SMLoc S = Parser.getTok().getLoc();
5072   if (Slot == -1)
5073     return MatchOperand_ParseFail;
5074 
5075   Parser.Lex();
5076   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5077                                               AMDGPUOperand::ImmTyInterpSlot));
5078   return MatchOperand_Success;
5079 }
5080 
5081 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5082   if (getLexer().getKind() != AsmToken::Identifier)
5083     return MatchOperand_NoMatch;
5084 
5085   StringRef Str = Parser.getTok().getString();
5086   if (!Str.startswith("attr"))
5087     return MatchOperand_NoMatch;
5088 
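       // The operand has the form attr<N>.<chan>, e.g. attr0.x. Peel off the
       // two-character channel suffix first, then the "attr" prefix.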
5089   StringRef Chan = Str.take_back(2);
5090   int AttrChan = StringSwitch<int>(Chan)
5091     .Case(".x", 0)
5092     .Case(".y", 1)
5093     .Case(".z", 2)
5094     .Case(".w", 3)
5095     .Default(-1);
5096   if (AttrChan == -1)
5097     return MatchOperand_ParseFail;
5098 
5099   Str = Str.drop_back(2).drop_front(4);
5100 
5101   uint8_t Attr;
5102   if (Str.getAsInteger(10, Attr))
5103     return MatchOperand_ParseFail;
5104 
5105   SMLoc S = Parser.getTok().getLoc();
5106   Parser.Lex();
5107   if (Attr > 63) {
5108     Error(S, "out of bounds attr");
5109     return MatchOperand_Success;
5110   }
5111 
5112   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5113 
5114   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5115                                               AMDGPUOperand::ImmTyInterpAttr));
5116   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5117                                               AMDGPUOperand::ImmTyAttrChan));
5118   return MatchOperand_Success;
5119 }
5120 
5121 //===----------------------------------------------------------------------===//
5122 // exp
5123 //===----------------------------------------------------------------------===//
5124 
5125 void AMDGPUAsmParser::errorExpTgt() {
5126   Error(Parser.getTok().getLoc(), "invalid exp target");
5127 }
5128 
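     // Map a symbolic export target to its numeric value: mrt0-7 -> 0-7,
     // mrtz -> 8, null -> 9, pos0-3 -> 12-15 (pos4 -> 16 on GFX10),
     // prim -> 20 (GFX10), param0-31 -> 32-63.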
5129 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5130                                                       uint8_t &Val) {
5131   if (Str == "null") {
5132     Val = 9;
5133     return MatchOperand_Success;
5134   }
5135 
5136   if (Str.startswith("mrt")) {
5137     Str = Str.drop_front(3);
5138     if (Str == "z") { // == mrtz
5139       Val = 8;
5140       return MatchOperand_Success;
5141     }
5142 
5143     if (Str.getAsInteger(10, Val))
5144       return MatchOperand_ParseFail;
5145 
5146     if (Val > 7)
5147       errorExpTgt();
5148 
5149     return MatchOperand_Success;
5150   }
5151 
5152   if (Str.startswith("pos")) {
5153     Str = Str.drop_front(3);
5154     if (Str.getAsInteger(10, Val))
5155       return MatchOperand_ParseFail;
5156 
5157     if (Val > 4 || (Val == 4 && !isGFX10()))
5158       errorExpTgt();
5159 
5160     Val += 12;
5161     return MatchOperand_Success;
5162   }
5163 
5164   if (isGFX10() && Str == "prim") {
5165     Val = 20;
5166     return MatchOperand_Success;
5167   }
5168 
5169   if (Str.startswith("param")) {
5170     Str = Str.drop_front(5);
5171     if (Str.getAsInteger(10, Val))
5172       return MatchOperand_ParseFail;
5173 
5174     if (Val >= 32)
5175       errorExpTgt();
5176 
5177     Val += 32;
5178     return MatchOperand_Success;
5179   }
5180 
5181   if (Str.startswith("invalid_target_")) {
5182     Str = Str.drop_front(15);
5183     if (Str.getAsInteger(10, Val))
5184       return MatchOperand_ParseFail;
5185 
5186     errorExpTgt();
5187     return MatchOperand_Success;
5188   }
5189 
5190   return MatchOperand_NoMatch;
5191 }
5192 
5193 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5194   uint8_t Val;
5195   StringRef Str = Parser.getTok().getString();
5196 
5197   auto Res = parseExpTgtImpl(Str, Val);
5198   if (Res != MatchOperand_Success)
5199     return Res;
5200 
5201   SMLoc S = Parser.getTok().getLoc();
5202   Parser.Lex();
5203 
5204   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5205                                               AMDGPUOperand::ImmTyExpTgt));
5206   return MatchOperand_Success;
5207 }
5208 
5209 //===----------------------------------------------------------------------===//
5210 // parser helpers
5211 //===----------------------------------------------------------------------===//
5212 
5213 bool
5214 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5215   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5216 }
5217 
5218 bool
5219 AMDGPUAsmParser::isId(const StringRef Id) const {
5220   return isId(getToken(), Id);
5221 }
5222 
5223 bool
5224 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5225   return getTokenKind() == Kind;
5226 }
5227 
5228 bool
5229 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5230   if (isId(Id)) {
5231     lex();
5232     return true;
5233   }
5234   return false;
5235 }
5236 
5237 bool
5238 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5239   if (isId(Id) && peekToken().is(Kind)) {
5240     lex();
5241     lex();
5242     return true;
5243   }
5244   return false;
5245 }
5246 
5247 bool
5248 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5249   if (isToken(Kind)) {
5250     lex();
5251     return true;
5252   }
5253   return false;
5254 }
5255 
5256 bool
5257 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5258                            const StringRef ErrMsg) {
5259   if (!trySkipToken(Kind)) {
5260     Error(getLoc(), ErrMsg);
5261     return false;
5262   }
5263   return true;
5264 }
5265 
5266 bool
5267 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5268   return !getParser().parseAbsoluteExpression(Imm);
5269 }
5270 
5271 bool
5272 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
5273   SMLoc S = getLoc();
5274 
5275   const MCExpr *Expr;
5276   if (Parser.parseExpression(Expr))
5277     return false;
5278 
5279   int64_t IntVal;
5280   if (Expr->evaluateAsAbsolute(IntVal)) {
5281     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
5282   } else {
5283     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
5284   }
5285   return true;
5286 }
5287 
5288 bool
5289 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5290   if (isToken(AsmToken::String)) {
5291     Val = getToken().getStringContents();
5292     lex();
5293     return true;
5294   } else {
5295     Error(getLoc(), ErrMsg);
5296     return false;
5297   }
5298 }
5299 
5300 AsmToken
5301 AMDGPUAsmParser::getToken() const {
5302   return Parser.getTok();
5303 }
5304 
5305 AsmToken
5306 AMDGPUAsmParser::peekToken() {
5307   return getLexer().peekTok();
5308 }
5309 
5310 void
5311 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
5312   auto TokCount = getLexer().peekTokens(Tokens);
5313 
5314   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
5315     Tokens[Idx] = AsmToken(AsmToken::Error, "");
5316 }
5317 
5318 AsmToken::TokenKind
5319 AMDGPUAsmParser::getTokenKind() const {
5320   return getLexer().getKind();
5321 }
5322 
5323 SMLoc
5324 AMDGPUAsmParser::getLoc() const {
5325   return getToken().getLoc();
5326 }
5327 
5328 StringRef
5329 AMDGPUAsmParser::getTokenStr() const {
5330   return getToken().getString();
5331 }
5332 
5333 void
5334 AMDGPUAsmParser::lex() {
5335   Parser.Lex();
5336 }
5337 
5338 //===----------------------------------------------------------------------===//
5339 // swizzle
5340 //===----------------------------------------------------------------------===//
5341 
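     // Pack the three 5-bit masks of the ds_swizzle bitmask mode. In this mode
     // each lane reads from lane ((lane_id & AndMask) | OrMask) ^ XorMask.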
5342 LLVM_READNONE
5343 static unsigned
5344 encodeBitmaskPerm(const unsigned AndMask,
5345                   const unsigned OrMask,
5346                   const unsigned XorMask) {
5347   using namespace llvm::AMDGPU::Swizzle;
5348 
5349   return BITMASK_PERM_ENC |
5350          (AndMask << BITMASK_AND_SHIFT) |
5351          (OrMask  << BITMASK_OR_SHIFT)  |
5352          (XorMask << BITMASK_XOR_SHIFT);
5353 }
5354 
5355 bool
5356 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5357                                       const unsigned MinVal,
5358                                       const unsigned MaxVal,
5359                                       const StringRef ErrMsg) {
5360   for (unsigned i = 0; i < OpNum; ++i) {
5361     if (!skipToken(AsmToken::Comma, "expected a comma")){
5362       return false;
5363     }
5364     SMLoc ExprLoc = Parser.getTok().getLoc();
5365     if (!parseExpr(Op[i])) {
5366       return false;
5367     }
5368     if (Op[i] < MinVal || Op[i] > MaxVal) {
5369       Error(ExprLoc, ErrMsg);
5370       return false;
5371     }
5372   }
5373 
5374   return true;
5375 }
5376 
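     // QUAD_PERM takes four 2-bit lane selectors: within every group of four
     // lanes, output lane I reads from the lane named by selector I. The
     // selectors are packed LANE_SHIFT bits apart.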
5377 bool
5378 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5379   using namespace llvm::AMDGPU::Swizzle;
5380 
5381   int64_t Lane[LANE_NUM];
5382   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5383                            "expected a 2-bit lane id")) {
5384     Imm = QUAD_PERM_ENC;
5385     for (unsigned I = 0; I < LANE_NUM; ++I) {
5386       Imm |= Lane[I] << (LANE_SHIFT * I);
5387     }
5388     return true;
5389   }
5390   return false;
5391 }
5392 
5393 bool
5394 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5395   using namespace llvm::AMDGPU::Swizzle;
5396 
5397   SMLoc S = Parser.getTok().getLoc();
5398   int64_t GroupSize;
5399   int64_t LaneIdx;
5400 
5401   if (!parseSwizzleOperands(1, &GroupSize,
5402                             2, 32,
5403                             "group size must be in the interval [2,32]")) {
5404     return false;
5405   }
5406   if (!isPowerOf2_64(GroupSize)) {
5407     Error(S, "group size must be a power of two");
5408     return false;
5409   }
5410   if (parseSwizzleOperands(1, &LaneIdx,
5411                            0, GroupSize - 1,
5412                            "lane id must be in the interval [0,group size - 1]")) {
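         // BITMASK_MAX - GroupSize + 1 is a mask that clears the low
         // log2(GroupSize) bits of the lane id; OR'ing in LaneIdx then makes
         // every lane of a group read from that group's LaneIdx-th lane.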
5413     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5414     return true;
5415   }
5416   return false;
5417 }
5418 
5419 bool
5420 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
5421   using namespace llvm::AMDGPU::Swizzle;
5422 
5423   SMLoc S = Parser.getTok().getLoc();
5424   int64_t GroupSize;
5425 
5426   if (!parseSwizzleOperands(1, &GroupSize,
5427       2, 32, "group size must be in the interval [2,32]")) {
5428     return false;
5429   }
5430   if (!isPowerOf2_64(GroupSize)) {
5431     Error(S, "group size must be a power of two");
5432     return false;
5433   }
5434 
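       // XOR'ing the lane id with GroupSize - 1 reverses the lane order
       // within every group of GroupSize lanes.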
5435   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
5436   return true;
5437 }
5438 
5439 bool
5440 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5441   using namespace llvm::AMDGPU::Swizzle;
5442 
5443   SMLoc S = Parser.getTok().getLoc();
5444   int64_t GroupSize;
5445 
5446   if (!parseSwizzleOperands(1, &GroupSize,
5447       1, 16, "group size must be in the interval [1,16]")) {
5448     return false;
5449   }
5450   if (!isPowerOf2_64(GroupSize)) {
5451     Error(S, "group size must be a power of two");
5452     return false;
5453   }
5454 
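       // XOR'ing the lane id with GroupSize swaps each group of GroupSize
       // lanes with its neighboring group.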
5455   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5456   return true;
5457 }
5458 
5459 bool
5460 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5461   using namespace llvm::AMDGPU::Swizzle;
5462 
5463   if (!skipToken(AsmToken::Comma, "expected a comma")) {
5464     return false;
5465   }
5466 
5467   StringRef Ctl;
5468   SMLoc StrLoc = Parser.getTok().getLoc();
5469   if (!parseString(Ctl)) {
5470     return false;
5471   }
5472   if (Ctl.size() != BITMASK_WIDTH) {
5473     Error(StrLoc, "expected a 5-character mask");
5474     return false;
5475   }
5476 
5477   unsigned AndMask = 0;
5478   unsigned OrMask = 0;
5479   unsigned XorMask = 0;
5480 
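       // Each character of the control string describes one bit of the lane
       // id, most significant bit first: '0' forces the bit to 0, '1' forces
       // it to 1, 'p' preserves it, and 'i' inverts it.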
5481   for (size_t i = 0; i < Ctl.size(); ++i) {
5482     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5483     switch(Ctl[i]) {
5484     default:
5485       Error(StrLoc, "invalid mask");
5486       return false;
5487     case '0':
5488       break;
5489     case '1':
5490       OrMask |= Mask;
5491       break;
5492     case 'p':
5493       AndMask |= Mask;
5494       break;
5495     case 'i':
5496       AndMask |= Mask;
5497       XorMask |= Mask;
5498       break;
5499     }
5500   }
5501 
5502   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
5503   return true;
5504 }
5505 
5506 bool
5507 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5508 
5509   SMLoc OffsetLoc = Parser.getTok().getLoc();
5510 
5511   if (!parseExpr(Imm)) {
5512     return false;
5513   }
5514   if (!isUInt<16>(Imm)) {
5515     Error(OffsetLoc, "expected a 16-bit offset");
5516     return false;
5517   }
5518   return true;
5519 }
5520 
5521 bool
5522 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
5523   using namespace llvm::AMDGPU::Swizzle;
5524 
5525   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
5526 
5527     SMLoc ModeLoc = Parser.getTok().getLoc();
5528     bool Ok = false;
5529 
5530     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
5531       Ok = parseSwizzleQuadPerm(Imm);
5532     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
5533       Ok = parseSwizzleBitmaskPerm(Imm);
5534     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
5535       Ok = parseSwizzleBroadcast(Imm);
5536     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
5537       Ok = parseSwizzleSwap(Imm);
5538     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
5539       Ok = parseSwizzleReverse(Imm);
5540     } else {
5541       Error(ModeLoc, "expected a swizzle mode");
5542     }
5543 
5544     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
5545   }
5546 
5547   return false;
5548 }
5549 
5550 OperandMatchResultTy
5551 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
5552   SMLoc S = Parser.getTok().getLoc();
5553   int64_t Imm = 0;
5554 
5555   if (trySkipId("offset")) {
5556 
5557     bool Ok = false;
5558     if (skipToken(AsmToken::Colon, "expected a colon")) {
5559       if (trySkipId("swizzle")) {
5560         Ok = parseSwizzleMacro(Imm);
5561       } else {
5562         Ok = parseSwizzleOffset(Imm);
5563       }
5564     }
5565 
5566     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
5567 
5568     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
5569   } else {
5570     // Swizzle "offset" operand is optional.
5571     // If it is omitted, try parsing other optional operands.
5572     return parseOptionalOpr(Operands);
5573   }
5574 }
5575 
5576 bool
5577 AMDGPUOperand::isSwizzle() const {
5578   return isImmTy(ImmTySwizzle);
5579 }
5580 
5581 //===----------------------------------------------------------------------===//
5582 // VGPR Index Mode
5583 //===----------------------------------------------------------------------===//
5584 
5585 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
5586 
5587   using namespace llvm::AMDGPU::VGPRIndexMode;
5588 
5589   if (trySkipToken(AsmToken::RParen)) {
5590     return OFF;
5591   }
5592 
5593   int64_t Imm = 0;
5594 
5595   while (true) {
5596     unsigned Mode = 0;
5597     SMLoc S = Parser.getTok().getLoc();
5598 
5599     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
5600       if (trySkipId(IdSymbolic[ModeId])) {
5601         Mode = 1 << ModeId;
5602         break;
5603       }
5604     }
5605 
5606     if (Mode == 0) {
5607       Error(S, (Imm == 0)?
5608                "expected a VGPR index mode or a closing parenthesis" :
5609                "expected a VGPR index mode");
5610       break;
5611     }
5612 
5613     if (Imm & Mode) {
5614       Error(S, "duplicate VGPR index mode");
5615       break;
5616     }
5617     Imm |= Mode;
5618 
5619     if (trySkipToken(AsmToken::RParen))
5620       break;
5621     if (!skipToken(AsmToken::Comma,
5622                    "expected a comma or a closing parenthesis"))
5623       break;
5624   }
5625 
5626   return Imm;
5627 }
5628 
5629 OperandMatchResultTy
5630 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
5631 
5632   int64_t Imm = 0;
5633   SMLoc S = Parser.getTok().getLoc();
5634 
5635   if (getLexer().getKind() == AsmToken::Identifier &&
5636       Parser.getTok().getString() == "gpr_idx" &&
5637       getLexer().peekTok().is(AsmToken::LParen)) {
5638 
5639     Parser.Lex();
5640     Parser.Lex();
5641 
5642     // If the parse failed, trigger an error but do not return an error code
5643     // to avoid excessive error messages.
5644     Imm = parseGPRIdxMacro();
5645 
5646   } else {
5647     if (getParser().parseAbsoluteExpression(Imm))
5648       return MatchOperand_NoMatch;
5649     if (Imm < 0 || !isUInt<4>(Imm)) {
5650       Error(S, "invalid immediate: only 4-bit values are legal");
5651     }
5652   }
5653 
5654   Operands.push_back(
5655       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
5656   return MatchOperand_Success;
5657 }
5658 
5659 bool AMDGPUOperand::isGPRIdxMode() const {
5660   return isImmTy(ImmTyGprIdxMode);
5661 }
5662 
5663 //===----------------------------------------------------------------------===//
5664 // sopp branch targets
5665 //===----------------------------------------------------------------------===//
5666 
5667 OperandMatchResultTy
5668 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
5669 
5670   // Make sure we are not parsing something
5671   // that looks like a label or an expression but is not.
5672   // This will improve error messages.
5673   if (isRegister() || isModifier())
5674     return MatchOperand_NoMatch;
5675 
5676   if (parseExpr(Operands)) {
5677 
5678     AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
5679     assert(Opr.isImm() || Opr.isExpr());
5680     SMLoc Loc = Opr.getStartLoc();
5681 
5682     // Currently we do not support arbitrary expressions as branch targets.
5683     // Only labels and absolute expressions are accepted.
5684     if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
5685       Error(Loc, "expected an absolute expression or a label");
5686     } else if (Opr.isImm() && !Opr.isS16Imm()) {
5687       Error(Loc, "expected a 16-bit signed jump offset");
5688     }
5689   }
5690 
5691   return MatchOperand_Success; // avoid excessive error messages
5692 }
5693 
5694 //===----------------------------------------------------------------------===//
5695 // Boolean holding registers
5696 //===----------------------------------------------------------------------===//
5697 
5698 OperandMatchResultTy
5699 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
5700   return parseReg(Operands);
5701 }
5702 
5703 //===----------------------------------------------------------------------===//
5704 // mubuf
5705 //===----------------------------------------------------------------------===//
5706 
5707 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
5708   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
5709 }
5710 
5711 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
5712   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
5713 }
5714 
5715 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
5716   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
5717 }
5718 
5719 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
5720                                const OperandVector &Operands,
5721                                bool IsAtomic,
5722                                bool IsAtomicReturn,
5723                                bool IsLds) {
5724   bool IsLdsOpcode = IsLds;
5725   bool HasLdsModifier = false;
5726   OptionalImmIndexMap OptionalIdx;
5727   assert(IsAtomicReturn ? IsAtomic : true);
5728   unsigned FirstOperandIdx = 1;
5729 
5730   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
5731     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5732 
5733     // Add the register arguments
5734     if (Op.isReg()) {
5735       Op.addRegOperands(Inst, 1);
5736       // Insert a tied src for atomic return dst.
5737       // This cannot be postponed as subsequent calls to
5738       // addImmOperands rely on the correct number of MC operands.
5739       if (IsAtomicReturn && i == FirstOperandIdx)
5740         Op.addRegOperands(Inst, 1);
5741       continue;
5742     }
5743 
5744     // Handle the case where soffset is an immediate
5745     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5746       Op.addImmOperands(Inst, 1);
5747       continue;
5748     }
5749 
5750     HasLdsModifier |= Op.isLDS();
5751 
5752     // Handle tokens like 'offen' which are sometimes hard-coded into the
5753     // asm string.  There are no MCInst operands for these.
5754     if (Op.isToken()) {
5755       continue;
5756     }
5757     assert(Op.isImm());
5758 
5759     // Handle optional arguments
5760     OptionalIdx[Op.getImmTy()] = i;
5761   }
5762 
5763   // This is a workaround for an llvm quirk which may result in an
5764   // incorrect instruction selection. Lds and non-lds versions of
5765   // MUBUF instructions are identical except that lds versions
5766   // have a mandatory 'lds' modifier. However, this modifier follows
5767   // optional modifiers, and the llvm asm matcher regards this 'lds'
5768   // modifier as an optional one. As a result, an lds version
5769   // of an opcode may be selected even if it has no 'lds' modifier.
5770   if (IsLdsOpcode && !HasLdsModifier) {
5771     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
5772     if (NoLdsOpcode != -1) { // Got lds version - correct it.
5773       Inst.setOpcode(NoLdsOpcode);
5774       IsLdsOpcode = false;
5775     }
5776   }
5777 
5778   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
5779   if (!IsAtomic) { // glc is hard-coded.
5780     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5781   }
5782   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5783 
5784   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
5785     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5786   }
5787 
5788   if (isGFX10())
5789     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5790 }
5791 
5792 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
5793   OptionalImmIndexMap OptionalIdx;
5794 
5795   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5796     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5797 
5798     // Add the register arguments
5799     if (Op.isReg()) {
5800       Op.addRegOperands(Inst, 1);
5801       continue;
5802     }
5803 
5804     // Handle the case where soffset is an immediate
5805     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5806       Op.addImmOperands(Inst, 1);
5807       continue;
5808     }
5809 
5810     // Handle tokens like 'offen' which are sometimes hard-coded into the
5811     // asm string.  There are no MCInst operands for these.
5812     if (Op.isToken()) {
5813       continue;
5814     }
5815     assert(Op.isImm());
5816 
5817     // Handle optional arguments
5818     OptionalIdx[Op.getImmTy()] = i;
5819   }
5820 
5821   addOptionalImmOperand(Inst, Operands, OptionalIdx,
5822                         AMDGPUOperand::ImmTyOffset);
5823   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
5824   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5825   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5826   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5827 
5828   if (isGFX10())
5829     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5830 }
5831 
5832 //===----------------------------------------------------------------------===//
5833 // mimg
5834 //===----------------------------------------------------------------------===//
5835 
5836 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
5837                               bool IsAtomic) {
5838   unsigned I = 1;
5839   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5840   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5841     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5842   }
5843 
5844   if (IsAtomic) {
5845     // Add src, same as dst
5846     assert(Desc.getNumDefs() == 1);
5847     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
5848   }
5849 
5850   OptionalImmIndexMap OptionalIdx;
5851 
5852   for (unsigned E = Operands.size(); I != E; ++I) {
5853     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5854 
5855     // Add the register arguments
5856     if (Op.isReg()) {
5857       Op.addRegOperands(Inst, 1);
5858     } else if (Op.isImmModifier()) {
5859       OptionalIdx[Op.getImmTy()] = I;
5860     } else if (!Op.isToken()) {
5861       llvm_unreachable("unexpected operand type");
5862     }
5863   }
5864 
5865   bool IsGFX10 = isGFX10();
5866 
5867   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
5868   if (IsGFX10)
5869     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
5870   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
5871   if (IsGFX10)
5872     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5873   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5874   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5875   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
5876   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5877   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
5878   if (!IsGFX10)
5879     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
5880   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
5881 }
5882 
5883 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
5884   cvtMIMG(Inst, Operands, true);
5885 }
5886 
5887 //===----------------------------------------------------------------------===//
5888 // smrd
5889 //===----------------------------------------------------------------------===//
5890 
5891 bool AMDGPUOperand::isSMRDOffset8() const {
5892   return isImm() && isUInt<8>(getImm());
5893 }
5894 
5895 bool AMDGPUOperand::isSMRDOffset20() const {
5896   return isImm() && isUInt<20>(getImm());
5897 }
5898 
5899 bool AMDGPUOperand::isSMRDLiteralOffset() const {
5900   // 32-bit literals are only supported on CI and we only want to use them
5901   // when the offset is > 8-bits.
5902   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
5903 }
5904 
5905 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
5906   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5907 }
5908 
5909 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
5910   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5911 }
5912 
5913 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
5914   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5915 }
5916 
5917 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
5918   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5919 }
5920 
5921 //===----------------------------------------------------------------------===//
5922 // vop3
5923 //===----------------------------------------------------------------------===//
5924 
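     // The omod (output modifier) operand is written as mul:<1|2|4> or
     // div:<1|2>. The encoded field is 0 for no modifier, 1 for *2, 2 for *4
     // and 3 for /2, as implemented by the two converters below.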
5925 static bool ConvertOmodMul(int64_t &Mul) {
5926   if (Mul != 1 && Mul != 2 && Mul != 4)
5927     return false;
5928 
5929   Mul >>= 1;
5930   return true;
5931 }
5932 
5933 static bool ConvertOmodDiv(int64_t &Div) {
5934   if (Div == 1) {
5935     Div = 0;
5936     return true;
5937   }
5938 
5939   if (Div == 2) {
5940     Div = 3;
5941     return true;
5942   }
5943 
5944   return false;
5945 }
5946 
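     // DPP bound_ctrl: the asm form bound_ctrl:0 sets the encoded bit to 1;
     // the alternative form bound_ctrl:-1 encodes 0.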
5947 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
5948   if (BoundCtrl == 0) {
5949     BoundCtrl = 1;
5950     return true;
5951   }
5952 
5953   if (BoundCtrl == -1) {
5954     BoundCtrl = 0;
5955     return true;
5956   }
5957 
5958   return false;
5959 }
5960 
5961 // Note: the order in this table matches the order of operands in AsmString.
5962 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
5963   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
5964   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
5965   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
5966   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
5967   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
5968   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
5969   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
5970   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
5971   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
5972   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
5973   {"format",  AMDGPUOperand::ImmTyFORMAT, false, nullptr},
5974   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
5975   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
5976   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
5977   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
5978   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
5979   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
5980   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
5981   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
5982   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
5983   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
5984   {"a16",     AMDGPUOperand::ImmTyR128A16,  true, nullptr},
5985   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
5986   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
5987   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
5988   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
5989   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
5990   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
5991   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
5992   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
5993   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
5994   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
5995   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
5996   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
5997   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
5998   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
5999   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
6000   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
6001   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
6002   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
6003   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
6004   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
6005   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
6006 };
6007 
6008 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
6009   unsigned size = Operands.size();
6010   assert(size > 0);
6011 
6012   OperandMatchResultTy res = parseOptionalOpr(Operands);
6013 
6014   // This is a hack to enable hardcoded mandatory operands which follow
6015   // optional operands.
6016   //
6017   // The current design assumes that all operands after the first optional
6018   // operand are also optional. However, the implementation of some
6019   // instructions violates this rule (e.g. flat/global atomics have a
6020   // hardcoded 'glc' operand).
6021   //
6022   // To alleviate this problem, we have to (implicitly) parse extra operands so
6023   // that the autogenerated parser never hits a hardcoded mandatory operand.
6024 
6025   if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
6026 
6027     // We have parsed the first optional operand.
6028     // Parse as many operands as necessary to skip all mandatory operands.
6029 
6030     for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
6031       if (res != MatchOperand_Success ||
6032           getLexer().is(AsmToken::EndOfStatement)) break;
6033       if (getLexer().is(AsmToken::Comma)) Parser.Lex();
6034       res = parseOptionalOpr(Operands);
6035     }
6036   }
6037 
6038   return res;
6039 }
6040 
6041 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6042   OperandMatchResultTy res;
6043   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6044     // try to parse any optional operand here
6045     if (Op.IsBit) {
6046       res = parseNamedBit(Op.Name, Operands, Op.Type);
6047     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6048       res = parseOModOperand(Operands);
6049     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6050                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6051                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6052       res = parseSDWASel(Operands, Op.Name, Op.Type);
6053     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6054       res = parseSDWADstUnused(Operands);
6055     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6056                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6057                Op.Type == AMDGPUOperand::ImmTyNegLo ||
6058                Op.Type == AMDGPUOperand::ImmTyNegHi) {
6059       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6060                                         Op.ConvertResult);
6061     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6062       res = parseDim(Operands);
6063     } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
6064       res = parseDfmtNfmt(Operands);
6065     } else {
6066       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6067     }
6068     if (res != MatchOperand_NoMatch) {
6069       return res;
6070     }
6071   }
6072   return MatchOperand_NoMatch;
6073 }
6074 
6075 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6076   StringRef Name = Parser.getTok().getString();
6077   if (Name == "mul") {
6078     return parseIntWithPrefix("mul", Operands,
6079                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6080   }
6081 
6082   if (Name == "div") {
6083     return parseIntWithPrefix("div", Operands,
6084                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6085   }
6086 
6087   return MatchOperand_NoMatch;
6088 }
6089 
6090 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6091   cvtVOP3P(Inst, Operands);
6092 
6093   int Opc = Inst.getOpcode();
6094 
6095   int SrcNum;
6096   const int Ops[] = { AMDGPU::OpName::src0,
6097                       AMDGPU::OpName::src1,
6098                       AMDGPU::OpName::src2 };
6099   for (SrcNum = 0;
6100        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6101        ++SrcNum);
6102   assert(SrcNum > 0);
6103 
6104   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6105   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6106 
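       // The op_sel bit that follows the last source operand selects the
       // destination half; when set, record it as the DST_OP_SEL flag in
       // src0_modifiers.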
6107   if ((OpSel & (1 << SrcNum)) != 0) {
6108     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6109     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6110     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6111   }
6112 }
6113 
6114 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
6115       // 1. This operand is input modifiers
6116   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
6117       // 2. This is not last operand
6118       && Desc.NumOperands > (OpNum + 1)
6119       // 3. Next operand is register class
6120       && Desc.OpInfo[OpNum + 1].RegClass != -1
6121       // 4. Next register is not tied to any other operand
6122       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6123 }
6124 
6125 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6126 {
6127   OptionalImmIndexMap OptionalIdx;
6128   unsigned Opc = Inst.getOpcode();
6129 
6130   unsigned I = 1;
6131   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6132   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6133     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6134   }
6135 
6136   for (unsigned E = Operands.size(); I != E; ++I) {
6137     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6138     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6139       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6140     } else if (Op.isInterpSlot() ||
6141                Op.isInterpAttr() ||
6142                Op.isAttrChan()) {
6143       Inst.addOperand(MCOperand::createImm(Op.getImm()));
6144     } else if (Op.isImmModifier()) {
6145       OptionalIdx[Op.getImmTy()] = I;
6146     } else {
6147       llvm_unreachable("unhandled operand type");
6148     }
6149   }
6150 
6151   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6152     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6153   }
6154 
6155   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6156     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6157   }
6158 
6159   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6160     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6161   }
6162 }
6163 
6164 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
6165                               OptionalImmIndexMap &OptionalIdx) {
6166   unsigned Opc = Inst.getOpcode();
6167 
6168   unsigned I = 1;
6169   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6170   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6171     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6172   }
6173 
6174   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
6175     // This instruction has src modifiers
6176     for (unsigned E = Operands.size(); I != E; ++I) {
6177       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6178       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6179         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6180       } else if (Op.isImmModifier()) {
6181         OptionalIdx[Op.getImmTy()] = I;
6182       } else if (Op.isRegOrImm()) {
6183         Op.addRegOrImmOperands(Inst, 1);
6184       } else {
6185         llvm_unreachable("unhandled operand type");
6186       }
6187     }
6188   } else {
6189     // No src modifiers
6190     for (unsigned E = Operands.size(); I != E; ++I) {
6191       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6192       if (Op.isMod()) {
6193         OptionalIdx[Op.getImmTy()] = I;
6194       } else {
6195         Op.addRegOrImmOperands(Inst, 1);
6196       }
6197     }
6198   }
6199 
6200   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6201     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6202   }
6203 
6204   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6205     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6206   }
6207 
  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // they have a src2 register operand that is tied to the dst operand.
  // We do not allow modifiers for this operand in the assembler, so
  // src2_modifiers must be 0.
6212   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
6213       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
6214       Opc == AMDGPU::V_MAC_F32_e64_vi ||
6215       Opc == AMDGPU::V_MAC_F16_e64_vi ||
6216       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
6217       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
6218       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
6219     auto it = Inst.begin();
6220     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
6221     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
6222     ++it;
6223     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6224   }
6225 }
6226 
6227 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
6228   OptionalImmIndexMap OptionalIdx;
6229   cvtVOP3(Inst, Operands, OptionalIdx);
6230 }
6231 
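// Convert parsed VOP3P (packed) operands. The op_sel, op_sel_hi, neg_lo and
// neg_hi immediates are added first and then folded into the per-source
// src*_modifiers operands.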
6232 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
6233                                const OperandVector &Operands) {
6234   OptionalImmIndexMap OptIdx;
6235   const int Opc = Inst.getOpcode();
6236   const MCInstrDesc &Desc = MII.get(Opc);
6237 
6238   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
6239 
6240   cvtVOP3(Inst, Operands, OptIdx);
6241 
6242   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
6243     assert(!IsPacked);
6244     Inst.addOperand(Inst.getOperand(0));
6245   }
6246 
  // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
  // instruction, and then figure out where to actually put the modifiers.
6249 
6250   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
6251 
6252   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
6253   if (OpSelHiIdx != -1) {
6254     int DefaultVal = IsPacked ? -1 : 0;
6255     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
6256                           DefaultVal);
6257   }
6258 
6259   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
6260   if (NegLoIdx != -1) {
6261     assert(IsPacked);
6262     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
6263     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
6264   }
6265 
6266   const int Ops[] = { AMDGPU::OpName::src0,
6267                       AMDGPU::OpName::src1,
6268                       AMDGPU::OpName::src2 };
6269   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
6270                          AMDGPU::OpName::src1_modifiers,
6271                          AMDGPU::OpName::src2_modifiers };
6272 
6273   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6274 
6275   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6276   unsigned OpSelHi = 0;
6277   unsigned NegLo = 0;
6278   unsigned NegHi = 0;
6279 
6280   if (OpSelHiIdx != -1) {
6281     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
6282   }
6283 
6284   if (NegLoIdx != -1) {
6285     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
6286     NegLo = Inst.getOperand(NegLoIdx).getImm();
6287     NegHi = Inst.getOperand(NegHiIdx).getImm();
6288   }
6289 
6290   for (int J = 0; J < 3; ++J) {
6291     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
6292     if (OpIdx == -1)
6293       break;
6294 
6295     uint32_t ModVal = 0;
6296 
6297     if ((OpSel & (1 << J)) != 0)
6298       ModVal |= SISrcMods::OP_SEL_0;
6299 
6300     if ((OpSelHi & (1 << J)) != 0)
6301       ModVal |= SISrcMods::OP_SEL_1;
6302 
6303     if ((NegLo & (1 << J)) != 0)
6304       ModVal |= SISrcMods::NEG;
6305 
6306     if ((NegHi & (1 << J)) != 0)
6307       ModVal |= SISrcMods::NEG_HI;
6308 
6309     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
6310 
6311     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
6312   }
6313 }
6314 
6315 //===----------------------------------------------------------------------===//
6316 // dpp
6317 //===----------------------------------------------------------------------===//
6318 
6319 bool AMDGPUOperand::isDPP8() const {
6320   return isImmTy(ImmTyDPP8);
6321 }
6322 
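// Check that the immediate is a valid dpp_ctrl encoding (quad_perm, row/wave
// shifts and rotates, mirrors, broadcasts, row_share or row_xmask).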
6323 bool AMDGPUOperand::isDPPCtrl() const {
6324   using namespace AMDGPU::DPP;
6325 
6326   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
6327   if (result) {
6328     int64_t Imm = getImm();
6329     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
6330            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
6331            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
6332            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
6333            (Imm == DppCtrl::WAVE_SHL1) ||
6334            (Imm == DppCtrl::WAVE_ROL1) ||
6335            (Imm == DppCtrl::WAVE_SHR1) ||
6336            (Imm == DppCtrl::WAVE_ROR1) ||
6337            (Imm == DppCtrl::ROW_MIRROR) ||
6338            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
6339            (Imm == DppCtrl::BCAST15) ||
6340            (Imm == DppCtrl::BCAST31) ||
6341            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
6342            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
6343   }
6344   return false;
6345 }
6346 
6347 //===----------------------------------------------------------------------===//
6348 // mAI
6349 //===----------------------------------------------------------------------===//
6350 
6351 bool AMDGPUOperand::isBLGP() const {
6352   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
6353 }
6354 
6355 bool AMDGPUOperand::isCBSZ() const {
6356   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
6357 }
6358 
6359 bool AMDGPUOperand::isABID() const {
6360   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
6361 }
6362 
6363 bool AMDGPUOperand::isS16Imm() const {
6364   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
6365 }
6366 
6367 bool AMDGPUOperand::isU16Imm() const {
6368   return isImm() && isUInt<16>(getImm());
6369 }
6370 
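// Parse the gfx10 MIMG dimension operand "dim:<value>", accepting both the
// full SQ_RSRC_IMG_* name and the bare dimension suffix.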
6371 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
6372   if (!isGFX10())
6373     return MatchOperand_NoMatch;
6374 
6375   SMLoc S = Parser.getTok().getLoc();
6376 
6377   if (getLexer().isNot(AsmToken::Identifier))
6378     return MatchOperand_NoMatch;
6379   if (getLexer().getTok().getString() != "dim")
6380     return MatchOperand_NoMatch;
6381 
6382   Parser.Lex();
6383   if (getLexer().isNot(AsmToken::Colon))
6384     return MatchOperand_ParseFail;
6385 
6386   Parser.Lex();
6387 
6388   // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
6389   // integer.
6390   std::string Token;
6391   if (getLexer().is(AsmToken::Integer)) {
6392     SMLoc Loc = getLexer().getTok().getEndLoc();
6393     Token = getLexer().getTok().getString();
6394     Parser.Lex();
6395     if (getLexer().getTok().getLoc() != Loc)
6396       return MatchOperand_ParseFail;
6397   }
6398   if (getLexer().isNot(AsmToken::Identifier))
6399     return MatchOperand_ParseFail;
6400   Token += getLexer().getTok().getString();
6401 
6402   StringRef DimId = Token;
6403   if (DimId.startswith("SQ_RSRC_IMG_"))
6404     DimId = DimId.substr(12);
6405 
6406   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
6407   if (!DimInfo)
6408     return MatchOperand_ParseFail;
6409 
6410   Parser.Lex();
6411 
6412   Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
6413                                               AMDGPUOperand::ImmTyDim));
6414   return MatchOperand_Success;
6415 }
6416 
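// Parse the gfx10 "dpp8:[...]" lane select operand; any other identifier
// prefix is forwarded to parseDPPCtrl.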
6417 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
6418   SMLoc S = Parser.getTok().getLoc();
6419   StringRef Prefix;
6420 
6421   if (getLexer().getKind() == AsmToken::Identifier) {
6422     Prefix = Parser.getTok().getString();
6423   } else {
6424     return MatchOperand_NoMatch;
6425   }
6426 
6427   if (Prefix != "dpp8")
6428     return parseDPPCtrl(Operands);
6429   if (!isGFX10())
6430     return MatchOperand_NoMatch;
6431 
6432   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
6433 
6434   int64_t Sels[8];
6435 
6436   Parser.Lex();
6437   if (getLexer().isNot(AsmToken::Colon))
6438     return MatchOperand_ParseFail;
6439 
6440   Parser.Lex();
6441   if (getLexer().isNot(AsmToken::LBrac))
6442     return MatchOperand_ParseFail;
6443 
6444   Parser.Lex();
6445   if (getParser().parseAbsoluteExpression(Sels[0]))
6446     return MatchOperand_ParseFail;
6447   if (0 > Sels[0] || 7 < Sels[0])
6448     return MatchOperand_ParseFail;
6449 
6450   for (size_t i = 1; i < 8; ++i) {
6451     if (getLexer().isNot(AsmToken::Comma))
6452       return MatchOperand_ParseFail;
6453 
6454     Parser.Lex();
6455     if (getParser().parseAbsoluteExpression(Sels[i]))
6456       return MatchOperand_ParseFail;
6457     if (0 > Sels[i] || 7 < Sels[i])
6458       return MatchOperand_ParseFail;
6459   }
6460 
6461   if (getLexer().isNot(AsmToken::RBrac))
6462     return MatchOperand_ParseFail;
6463   Parser.Lex();
6464 
6465   unsigned DPP8 = 0;
6466   for (size_t i = 0; i < 8; ++i)
6467     DPP8 |= (Sels[i] << (i * 3));
6468 
6469   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
6470   return MatchOperand_Success;
6471 }
6472 
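// Parse a DPP control operand such as row_mirror, quad_perm:[...] or
// row_shl:<n> and encode it as a DppCtrl immediate.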
6473 OperandMatchResultTy
6474 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
6475   using namespace AMDGPU::DPP;
6476 
6477   SMLoc S = Parser.getTok().getLoc();
6478   StringRef Prefix;
6479   int64_t Int;
6480 
6481   if (getLexer().getKind() == AsmToken::Identifier) {
6482     Prefix = Parser.getTok().getString();
6483   } else {
6484     return MatchOperand_NoMatch;
6485   }
6486 
6487   if (Prefix == "row_mirror") {
6488     Int = DppCtrl::ROW_MIRROR;
6489     Parser.Lex();
6490   } else if (Prefix == "row_half_mirror") {
6491     Int = DppCtrl::ROW_HALF_MIRROR;
6492     Parser.Lex();
6493   } else {
    // Check the prefix to prevent parseDPPCtrl from eating invalid tokens.
6495     if (Prefix != "quad_perm"
6496         && Prefix != "row_shl"
6497         && Prefix != "row_shr"
6498         && Prefix != "row_ror"
6499         && Prefix != "wave_shl"
6500         && Prefix != "wave_rol"
6501         && Prefix != "wave_shr"
6502         && Prefix != "wave_ror"
6503         && Prefix != "row_bcast"
6504         && Prefix != "row_share"
6505         && Prefix != "row_xmask") {
6506       return MatchOperand_NoMatch;
6507     }
6508 
6509     if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
6510       return MatchOperand_NoMatch;
6511 
6512     if (!isVI() && !isGFX9() &&
6513         (Prefix == "wave_shl" || Prefix == "wave_shr" ||
6514          Prefix == "wave_rol" || Prefix == "wave_ror" ||
6515          Prefix == "row_bcast"))
6516       return MatchOperand_NoMatch;
6517 
6518     Parser.Lex();
6519     if (getLexer().isNot(AsmToken::Colon))
6520       return MatchOperand_ParseFail;
6521 
6522     if (Prefix == "quad_perm") {
6523       // quad_perm:[%d,%d,%d,%d]
6524       Parser.Lex();
6525       if (getLexer().isNot(AsmToken::LBrac))
6526         return MatchOperand_ParseFail;
6527       Parser.Lex();
6528 
6529       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
6530         return MatchOperand_ParseFail;
6531 
6532       for (int i = 0; i < 3; ++i) {
6533         if (getLexer().isNot(AsmToken::Comma))
6534           return MatchOperand_ParseFail;
6535         Parser.Lex();
6536 
6537         int64_t Temp;
6538         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
6539           return MatchOperand_ParseFail;
6540         const int shift = i*2 + 2;
6541         Int += (Temp << shift);
6542       }
6543 
6544       if (getLexer().isNot(AsmToken::RBrac))
6545         return MatchOperand_ParseFail;
6546       Parser.Lex();
6547     } else {
6548       // sel:%d
6549       Parser.Lex();
6550       if (getParser().parseAbsoluteExpression(Int))
6551         return MatchOperand_ParseFail;
6552 
6553       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
6554         Int |= DppCtrl::ROW_SHL0;
6555       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
6556         Int |= DppCtrl::ROW_SHR0;
6557       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
6558         Int |= DppCtrl::ROW_ROR0;
6559       } else if (Prefix == "wave_shl" && 1 == Int) {
6560         Int = DppCtrl::WAVE_SHL1;
6561       } else if (Prefix == "wave_rol" && 1 == Int) {
6562         Int = DppCtrl::WAVE_ROL1;
6563       } else if (Prefix == "wave_shr" && 1 == Int) {
6564         Int = DppCtrl::WAVE_SHR1;
6565       } else if (Prefix == "wave_ror" && 1 == Int) {
6566         Int = DppCtrl::WAVE_ROR1;
6567       } else if (Prefix == "row_bcast") {
6568         if (Int == 15) {
6569           Int = DppCtrl::BCAST15;
6570         } else if (Int == 31) {
6571           Int = DppCtrl::BCAST31;
6572         } else {
6573           return MatchOperand_ParseFail;
6574         }
6575       } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
6576         Int |= DppCtrl::ROW_SHARE_FIRST;
6577       } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
6578         Int |= DppCtrl::ROW_XMASK_FIRST;
6579       } else {
6580         return MatchOperand_ParseFail;
6581       }
6582     }
6583   }
6584 
6585   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
6586   return MatchOperand_Success;
6587 }
6588 
6589 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
6590   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
6591 }
6592 
6593 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
6594   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
6595 }
6596 
6597 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
6598   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
6599 }
6600 
6601 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
6602   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
6603 }
6604 
6605 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
6606   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
6607 }
6608 
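// Convert parsed DPP operands into MCInst operands. For DPP8 the packed lane
// selects and the FI bit are emitted; otherwise row_mask, bank_mask,
// bound_ctrl and (when present) fi receive their parsed or default values.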
6609 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
6610   OptionalImmIndexMap OptionalIdx;
6611 
6612   unsigned I = 1;
6613   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6614   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6615     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6616   }
6617 
6618   int Fi = 0;
6619   for (unsigned E = Operands.size(); I != E; ++I) {
6620     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
6621                                             MCOI::TIED_TO);
6622     if (TiedTo != -1) {
6623       assert((unsigned)TiedTo < Inst.getNumOperands());
      // Handle the tied 'old' or src2 operand for MAC instructions.
6625       Inst.addOperand(Inst.getOperand(TiedTo));
6626     }
6627     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6628     // Add the register arguments
6629     if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32, ...) DPP uses the "vcc" token.
      // Skip it.
6632       continue;
6633     }
6634 
6635     if (IsDPP8) {
6636       if (Op.isDPP8()) {
6637         Op.addImmOperands(Inst, 1);
6638       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6639         Op.addRegWithFPInputModsOperands(Inst, 2);
6640       } else if (Op.isFI()) {
6641         Fi = Op.getImm();
6642       } else if (Op.isReg()) {
6643         Op.addRegOperands(Inst, 1);
6644       } else {
6645         llvm_unreachable("Invalid operand type");
6646       }
6647     } else {
6648       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6649         Op.addRegWithFPInputModsOperands(Inst, 2);
6650       } else if (Op.isDPPCtrl()) {
6651         Op.addImmOperands(Inst, 1);
6652       } else if (Op.isImm()) {
6653         // Handle optional arguments
6654         OptionalIdx[Op.getImmTy()] = I;
6655       } else {
6656         llvm_unreachable("Invalid operand type");
6657       }
6658     }
6659   }
6660 
6661   if (IsDPP8) {
6662     using namespace llvm::AMDGPU::DPP;
6663     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
6664   } else {
6665     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
6666     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
6667     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
6668     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
6669       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
6670     }
6671   }
6672 }
6673 
6674 //===----------------------------------------------------------------------===//
6675 // sdwa
6676 //===----------------------------------------------------------------------===//
6677 
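// Parse an SDWA sub-dword select operand (BYTE_0..BYTE_3, WORD_0, WORD_1 or
// DWORD) introduced by the given prefix, e.g. "dst_sel:WORD_1".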
6678 OperandMatchResultTy
6679 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
6680                               AMDGPUOperand::ImmTy Type) {
6681   using namespace llvm::AMDGPU::SDWA;
6682 
6683   SMLoc S = Parser.getTok().getLoc();
6684   StringRef Value;
6685   OperandMatchResultTy res;
6686 
6687   res = parseStringWithPrefix(Prefix, Value);
6688   if (res != MatchOperand_Success) {
6689     return res;
6690   }
6691 
6692   int64_t Int;
6693   Int = StringSwitch<int64_t>(Value)
6694         .Case("BYTE_0", SdwaSel::BYTE_0)
6695         .Case("BYTE_1", SdwaSel::BYTE_1)
6696         .Case("BYTE_2", SdwaSel::BYTE_2)
6697         .Case("BYTE_3", SdwaSel::BYTE_3)
6698         .Case("WORD_0", SdwaSel::WORD_0)
6699         .Case("WORD_1", SdwaSel::WORD_1)
6700         .Case("DWORD", SdwaSel::DWORD)
6701         .Default(0xffffffff);
6702   Parser.Lex(); // eat last token
6703 
6704   if (Int == 0xffffffff) {
6705     return MatchOperand_ParseFail;
6706   }
6707 
6708   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
6709   return MatchOperand_Success;
6710 }
6711 
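// Parse the SDWA dst_unused operand, one of UNUSED_PAD, UNUSED_SEXT or
// UNUSED_PRESERVE.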
6712 OperandMatchResultTy
6713 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
6714   using namespace llvm::AMDGPU::SDWA;
6715 
6716   SMLoc S = Parser.getTok().getLoc();
6717   StringRef Value;
6718   OperandMatchResultTy res;
6719 
6720   res = parseStringWithPrefix("dst_unused", Value);
6721   if (res != MatchOperand_Success) {
6722     return res;
6723   }
6724 
6725   int64_t Int;
6726   Int = StringSwitch<int64_t>(Value)
6727         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
6728         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
6729         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
6730         .Default(0xffffffff);
6731   Parser.Lex(); // eat last token
6732 
6733   if (Int == 0xffffffff) {
6734     return MatchOperand_ParseFail;
6735   }
6736 
6737   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
6738   return MatchOperand_Success;
6739 }
6740 
6741 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
6742   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
6743 }
6744 
6745 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
6746   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
6747 }
6748 
6749 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
6750   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
6751 }
6752 
6753 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
6754   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
6755 }
6756 
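// Convert parsed SDWA operands into MCInst operands, optionally skipping the
// implicit VCC operand of VOP2b/VOPC encodings and appending defaults for any
// omitted clamp/omod/sel/dst_unused operands.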
6757 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
6758                               uint64_t BasicInstType, bool skipVcc) {
6759   using namespace llvm::AMDGPU::SDWA;
6760 
6761   OptionalImmIndexMap OptionalIdx;
6762   bool skippedVcc = false;
6763 
6764   unsigned I = 1;
6765   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6766   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6767     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6768   }
6769 
6770   for (unsigned E = Operands.size(); I != E; ++I) {
6771     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6772     if (skipVcc && !skippedVcc && Op.isReg() &&
6773         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32, ...) SDWA uses the "vcc" token as dst.
      // Skip it if it is the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we did not skip it on the previous iteration.
6778       if (BasicInstType == SIInstrFlags::VOP2 &&
6779           (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
6780         skippedVcc = true;
6781         continue;
6782       } else if (BasicInstType == SIInstrFlags::VOPC &&
6783                  Inst.getNumOperands() == 0) {
6784         skippedVcc = true;
6785         continue;
6786       }
6787     }
6788     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6789       Op.addRegOrImmWithInputModsOperands(Inst, 2);
6790     } else if (Op.isImm()) {
6791       // Handle optional arguments
6792       OptionalIdx[Op.getImmTy()] = I;
6793     } else {
6794       llvm_unreachable("Invalid operand type");
6795     }
6796     skippedVcc = false;
6797   }
6798 
6799   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
6800       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
6801       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9/gfx10 have no optional sdwa arguments.
6803     switch (BasicInstType) {
6804     case SIInstrFlags::VOP1:
6805       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6806       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6807         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6808       }
6809       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6810       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6811       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6812       break;
6813 
6814     case SIInstrFlags::VOP2:
6815       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6816       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6817         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6818       }
6819       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6820       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6821       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6822       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6823       break;
6824 
6825     case SIInstrFlags::VOPC:
6826       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
6827         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6828       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6829       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6830       break;
6831 
6832     default:
6833       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
6834     }
6835   }
6836 
  // Special case v_mac_{f16, f32}:
  // it has a src2 register operand that is tied to the dst operand.
6839   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
6840       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
6841     auto it = Inst.begin();
6842     std::advance(
6843       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
6844     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6845   }
6846 }
6847 
6848 //===----------------------------------------------------------------------===//
6849 // mAI
6850 //===----------------------------------------------------------------------===//
6851 
6852 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
6853   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
6854 }
6855 
6856 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
6857   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
6858 }
6859 
6860 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
6861   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
6862 }
6863 
6864 /// Force static initialization.
6865 extern "C" void LLVMInitializeAMDGPUAsmParser() {
6866   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
6867   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
6868 }
6869 
6870 #define GET_REGISTER_MATCHER
6871 #define GET_MATCHER_IMPLEMENTATION
6872 #define GET_MNEMONIC_SPELL_CHECKER
6873 #include "AMDGPUGenAsmMatcher.inc"
6874 
// This function should be defined after the auto-generated include so that
// the MatchClassKind enum is defined.
6877 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
6878                                                      unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet a token and fails to validate
  // the operand. This method checks if we were given an immediate operand but
  // are expected to provide the corresponding token.
6883   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
6884   switch (Kind) {
6885   case MCK_addr64:
6886     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
6887   case MCK_gds:
6888     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
6889   case MCK_lds:
6890     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
6891   case MCK_glc:
6892     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
6893   case MCK_idxen:
6894     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
6895   case MCK_offen:
6896     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
6897   case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() always tries to match
    // an operand as a token when isToken returns true, and when the name of
    // the expression is not a valid token the match fails, so we need to
    // handle it here.
6904     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
6905   case MCK_SSrcF32:
6906     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
6907   case MCK_SoppBrTarget:
6908     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
6909   case MCK_VReg32OrOff:
6910     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
6911   case MCK_InterpSlot:
6912     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
6913   case MCK_Attr:
6914     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
6915   case MCK_AttrChan:
6916     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
6917   default:
6918     return Match_InvalidOperand;
6919   }
6920 }
6921 
6922 //===----------------------------------------------------------------------===//
6923 // endpgm
6924 //===----------------------------------------------------------------------===//
6925 
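// Parse the optional 16-bit immediate operand of the endpgm instruction;
// it defaults to 0 when omitted.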
6926 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
6927   SMLoc S = Parser.getTok().getLoc();
6928   int64_t Imm = 0;
6929 
6930   if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
6932     Imm = 0;
6933   }
6934 
6935   if (!isUInt<16>(Imm)) {
6936     Error(S, "expected a 16-bit value");
6937     return MatchOperand_ParseFail;
6938   }
6939 
6940   Operands.push_back(
6941       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
6942   return MatchOperand_Success;
6943 }
6944 
6945 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
6946