1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/ErrorHandling.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
63 
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
67 
68 namespace {
69 
70 class AMDGPUAsmParser;
71 
72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
73 
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
77 
78 class AMDGPUOperand : public MCParsedAsmOperand {
79   enum KindTy {
80     Token,
81     Immediate,
82     Register,
83     Expression
84   } Kind;
85 
86   SMLoc StartLoc, EndLoc;
87   const AMDGPUAsmParser *AsmParser;
88 
89 public:
90   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
92 
93   using Ptr = std::unique_ptr<AMDGPUOperand>;
94 
95   struct Modifiers {
96     bool Abs = false;
97     bool Neg = false;
98     bool Sext = false;
99 
100     bool hasFPModifiers() const { return Abs || Neg; }
101     bool hasIntModifiers() const { return Sext; }
102     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103 
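    // For example, an operand written as -|v0| sets both Neg and Abs, so
    // getFPModifiersOperand() returns SISrcMods::NEG | SISrcMods::ABS.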
104     int64_t getFPModifiersOperand() const {
105       int64_t Operand = 0;
106       Operand |= Abs ? SISrcMods::ABS : 0u;
107       Operand |= Neg ? SISrcMods::NEG : 0u;
108       return Operand;
109     }
110 
111     int64_t getIntModifiersOperand() const {
112       int64_t Operand = 0;
113       Operand |= Sext ? SISrcMods::SEXT : 0u;
114       return Operand;
115     }
116 
117     int64_t getModifiersOperand() const {
118       assert(!(hasFPModifiers() && hasIntModifiers())
119            && "fp and int modifiers should not be used simultaneously");
120       if (hasFPModifiers()) {
121         return getFPModifiersOperand();
122       } else if (hasIntModifiers()) {
123         return getIntModifiersOperand();
124       } else {
125         return 0;
126       }
127     }
128 
129     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
130   };
131 
132   enum ImmTy {
133     ImmTyNone,
134     ImmTyGDS,
135     ImmTyLDS,
136     ImmTyOffen,
137     ImmTyIdxen,
138     ImmTyAddr64,
139     ImmTyOffset,
140     ImmTyInstOffset,
141     ImmTyOffset0,
142     ImmTyOffset1,
143     ImmTyDLC,
144     ImmTyGLC,
145     ImmTySLC,
146     ImmTySWZ,
147     ImmTyTFE,
148     ImmTyD16,
149     ImmTyClampSI,
150     ImmTyOModSI,
151     ImmTyDPP8,
152     ImmTyDppCtrl,
153     ImmTyDppRowMask,
154     ImmTyDppBankMask,
155     ImmTyDppBoundCtrl,
156     ImmTyDppFi,
157     ImmTySdwaDstSel,
158     ImmTySdwaSrc0Sel,
159     ImmTySdwaSrc1Sel,
160     ImmTySdwaDstUnused,
161     ImmTyDMask,
162     ImmTyDim,
163     ImmTyUNorm,
164     ImmTyDA,
165     ImmTyR128A16,
166     ImmTyLWE,
167     ImmTyExpTgt,
168     ImmTyExpCompr,
169     ImmTyExpVM,
170     ImmTyFORMAT,
171     ImmTyHwreg,
172     ImmTyOff,
173     ImmTySendMsg,
174     ImmTyInterpSlot,
175     ImmTyInterpAttr,
176     ImmTyAttrChan,
177     ImmTyOpSel,
178     ImmTyOpSelHi,
179     ImmTyNegLo,
180     ImmTyNegHi,
181     ImmTySwizzle,
182     ImmTyGprIdxMode,
183     ImmTyHigh,
184     ImmTyBLGP,
185     ImmTyCBSZ,
186     ImmTyABID,
187     ImmTyEndpgm,
188   };
189 
190 private:
191   struct TokOp {
192     const char *Data;
193     unsigned Length;
194   };
195 
196   struct ImmOp {
197     int64_t Val;
198     ImmTy Type;
199     bool IsFPImm;
200     Modifiers Mods;
201   };
202 
203   struct RegOp {
204     unsigned RegNo;
205     Modifiers Mods;
206   };
207 
208   union {
209     TokOp Tok;
210     ImmOp Imm;
211     RegOp Reg;
212     const MCExpr *Expr;
213   };
214 
215 public:
216   bool isToken() const override {
217     if (Kind == Token)
218       return true;
219 
220     // When parsing operands, we can't always tell if something was meant to be
221     // a token, like 'gds', or an expression that references a global variable.
222     // In this case, we assume the string is an expression, and if we need to
223     // interpret it as a token, then we treat the symbol name as the token.
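    // For example, 'gds' at the end of a DS instruction may come back from the
    // expression parser as a reference to a symbol named "gds"; getToken() then
    // returns that symbol's name.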
224     return isSymbolRefExpr();
225   }
226 
227   bool isSymbolRefExpr() const {
228     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
229   }
230 
231   bool isImm() const override {
232     return Kind == Immediate;
233   }
234 
235   bool isInlinableImm(MVT type) const;
236   bool isLiteralImm(MVT type) const;
237 
238   bool isRegKind() const {
239     return Kind == Register;
240   }
241 
242   bool isReg() const override {
243     return isRegKind() && !hasModifiers();
244   }
245 
246   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
247     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
248   }
249 
250   bool isRegOrImmWithInt16InputMods() const {
251     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
252   }
253 
254   bool isRegOrImmWithInt32InputMods() const {
255     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
256   }
257 
258   bool isRegOrImmWithInt64InputMods() const {
259     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
260   }
261 
262   bool isRegOrImmWithFP16InputMods() const {
263     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
264   }
265 
266   bool isRegOrImmWithFP32InputMods() const {
267     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
268   }
269 
270   bool isRegOrImmWithFP64InputMods() const {
271     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
272   }
273 
274   bool isVReg() const {
275     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
276            isRegClass(AMDGPU::VReg_64RegClassID) ||
277            isRegClass(AMDGPU::VReg_96RegClassID) ||
278            isRegClass(AMDGPU::VReg_128RegClassID) ||
279            isRegClass(AMDGPU::VReg_160RegClassID) ||
280            isRegClass(AMDGPU::VReg_256RegClassID) ||
281            isRegClass(AMDGPU::VReg_512RegClassID) ||
282            isRegClass(AMDGPU::VReg_1024RegClassID);
283   }
284 
285   bool isVReg32() const {
286     return isRegClass(AMDGPU::VGPR_32RegClassID);
287   }
288 
289   bool isVReg32OrOff() const {
290     return isOff() || isVReg32();
291   }
292 
293   bool isSDWAOperand(MVT type) const;
294   bool isSDWAFP16Operand() const;
295   bool isSDWAFP32Operand() const;
296   bool isSDWAInt16Operand() const;
297   bool isSDWAInt32Operand() const;
298 
299   bool isImmTy(ImmTy ImmT) const {
300     return isImm() && Imm.Type == ImmT;
301   }
302 
303   bool isImmModifier() const {
304     return isImm() && Imm.Type != ImmTyNone;
305   }
306 
307   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
308   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
309   bool isDMask() const { return isImmTy(ImmTyDMask); }
310   bool isDim() const { return isImmTy(ImmTyDim); }
311   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
312   bool isDA() const { return isImmTy(ImmTyDA); }
313   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
314   bool isLWE() const { return isImmTy(ImmTyLWE); }
315   bool isOff() const { return isImmTy(ImmTyOff); }
316   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
317   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
318   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
319   bool isOffen() const { return isImmTy(ImmTyOffen); }
320   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
321   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
322   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
323   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
324   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
325 
326   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
327   bool isGDS() const { return isImmTy(ImmTyGDS); }
328   bool isLDS() const { return isImmTy(ImmTyLDS); }
329   bool isDLC() const { return isImmTy(ImmTyDLC); }
330   bool isGLC() const { return isImmTy(ImmTyGLC); }
331   bool isSLC() const { return isImmTy(ImmTySLC); }
332   bool isSWZ() const { return isImmTy(ImmTySWZ); }
333   bool isTFE() const { return isImmTy(ImmTyTFE); }
334   bool isD16() const { return isImmTy(ImmTyD16); }
335   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
336   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
337   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
338   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
339   bool isFI() const { return isImmTy(ImmTyDppFi); }
340   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
341   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
342   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
343   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
344   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
345   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
346   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
347   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
348   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
349   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
350   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
351   bool isHigh() const { return isImmTy(ImmTyHigh); }
352 
353   bool isMod() const {
354     return isClampSI() || isOModSI();
355   }
356 
357   bool isRegOrImm() const {
358     return isReg() || isImm();
359   }
360 
361   bool isRegClass(unsigned RCID) const;
362 
363   bool isInlineValue() const;
364 
365   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
366     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
367   }
368 
369   bool isSCSrcB16() const {
370     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
371   }
372 
373   bool isSCSrcV2B16() const {
374     return isSCSrcB16();
375   }
376 
377   bool isSCSrcB32() const {
378     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
379   }
380 
381   bool isSCSrcB64() const {
382     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
383   }
384 
385   bool isBoolReg() const;
386 
387   bool isSCSrcF16() const {
388     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
389   }
390 
391   bool isSCSrcV2F16() const {
392     return isSCSrcF16();
393   }
394 
395   bool isSCSrcF32() const {
396     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
397   }
398 
399   bool isSCSrcF64() const {
400     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
401   }
402 
403   bool isSSrcB32() const {
404     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
405   }
406 
407   bool isSSrcB16() const {
408     return isSCSrcB16() || isLiteralImm(MVT::i16);
409   }
410 
411   bool isSSrcV2B16() const {
412     llvm_unreachable("cannot happen");
413     return isSSrcB16();
414   }
415 
416   bool isSSrcB64() const {
417     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
418     // See isVSrc64().
419     return isSCSrcB64() || isLiteralImm(MVT::i64);
420   }
421 
422   bool isSSrcF32() const {
423     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
424   }
425 
426   bool isSSrcF64() const {
427     return isSCSrcB64() || isLiteralImm(MVT::f64);
428   }
429 
430   bool isSSrcF16() const {
431     return isSCSrcB16() || isLiteralImm(MVT::f16);
432   }
433 
434   bool isSSrcV2F16() const {
435     llvm_unreachable("cannot happen");
436     return isSSrcF16();
437   }
438 
439   bool isSSrcOrLdsB32() const {
440     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
441            isLiteralImm(MVT::i32) || isExpr();
442   }
443 
444   bool isVCSrcB32() const {
445     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
446   }
447 
448   bool isVCSrcB64() const {
449     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
450   }
451 
452   bool isVCSrcB16() const {
453     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
454   }
455 
456   bool isVCSrcV2B16() const {
457     return isVCSrcB16();
458   }
459 
460   bool isVCSrcF32() const {
461     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
462   }
463 
464   bool isVCSrcF64() const {
465     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
466   }
467 
468   bool isVCSrcF16() const {
469     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
470   }
471 
472   bool isVCSrcV2F16() const {
473     return isVCSrcF16();
474   }
475 
476   bool isVSrcB32() const {
477     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
478   }
479 
480   bool isVSrcB64() const {
481     return isVCSrcF64() || isLiteralImm(MVT::i64);
482   }
483 
484   bool isVSrcB16() const {
485     return isVCSrcF16() || isLiteralImm(MVT::i16);
486   }
487 
488   bool isVSrcV2B16() const {
489     return isVSrcB16() || isLiteralImm(MVT::v2i16);
490   }
491 
492   bool isVSrcF32() const {
493     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
494   }
495 
496   bool isVSrcF64() const {
497     return isVCSrcF64() || isLiteralImm(MVT::f64);
498   }
499 
500   bool isVSrcF16() const {
501     return isVCSrcF16() || isLiteralImm(MVT::f16);
502   }
503 
504   bool isVSrcV2F16() const {
505     return isVSrcF16() || isLiteralImm(MVT::v2f16);
506   }
507 
508   bool isVISrcB32() const {
509     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
510   }
511 
512   bool isVISrcB16() const {
513     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
514   }
515 
516   bool isVISrcV2B16() const {
517     return isVISrcB16();
518   }
519 
520   bool isVISrcF32() const {
521     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
522   }
523 
524   bool isVISrcF16() const {
525     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
526   }
527 
528   bool isVISrcV2F16() const {
529     return isVISrcF16() || isVISrcB32();
530   }
531 
532   bool isAISrcB32() const {
533     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
534   }
535 
536   bool isAISrcB16() const {
537     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
538   }
539 
540   bool isAISrcV2B16() const {
541     return isAISrcB16();
542   }
543 
544   bool isAISrcF32() const {
545     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
546   }
547 
548   bool isAISrcF16() const {
549     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
550   }
551 
552   bool isAISrcV2F16() const {
553     return isAISrcF16() || isAISrcB32();
554   }
555 
556   bool isAISrc_128B32() const {
557     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
558   }
559 
560   bool isAISrc_128B16() const {
561     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
562   }
563 
564   bool isAISrc_128V2B16() const {
565     return isAISrc_128B16();
566   }
567 
568   bool isAISrc_128F32() const {
569     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
570   }
571 
572   bool isAISrc_128F16() const {
573     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
574   }
575 
576   bool isAISrc_128V2F16() const {
577     return isAISrc_128F16() || isAISrc_128B32();
578   }
579 
580   bool isAISrc_512B32() const {
581     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
582   }
583 
584   bool isAISrc_512B16() const {
585     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
586   }
587 
588   bool isAISrc_512V2B16() const {
589     return isAISrc_512B16();
590   }
591 
592   bool isAISrc_512F32() const {
593     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
594   }
595 
596   bool isAISrc_512F16() const {
597     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
598   }
599 
600   bool isAISrc_512V2F16() const {
601     return isAISrc_512F16() || isAISrc_512B32();
602   }
603 
604   bool isAISrc_1024B32() const {
605     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
606   }
607 
608   bool isAISrc_1024B16() const {
609     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
610   }
611 
612   bool isAISrc_1024V2B16() const {
613     return isAISrc_1024B16();
614   }
615 
616   bool isAISrc_1024F32() const {
617     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
618   }
619 
620   bool isAISrc_1024F16() const {
621     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
622   }
623 
624   bool isAISrc_1024V2F16() const {
625     return isAISrc_1024F16() || isAISrc_1024B32();
626   }
627 
628   bool isKImmFP32() const {
629     return isLiteralImm(MVT::f32);
630   }
631 
632   bool isKImmFP16() const {
633     return isLiteralImm(MVT::f16);
634   }
635 
636   bool isMem() const override {
637     return false;
638   }
639 
640   bool isExpr() const {
641     return Kind == Expression;
642   }
643 
644   bool isSoppBrTarget() const {
645     return isExpr() || isImm();
646   }
647 
648   bool isSWaitCnt() const;
649   bool isHwreg() const;
650   bool isSendMsg() const;
651   bool isSwizzle() const;
652   bool isSMRDOffset8() const;
653   bool isSMRDOffset20() const;
654   bool isSMRDLiteralOffset() const;
655   bool isDPP8() const;
656   bool isDPPCtrl() const;
657   bool isBLGP() const;
658   bool isCBSZ() const;
659   bool isABID() const;
660   bool isGPRIdxMode() const;
661   bool isS16Imm() const;
662   bool isU16Imm() const;
663   bool isEndpgm() const;
664 
665   StringRef getExpressionAsToken() const {
666     assert(isExpr());
667     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
668     return S->getSymbol().getName();
669   }
670 
671   StringRef getToken() const {
672     assert(isToken());
673 
674     if (Kind == Expression)
675       return getExpressionAsToken();
676 
677     return StringRef(Tok.Data, Tok.Length);
678   }
679 
680   int64_t getImm() const {
681     assert(isImm());
682     return Imm.Val;
683   }
684 
685   ImmTy getImmTy() const {
686     assert(isImm());
687     return Imm.Type;
688   }
689 
690   unsigned getReg() const override {
691     assert(isRegKind());
692     return Reg.RegNo;
693   }
694 
695   SMLoc getStartLoc() const override {
696     return StartLoc;
697   }
698 
699   SMLoc getEndLoc() const override {
700     return EndLoc;
701   }
702 
703   SMRange getLocRange() const {
704     return SMRange(StartLoc, EndLoc);
705   }
706 
707   Modifiers getModifiers() const {
708     assert(isRegKind() || isImmTy(ImmTyNone));
709     return isRegKind() ? Reg.Mods : Imm.Mods;
710   }
711 
712   void setModifiers(Modifiers Mods) {
713     assert(isRegKind() || isImmTy(ImmTyNone));
714     if (isRegKind())
715       Reg.Mods = Mods;
716     else
717       Imm.Mods = Mods;
718   }
719 
720   bool hasModifiers() const {
721     return getModifiers().hasModifiers();
722   }
723 
724   bool hasFPModifiers() const {
725     return getModifiers().hasFPModifiers();
726   }
727 
728   bool hasIntModifiers() const {
729     return getModifiers().hasIntModifiers();
730   }
731 
732   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
733 
734   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
735 
736   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
737 
738   template <unsigned Bitwidth>
739   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
740 
741   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
742     addKImmFPOperands<16>(Inst, N);
743   }
744 
745   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
746     addKImmFPOperands<32>(Inst, N);
747   }
748 
749   void addRegOperands(MCInst &Inst, unsigned N) const;
750 
751   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
752     addRegOperands(Inst, N);
753   }
754 
755   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
756     if (isRegKind())
757       addRegOperands(Inst, N);
758     else if (isExpr())
759       Inst.addOperand(MCOperand::createExpr(Expr));
760     else
761       addImmOperands(Inst, N);
762   }
763 
764   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
765     Modifiers Mods = getModifiers();
766     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
767     if (isRegKind()) {
768       addRegOperands(Inst, N);
769     } else {
770       addImmOperands(Inst, N, false);
771     }
772   }
773 
774   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
775     assert(!hasIntModifiers());
776     addRegOrImmWithInputModsOperands(Inst, N);
777   }
778 
779   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
780     assert(!hasFPModifiers());
781     addRegOrImmWithInputModsOperands(Inst, N);
782   }
783 
784   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
785     Modifiers Mods = getModifiers();
786     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
787     assert(isRegKind());
788     addRegOperands(Inst, N);
789   }
790 
791   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
792     assert(!hasIntModifiers());
793     addRegWithInputModsOperands(Inst, N);
794   }
795 
796   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
797     assert(!hasFPModifiers());
798     addRegWithInputModsOperands(Inst, N);
799   }
800 
801   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
802     if (isImm())
803       addImmOperands(Inst, N);
804     else {
805       assert(isExpr());
806       Inst.addOperand(MCOperand::createExpr(Expr));
807     }
808   }
809 
810   static void printImmTy(raw_ostream& OS, ImmTy Type) {
811     switch (Type) {
812     case ImmTyNone: OS << "None"; break;
813     case ImmTyGDS: OS << "GDS"; break;
814     case ImmTyLDS: OS << "LDS"; break;
815     case ImmTyOffen: OS << "Offen"; break;
816     case ImmTyIdxen: OS << "Idxen"; break;
817     case ImmTyAddr64: OS << "Addr64"; break;
818     case ImmTyOffset: OS << "Offset"; break;
819     case ImmTyInstOffset: OS << "InstOffset"; break;
820     case ImmTyOffset0: OS << "Offset0"; break;
821     case ImmTyOffset1: OS << "Offset1"; break;
822     case ImmTyDLC: OS << "DLC"; break;
823     case ImmTyGLC: OS << "GLC"; break;
824     case ImmTySLC: OS << "SLC"; break;
825     case ImmTySWZ: OS << "SWZ"; break;
826     case ImmTyTFE: OS << "TFE"; break;
827     case ImmTyD16: OS << "D16"; break;
828     case ImmTyFORMAT: OS << "FORMAT"; break;
829     case ImmTyClampSI: OS << "ClampSI"; break;
830     case ImmTyOModSI: OS << "OModSI"; break;
831     case ImmTyDPP8: OS << "DPP8"; break;
832     case ImmTyDppCtrl: OS << "DppCtrl"; break;
833     case ImmTyDppRowMask: OS << "DppRowMask"; break;
834     case ImmTyDppBankMask: OS << "DppBankMask"; break;
835     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
836     case ImmTyDppFi: OS << "FI"; break;
837     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
838     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
839     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
840     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
841     case ImmTyDMask: OS << "DMask"; break;
842     case ImmTyDim: OS << "Dim"; break;
843     case ImmTyUNorm: OS << "UNorm"; break;
844     case ImmTyDA: OS << "DA"; break;
845     case ImmTyR128A16: OS << "R128A16"; break;
846     case ImmTyLWE: OS << "LWE"; break;
847     case ImmTyOff: OS << "Off"; break;
848     case ImmTyExpTgt: OS << "ExpTgt"; break;
849     case ImmTyExpCompr: OS << "ExpCompr"; break;
850     case ImmTyExpVM: OS << "ExpVM"; break;
851     case ImmTyHwreg: OS << "Hwreg"; break;
852     case ImmTySendMsg: OS << "SendMsg"; break;
853     case ImmTyInterpSlot: OS << "InterpSlot"; break;
854     case ImmTyInterpAttr: OS << "InterpAttr"; break;
855     case ImmTyAttrChan: OS << "AttrChan"; break;
856     case ImmTyOpSel: OS << "OpSel"; break;
857     case ImmTyOpSelHi: OS << "OpSelHi"; break;
858     case ImmTyNegLo: OS << "NegLo"; break;
859     case ImmTyNegHi: OS << "NegHi"; break;
860     case ImmTySwizzle: OS << "Swizzle"; break;
861     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
862     case ImmTyHigh: OS << "High"; break;
863     case ImmTyBLGP: OS << "BLGP"; break;
864     case ImmTyCBSZ: OS << "CBSZ"; break;
865     case ImmTyABID: OS << "ABID"; break;
866     case ImmTyEndpgm: OS << "Endpgm"; break;
867     }
868   }
869 
870   void print(raw_ostream &OS) const override {
871     switch (Kind) {
872     case Register:
873       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
874       break;
875     case Immediate:
876       OS << '<' << getImm();
877       if (getImmTy() != ImmTyNone) {
878         OS << " type: "; printImmTy(OS, getImmTy());
879       }
880       OS << " mods: " << Imm.Mods << '>';
881       break;
882     case Token:
883       OS << '\'' << getToken() << '\'';
884       break;
885     case Expression:
886       OS << "<expr " << *Expr << '>';
887       break;
888     }
889   }
890 
891   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
892                                       int64_t Val, SMLoc Loc,
893                                       ImmTy Type = ImmTyNone,
894                                       bool IsFPImm = false) {
895     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
896     Op->Imm.Val = Val;
897     Op->Imm.IsFPImm = IsFPImm;
898     Op->Imm.Type = Type;
899     Op->Imm.Mods = Modifiers();
900     Op->StartLoc = Loc;
901     Op->EndLoc = Loc;
902     return Op;
903   }
904 
905   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
906                                         StringRef Str, SMLoc Loc,
907                                         bool HasExplicitEncodingSize = true) {
908     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
909     Res->Tok.Data = Str.data();
910     Res->Tok.Length = Str.size();
911     Res->StartLoc = Loc;
912     Res->EndLoc = Loc;
913     return Res;
914   }
915 
916   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
917                                       unsigned RegNo, SMLoc S,
918                                       SMLoc E) {
919     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
920     Op->Reg.RegNo = RegNo;
921     Op->Reg.Mods = Modifiers();
922     Op->StartLoc = S;
923     Op->EndLoc = E;
924     return Op;
925   }
926 
927   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
928                                        const class MCExpr *Expr, SMLoc S) {
929     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
930     Op->Expr = Expr;
931     Op->StartLoc = S;
932     Op->EndLoc = S;
933     return Op;
934   }
935 };
936 
937 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
938   OS << "abs:" << Mods.Abs << " neg:" << Mods.Neg << " sext:" << Mods.Sext;
939   return OS;
940 }
941 
942 //===----------------------------------------------------------------------===//
943 // AsmParser
944 //===----------------------------------------------------------------------===//
945 
946 // Holds info related to the current kernel, e.g. count of SGPRs used.
947 // Kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the next
948 // .amdgpu_hsa_kernel directive or at EOF.
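// For example (a sketch): after a '.amdgpu_hsa_kernel foo' directive, an
// instruction that reads s[4:5] and writes v1 raises the '.kernel.sgpr_count'
// and '.kernel.vgpr_count' symbols to 6 and 2 respectively.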
949 class KernelScopeInfo {
950   int SgprIndexUnusedMin = -1;
951   int VgprIndexUnusedMin = -1;
952   MCContext *Ctx = nullptr;
953 
954   void usesSgprAt(int i) {
955     if (i >= SgprIndexUnusedMin) {
956       SgprIndexUnusedMin = ++i;
957       if (Ctx) {
958         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
959         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
960       }
961     }
962   }
963 
964   void usesVgprAt(int i) {
965     if (i >= VgprIndexUnusedMin) {
966       VgprIndexUnusedMin = ++i;
967       if (Ctx) {
968         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
969         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
970       }
971     }
972   }
973 
974 public:
975   KernelScopeInfo() = default;
976 
977   void initialize(MCContext &Context) {
978     Ctx = &Context;
979     usesSgprAt(SgprIndexUnusedMin = -1);
980     usesVgprAt(VgprIndexUnusedMin = -1);
981   }
982 
983   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
984     switch (RegKind) {
985       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
986       case IS_AGPR: // fall through
987       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
988       default: break;
989     }
990   }
991 };
992 
993 class AMDGPUAsmParser : public MCTargetAsmParser {
994   MCAsmParser &Parser;
995 
996   // Number of extra operands parsed after the first optional operand.
997   // This may be necessary to skip hardcoded mandatory operands.
998   static const unsigned MAX_OPR_LOOKAHEAD = 8;
999 
1000   unsigned ForcedEncodingSize = 0;
1001   bool ForcedDPP = false;
1002   bool ForcedSDWA = false;
1003   KernelScopeInfo KernelScope;
1004 
1005   /// @name Auto-generated Match Functions
1006   /// {
1007 
1008 #define GET_ASSEMBLER_HEADER
1009 #include "AMDGPUGenAsmMatcher.inc"
1010 
1011   /// }
1012 
1013 private:
1014   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1015   bool OutOfRangeError(SMRange Range);
1016   /// Calculate VGPR/SGPR blocks required for the given target, reserved
1017   /// registers, and user-specified NextFreeXGPR values.
1018   ///
1019   /// \param Features [in] Target features, used for bug corrections.
1020   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1021   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1022   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1023   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1024   /// descriptor field, if valid.
1025   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1026   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1027   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1028   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1029   /// \param VGPRBlocks [out] Result VGPR block count.
1030   /// \param SGPRBlocks [out] Result SGPR block count.
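  ///
  /// A sketch of the block computation (the encoding granule is target- and
  /// wavefront-size-dependent):
  ///   Blocks = alignTo(max(1, NextFreeXGPR), Granule) / Granule - 1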
1031   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1032                           bool FlatScrUsed, bool XNACKUsed,
1033                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1034                           SMRange VGPRRange, unsigned NextFreeSGPR,
1035                           SMRange SGPRRange, unsigned &VGPRBlocks,
1036                           unsigned &SGPRBlocks);
1037   bool ParseDirectiveAMDGCNTarget();
1038   bool ParseDirectiveAMDHSAKernel();
1039   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1040   bool ParseDirectiveHSACodeObjectVersion();
1041   bool ParseDirectiveHSACodeObjectISA();
1042   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1043   bool ParseDirectiveAMDKernelCodeT();
1044   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1045   bool ParseDirectiveAMDGPUHsaKernel();
1046 
1047   bool ParseDirectiveISAVersion();
1048   bool ParseDirectiveHSAMetadata();
1049   bool ParseDirectivePALMetadataBegin();
1050   bool ParseDirectivePALMetadata();
1051   bool ParseDirectiveAMDGPULDS();
1052 
1053   /// Common code to parse out a block of text (typically YAML) between start and
1054   /// end directives.
1055   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1056                            const char *AssemblerDirectiveEnd,
1057                            std::string &CollectString);
1058 
1059   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1060                              RegisterKind RegKind, unsigned Reg1);
1061   bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
1062                            unsigned& RegNum, unsigned& RegWidth);
1063   unsigned ParseRegularReg(RegisterKind &RegKind,
1064                            unsigned &RegNum,
1065                            unsigned &RegWidth);
1066   unsigned ParseSpecialReg(RegisterKind &RegKind,
1067                            unsigned &RegNum,
1068                            unsigned &RegWidth);
1069   unsigned ParseRegList(RegisterKind &RegKind,
1070                         unsigned &RegNum,
1071                         unsigned &RegWidth);
1072   bool ParseRegRange(unsigned& Num, unsigned& Width);
1073   unsigned getRegularReg(RegisterKind RegKind,
1074                          unsigned RegNum,
1075                          unsigned RegWidth);
1076 
1077   bool isRegister();
1078   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1079   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1080   void initializeGprCountSymbol(RegisterKind RegKind);
1081   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1082                              unsigned RegWidth);
1083   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1084                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1085   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1086                  bool IsGdsHardcoded);
1087 
1088 public:
1089   enum AMDGPUMatchResultTy {
1090     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1091   };
1092   enum OperandMode {
1093     OperandMode_Default,
1094     OperandMode_NSA,
1095   };
1096 
1097   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1098 
1099   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1100                const MCInstrInfo &MII,
1101                const MCTargetOptions &Options)
1102       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1103     MCAsmParserExtension::Initialize(Parser);
1104 
1105     if (getFeatureBits().none()) {
1106       // Set default features.
1107       copySTI().ToggleFeature("southern-islands");
1108     }
1109 
1110     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1111 
1112     {
1113       // TODO: make these pre-defined variables read-only.
1114       // Currently there is no suitable machinery in core llvm-mc for this.
1115       // MCSymbol::isRedefinable is intended for another purpose, and
1116       // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
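      // These symbols let assembly sources branch on the target, e.g. (a sketch):
      //   .if .amdgcn.gfx_generation_number >= 9
      //     // gfx9+ specific code
      //   .endif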
1117       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1118       MCContext &Ctx = getContext();
1119       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1120         MCSymbol *Sym =
1121             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1122         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1123         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1124         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1125         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1126         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1127       } else {
1128         MCSymbol *Sym =
1129             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1130         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1131         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1132         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1133         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1134         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1135       }
1136       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1137         initializeGprCountSymbol(IS_VGPR);
1138         initializeGprCountSymbol(IS_SGPR);
1139       } else
1140         KernelScope.initialize(getContext());
1141     }
1142   }
1143 
1144   bool hasXNACK() const {
1145     return AMDGPU::hasXNACK(getSTI());
1146   }
1147 
1148   bool hasMIMG_R128() const {
1149     return AMDGPU::hasMIMG_R128(getSTI());
1150   }
1151 
1152   bool hasPackedD16() const {
1153     return AMDGPU::hasPackedD16(getSTI());
1154   }
1155 
1156   bool isSI() const {
1157     return AMDGPU::isSI(getSTI());
1158   }
1159 
1160   bool isCI() const {
1161     return AMDGPU::isCI(getSTI());
1162   }
1163 
1164   bool isVI() const {
1165     return AMDGPU::isVI(getSTI());
1166   }
1167 
1168   bool isGFX9() const {
1169     return AMDGPU::isGFX9(getSTI());
1170   }
1171 
1172   bool isGFX10() const {
1173     return AMDGPU::isGFX10(getSTI());
1174   }
1175 
1176   bool hasInv2PiInlineImm() const {
1177     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1178   }
1179 
1180   bool hasFlatOffsets() const {
1181     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1182   }
1183 
1184   bool hasSGPR102_SGPR103() const {
1185     return !isVI() && !isGFX9();
1186   }
1187 
1188   bool hasSGPR104_SGPR105() const {
1189     return isGFX10();
1190   }
1191 
1192   bool hasIntClamp() const {
1193     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1194   }
1195 
1196   AMDGPUTargetStreamer &getTargetStreamer() {
1197     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1198     return static_cast<AMDGPUTargetStreamer &>(TS);
1199   }
1200 
1201   const MCRegisterInfo *getMRI() const {
1202     // We need this const_cast because for some reason getContext() is not const
1203     // in MCAsmParser.
1204     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1205   }
1206 
1207   const MCInstrInfo *getMII() const {
1208     return &MII;
1209   }
1210 
1211   const FeatureBitset &getFeatureBits() const {
1212     return getSTI().getFeatureBits();
1213   }
1214 
1215   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1216   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1217   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1218 
1219   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1220   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1221   bool isForcedDPP() const { return ForcedDPP; }
1222   bool isForcedSDWA() const { return ForcedSDWA; }
1223   ArrayRef<unsigned> getMatchedVariants() const;
1224 
1225   std::unique_ptr<AMDGPUOperand> parseRegister();
1226   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1227   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1228   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1229                                       unsigned Kind) override;
1230   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1231                                OperandVector &Operands, MCStreamer &Out,
1232                                uint64_t &ErrorInfo,
1233                                bool MatchingInlineAsm) override;
1234   bool ParseDirective(AsmToken DirectiveID) override;
1235   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1236                                     OperandMode Mode = OperandMode_Default);
1237   StringRef parseMnemonicSuffix(StringRef Name);
1238   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1239                         SMLoc NameLoc, OperandVector &Operands) override;
1240   //bool ProcessInstruction(MCInst &Inst);
1241 
1242   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1243 
1244   OperandMatchResultTy
1245   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1246                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1247                      bool (*ConvertResult)(int64_t &) = nullptr);
1248 
1249   OperandMatchResultTy
1250   parseOperandArrayWithPrefix(const char *Prefix,
1251                               OperandVector &Operands,
1252                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1253                               bool (*ConvertResult)(int64_t&) = nullptr);
1254 
1255   OperandMatchResultTy
1256   parseNamedBit(const char *Name, OperandVector &Operands,
1257                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1258   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1259                                              StringRef &Value);
1260 
1261   bool isModifier();
1262   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1263   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1264   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1265   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1266   bool parseSP3NegModifier();
1267   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1268   OperandMatchResultTy parseReg(OperandVector &Operands);
1269   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1270   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1271   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1272   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1273   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1274   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1275   OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1276 
1277   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1278   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1279   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1280   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1281 
1282   bool parseCnt(int64_t &IntVal);
1283   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1284   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1285 
1286 private:
1287   struct OperandInfoTy {
1288     int64_t Id;
1289     bool IsSymbolic = false;
1290     bool IsDefined = false;
1291 
1292     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1293   };
1294 
1295   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1296   bool validateSendMsg(const OperandInfoTy &Msg,
1297                        const OperandInfoTy &Op,
1298                        const OperandInfoTy &Stream,
1299                        const SMLoc Loc);
1300 
1301   bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1302   bool validateHwreg(const OperandInfoTy &HwReg,
1303                      const int64_t Offset,
1304                      const int64_t Width,
1305                      const SMLoc Loc);
1306 
1307   void errorExpTgt();
1308   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1309   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1310 
1311   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1312   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1313   bool validateSOPLiteral(const MCInst &Inst) const;
1314   bool validateConstantBusLimitations(const MCInst &Inst);
1315   bool validateEarlyClobberLimitations(const MCInst &Inst);
1316   bool validateIntClampSupported(const MCInst &Inst);
1317   bool validateMIMGAtomicDMask(const MCInst &Inst);
1318   bool validateMIMGGatherDMask(const MCInst &Inst);
1319   bool validateMIMGDataSize(const MCInst &Inst);
1320   bool validateMIMGAddrSize(const MCInst &Inst);
1321   bool validateMIMGD16(const MCInst &Inst);
1322   bool validateMIMGDim(const MCInst &Inst);
1323   bool validateLdsDirect(const MCInst &Inst);
1324   bool validateOpSel(const MCInst &Inst);
1325   bool validateVccOperand(unsigned Reg) const;
1326   bool validateVOP3Literal(const MCInst &Inst) const;
1327   unsigned getConstantBusLimit(unsigned Opcode) const;
1328   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1329   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1330   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1331 
1332   bool isId(const StringRef Id) const;
1333   bool isId(const AsmToken &Token, const StringRef Id) const;
1334   bool isToken(const AsmToken::TokenKind Kind) const;
1335   bool trySkipId(const StringRef Id);
1336   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1337   bool trySkipToken(const AsmToken::TokenKind Kind);
1338   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1339   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1340   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1341   AsmToken::TokenKind getTokenKind() const;
1342   bool parseExpr(int64_t &Imm);
1343   bool parseExpr(OperandVector &Operands);
1344   StringRef getTokenStr() const;
1345   AsmToken peekToken();
1346   AsmToken getToken() const;
1347   SMLoc getLoc() const;
1348   void lex();
1349 
1350 public:
1351   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1352   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1353 
1354   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1355   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1356   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1357   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1358   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1359   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1360 
1361   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1362                             const unsigned MinVal,
1363                             const unsigned MaxVal,
1364                             const StringRef ErrMsg);
1365   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1366   bool parseSwizzleOffset(int64_t &Imm);
1367   bool parseSwizzleMacro(int64_t &Imm);
1368   bool parseSwizzleQuadPerm(int64_t &Imm);
1369   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1370   bool parseSwizzleBroadcast(int64_t &Imm);
1371   bool parseSwizzleSwap(int64_t &Imm);
1372   bool parseSwizzleReverse(int64_t &Imm);
1373 
1374   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1375   int64_t parseGPRIdxMacro();
1376 
1377   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1378   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1379   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1380   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1381   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1382 
1383   AMDGPUOperand::Ptr defaultDLC() const;
1384   AMDGPUOperand::Ptr defaultGLC() const;
1385   AMDGPUOperand::Ptr defaultSLC() const;
1386 
1387   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1388   AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1389   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1390   AMDGPUOperand::Ptr defaultFlatOffset() const;
1391 
1392   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1393 
1394   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1395                OptionalImmIndexMap &OptionalIdx);
1396   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1397   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1398   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1399 
1400   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1401 
1402   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1403                bool IsAtomic = false);
1404   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1405 
1406   OperandMatchResultTy parseDim(OperandVector &Operands);
1407   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1408   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1409   AMDGPUOperand::Ptr defaultRowMask() const;
1410   AMDGPUOperand::Ptr defaultBankMask() const;
1411   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1412   AMDGPUOperand::Ptr defaultFI() const;
1413   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1414   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1415 
1416   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1417                                     AMDGPUOperand::ImmTy Type);
1418   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1419   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1420   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1421   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1422   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1423   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1424                 uint64_t BasicInstType, bool skipVcc = false);
1425 
1426   AMDGPUOperand::Ptr defaultBLGP() const;
1427   AMDGPUOperand::Ptr defaultCBSZ() const;
1428   AMDGPUOperand::Ptr defaultABID() const;
1429 
1430   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1431   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1432 };
1433 
1434 struct OptionalOperand {
1435   const char *Name;
1436   AMDGPUOperand::ImmTy Type;
1437   bool IsBit;
1438   bool (*ConvertResult)(int64_t&);
1439 };
1440 
1441 } // end anonymous namespace
1442 
1443 // May be called with an integer type of equivalent bitwidth.
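// For example, a 2-byte operand maps to IEEEhalf() whether the operand type is
// MVT::f16 or MVT::i16.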
1444 static const fltSemantics *getFltSemantics(unsigned Size) {
1445   switch (Size) {
1446   case 4:
1447     return &APFloat::IEEEsingle();
1448   case 8:
1449     return &APFloat::IEEEdouble();
1450   case 2:
1451     return &APFloat::IEEEhalf();
1452   default:
1453     llvm_unreachable("unsupported fp type");
1454   }
1455 }
1456 
1457 static const fltSemantics *getFltSemantics(MVT VT) {
1458   return getFltSemantics(VT.getSizeInBits() / 8);
1459 }
1460 
1461 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1462   switch (OperandType) {
1463   case AMDGPU::OPERAND_REG_IMM_INT32:
1464   case AMDGPU::OPERAND_REG_IMM_FP32:
1465   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1466   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1467   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1468   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1469     return &APFloat::IEEEsingle();
1470   case AMDGPU::OPERAND_REG_IMM_INT64:
1471   case AMDGPU::OPERAND_REG_IMM_FP64:
1472   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1473   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1474     return &APFloat::IEEEdouble();
1475   case AMDGPU::OPERAND_REG_IMM_INT16:
1476   case AMDGPU::OPERAND_REG_IMM_FP16:
1477   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1478   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1479   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1480   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1481   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1482   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1483   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1484   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1485   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1486   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1487     return &APFloat::IEEEhalf();
1488   default:
1489     llvm_unreachable("unsupported fp type");
1490   }
1491 }
1492 
1493 //===----------------------------------------------------------------------===//
1494 // Operand
1495 //===----------------------------------------------------------------------===//
1496 
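// A sketch of the intent of the check below: converting 1.0 to f16 is exact and
// converting 0.1 merely loses precision (both are accepted), while converting
// 1.0e10 to f16 overflows and is rejected.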
1497 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1498   bool Lost;
1499 
1500   // Convert the literal to the floating-point semantics of the requested type.
1501   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1502                                                APFloat::rmNearestTiesToEven,
1503                                                &Lost);
1504   // We allow precision loss but not overflow or underflow.
1505   if (Status != APFloat::opOK &&
1506       Lost &&
1507       ((Status & APFloat::opOverflow)  != 0 ||
1508        (Status & APFloat::opUnderflow) != 0)) {
1509     return false;
1510   }
1511 
1512   return true;
1513 }
1514 
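// For example, with Size == 16 both 0xFFFF (unsigned) and -32768 (signed) are
// safe truncations, while 0x10000 fits neither interpretation.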
1515 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1516   return isUIntN(Size, Val) || isIntN(Size, Val);
1517 }
1518 
1519 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1520 
1521   // This is a hack to enable named inline values like
1522   // shared_base with both 32-bit and 64-bit operands.
1523   // Note that these values are defined as
1524   // 32-bit operands only.
1525   if (isInlineValue()) {
1526     return true;
1527   }
1528 
1529   if (!isImmTy(ImmTyNone)) {
1530     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1531     return false;
1532   }
1533   // TODO: We should avoid using host float here. It would be better to
1534   // check the float bit values which is what a few other places do.
1535   // We've had bot failures before due to weird NaN support on mips hosts.
1536 
1537   APInt Literal(64, Imm.Val);
1538 
1539   if (Imm.IsFPImm) { // We got fp literal token
1540     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1541       return AMDGPU::isInlinableLiteral64(Imm.Val,
1542                                           AsmParser->hasInv2PiInlineImm());
1543     }
1544 
1545     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1546     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1547       return false;
1548 
1549     if (type.getScalarSizeInBits() == 16) {
1550       return AMDGPU::isInlinableLiteral16(
1551         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1552         AsmParser->hasInv2PiInlineImm());
1553     }
1554 
1555     // Check if single precision literal is inlinable
1556     return AMDGPU::isInlinableLiteral32(
1557       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1558       AsmParser->hasInv2PiInlineImm());
1559   }
1560 
1561   // We got int literal token.
1562   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1563     return AMDGPU::isInlinableLiteral64(Imm.Val,
1564                                         AsmParser->hasInv2PiInlineImm());
1565   }
1566 
1567   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1568     return false;
1569   }
1570 
1571   if (type.getScalarSizeInBits() == 16) {
1572     return AMDGPU::isInlinableLiteral16(
1573       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1574       AsmParser->hasInv2PiInlineImm());
1575   }
1576 
1577   return AMDGPU::isInlinableLiteral32(
1578     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1579     AsmParser->hasInv2PiInlineImm());
1580 }
1581 
1582 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1583   // Check that this immediate can be added as literal
1584   if (!isImmTy(ImmTyNone)) {
1585     return false;
1586   }
1587 
1588   if (!Imm.IsFPImm) {
1589     // We got int literal token.
1590 
1591     if (type == MVT::f64 && hasFPModifiers()) {
1592       // Cannot apply fp modifiers to int literals preserving the same semantics
1593       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1594       // disable these cases.
1595       return false;
1596     }
1597 
1598     unsigned Size = type.getSizeInBits();
1599     if (Size == 64)
1600       Size = 32;
1601 
1602     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1603     // types.
1604     return isSafeTruncation(Imm.Val, Size);
1605   }
1606 
1607   // We got fp literal token
1608   if (type == MVT::f64) { // Expected 64-bit fp operand
1609     // The low 32 bits of the literal would be set to zero, but we accept such literals.
1610     return true;
1611   }
1612 
1613   if (type == MVT::i64) { // Expected 64-bit int operand
1614     // We don't allow fp literals in 64-bit integer instructions. It is
1615     // unclear how we should encode them.
1616     return false;
1617   }
1618 
1619   // We allow fp literals with f16x2 operands assuming that the specified
1620   // literal goes into the lower half and the upper half is zero. We also
1621   // require that the literal can be losslessly converted to f16.
1622   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1623                      (type == MVT::v2i16)? MVT::i16 : type;
1624 
1625   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1626   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1627 }
1628 
1629 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1630   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1631 }
1632 
1633 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1634   if (AsmParser->isVI())
1635     return isVReg32();
1636   else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1637     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1638   else
1639     return false;
1640 }
1641 
1642 bool AMDGPUOperand::isSDWAFP16Operand() const {
1643   return isSDWAOperand(MVT::f16);
1644 }
1645 
1646 bool AMDGPUOperand::isSDWAFP32Operand() const {
1647   return isSDWAOperand(MVT::f32);
1648 }
1649 
1650 bool AMDGPUOperand::isSDWAInt16Operand() const {
1651   return isSDWAOperand(MVT::i16);
1652 }
1653 
1654 bool AMDGPUOperand::isSDWAInt32Operand() const {
1655   return isSDWAOperand(MVT::i32);
1656 }
1657 
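// A boolean source (e.g. a carry-in) must match the wavefront size:
// a 64-bit scalar operand in wave64 mode, a 32-bit one in wave32 mode.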
1658 bool AMDGPUOperand::isBoolReg() const {
1659   return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1660          (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1661 }
1662 
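// Apply the 'abs'/'neg' input modifiers to the raw bits of an fp immediate
// by clearing or flipping the sign bit of the Size-byte value.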
1663 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1664 {
1665   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1666   assert(Size == 2 || Size == 4 || Size == 8);
1667 
1668   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1669 
1670   if (Imm.Mods.Abs) {
1671     Val &= ~FpSignMask;
1672   }
1673   if (Imm.Mods.Neg) {
1674     Val ^= FpSignMask;
1675   }
1676 
1677   return Val;
1678 }
1679 
1680 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1681   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1682                              Inst.getNumOperands())) {
1683     addLiteralImmOperand(Inst, Imm.Val,
1684                          ApplyModifiers &&
1685                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1686   } else {
1687     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1688     Inst.addOperand(MCOperand::createImm(Imm.Val));
1689   }
1690 }
1691 
1692 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1693   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1694   auto OpNum = Inst.getNumOperands();
1695   // Check that this operand accepts literals
1696   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1697 
1698   if (ApplyModifiers) {
1699     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1700     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1701     Val = applyInputFPModifiers(Val, Size);
1702   }
1703 
1704   APInt Literal(64, Val);
1705   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1706 
1707   if (Imm.IsFPImm) { // We got fp literal token
1708     switch (OpTy) {
1709     case AMDGPU::OPERAND_REG_IMM_INT64:
1710     case AMDGPU::OPERAND_REG_IMM_FP64:
1711     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1712     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1713       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1714                                        AsmParser->hasInv2PiInlineImm())) {
1715         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1716         return;
1717       }
1718 
1719       // Non-inlineable
1720       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1721         // For fp operands we check if low 32 bits are zeros
1722         if (Literal.getLoBits(32) != 0) {
1723           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1724           "Can't encode literal as exact 64-bit floating-point operand. "
1725           "Low 32-bits will be set to zero");
1726         }
1727 
1728         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1729         return;
1730       }
1731 
1732       // We don't allow fp literals in 64-bit integer instructions. It is
1733       // unclear how we should encode them. This case should be checked earlier
1734       // in predicate methods (isLiteralImm())
1735       llvm_unreachable("fp literal in 64-bit integer instruction.");
1736 
1737     case AMDGPU::OPERAND_REG_IMM_INT32:
1738     case AMDGPU::OPERAND_REG_IMM_FP32:
1739     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1740     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1741     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1742     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1743     case AMDGPU::OPERAND_REG_IMM_INT16:
1744     case AMDGPU::OPERAND_REG_IMM_FP16:
1745     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1746     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1747     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1748     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1749     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1750     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1751     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1752     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1753     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1754     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1755       bool Lost;
1756       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1757       // Convert literal to the fp semantics of the operand type.
1758       FPLiteral.convert(*getOpFltSemantics(OpTy),
1759                         APFloat::rmNearestTiesToEven, &Lost);
1760       // We allow precision loss but not overflow or underflow. This should be
1761       // checked earlier in isLiteralImm().
1762 
1763       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1764       Inst.addOperand(MCOperand::createImm(ImmVal));
1765       return;
1766     }
1767     default:
1768       llvm_unreachable("invalid operand size");
1769     }
1770 
1771     return;
1772   }
1773 
1774   // We got int literal token.
1775   // Only sign extend inline immediates.
1776   switch (OpTy) {
1777   case AMDGPU::OPERAND_REG_IMM_INT32:
1778   case AMDGPU::OPERAND_REG_IMM_FP32:
1779   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1780   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1781   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1782   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1783   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1784   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1785     if (isSafeTruncation(Val, 32) &&
1786         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1787                                      AsmParser->hasInv2PiInlineImm())) {
1788       Inst.addOperand(MCOperand::createImm(Val));
1789       return;
1790     }
1791 
1792     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1793     return;
1794 
1795   case AMDGPU::OPERAND_REG_IMM_INT64:
1796   case AMDGPU::OPERAND_REG_IMM_FP64:
1797   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1798   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1799     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1800       Inst.addOperand(MCOperand::createImm(Val));
1801       return;
1802     }
1803 
1804     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1805     return;
1806 
1807   case AMDGPU::OPERAND_REG_IMM_INT16:
1808   case AMDGPU::OPERAND_REG_IMM_FP16:
1809   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1810   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1811   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1812   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1813     if (isSafeTruncation(Val, 16) &&
1814         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1815                                      AsmParser->hasInv2PiInlineImm())) {
1816       Inst.addOperand(MCOperand::createImm(Val));
1817       return;
1818     }
1819 
1820     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1821     return;
1822 
1823   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1824   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1825   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1826   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
1827     assert(isSafeTruncation(Val, 16));
1828     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1829                                         AsmParser->hasInv2PiInlineImm()));
1830 
1831     Inst.addOperand(MCOperand::createImm(Val));
1832     return;
1833   }
1834   default:
1835     llvm_unreachable("invalid operand size");
1836   }
1837 }
1838 
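// Encode a KImm floating-point operand (e.g. the constant of v_madmk/v_madak):
// integer tokens are truncated to Bitwidth bits, while fp tokens are first
// rounded to the Bitwidth-bit float format.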
1839 template <unsigned Bitwidth>
1840 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1841   APInt Literal(64, Imm.Val);
1842 
1843   if (!Imm.IsFPImm) {
1844     // We got int literal token.
1845     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1846     return;
1847   }
1848 
1849   bool Lost;
1850   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1851   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1852                     APFloat::rmNearestTiesToEven, &Lost);
1853   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1854 }
1855 
1856 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1857   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1858 }
1859 
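// Named inline values (e.g. src_shared_base, vccz, null) are encoded as
// registers but may appear wherever an inline constant is accepted.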
1860 static bool isInlineValue(unsigned Reg) {
1861   switch (Reg) {
1862   case AMDGPU::SRC_SHARED_BASE:
1863   case AMDGPU::SRC_SHARED_LIMIT:
1864   case AMDGPU::SRC_PRIVATE_BASE:
1865   case AMDGPU::SRC_PRIVATE_LIMIT:
1866   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1867     return true;
1868   case AMDGPU::SRC_VCCZ:
1869   case AMDGPU::SRC_EXECZ:
1870   case AMDGPU::SRC_SCC:
1871     return true;
1872   case AMDGPU::SGPR_NULL:
1873     return true;
1874   default:
1875     return false;
1876   }
1877 }
1878 
1879 bool AMDGPUOperand::isInlineValue() const {
1880   return isRegKind() && ::isInlineValue(getReg());
1881 }
1882 
1883 //===----------------------------------------------------------------------===//
1884 // AsmParser
1885 //===----------------------------------------------------------------------===//
1886 
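// Map a register kind and width (in 32-bit dwords) to a register class ID,
// e.g. (IS_SGPR, 2) -> SGPR_64RegClassID. Returns -1 for unsupported widths.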
1887 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1888   if (Is == IS_VGPR) {
1889     switch (RegWidth) {
1890       default: return -1;
1891       case 1: return AMDGPU::VGPR_32RegClassID;
1892       case 2: return AMDGPU::VReg_64RegClassID;
1893       case 3: return AMDGPU::VReg_96RegClassID;
1894       case 4: return AMDGPU::VReg_128RegClassID;
1895       case 5: return AMDGPU::VReg_160RegClassID;
1896       case 8: return AMDGPU::VReg_256RegClassID;
1897       case 16: return AMDGPU::VReg_512RegClassID;
1898       case 32: return AMDGPU::VReg_1024RegClassID;
1899     }
1900   } else if (Is == IS_TTMP) {
1901     switch (RegWidth) {
1902       default: return -1;
1903       case 1: return AMDGPU::TTMP_32RegClassID;
1904       case 2: return AMDGPU::TTMP_64RegClassID;
1905       case 4: return AMDGPU::TTMP_128RegClassID;
1906       case 8: return AMDGPU::TTMP_256RegClassID;
1907       case 16: return AMDGPU::TTMP_512RegClassID;
1908     }
1909   } else if (Is == IS_SGPR) {
1910     switch (RegWidth) {
1911       default: return -1;
1912       case 1: return AMDGPU::SGPR_32RegClassID;
1913       case 2: return AMDGPU::SGPR_64RegClassID;
1914       case 4: return AMDGPU::SGPR_128RegClassID;
1915       case 8: return AMDGPU::SGPR_256RegClassID;
1916       case 16: return AMDGPU::SGPR_512RegClassID;
1917     }
1918   } else if (Is == IS_AGPR) {
1919     switch (RegWidth) {
1920       default: return -1;
1921       case 1: return AMDGPU::AGPR_32RegClassID;
1922       case 2: return AMDGPU::AReg_64RegClassID;
1923       case 4: return AMDGPU::AReg_128RegClassID;
1924       case 16: return AMDGPU::AReg_512RegClassID;
1925       case 32: return AMDGPU::AReg_1024RegClassID;
1926     }
1927   }
1928   return -1;
1929 }
1930 
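// Resolve a special register name, e.g. "vcc" -> VCC; "scc" and "src_scc"
// both map to SRC_SCC. Returns NoRegister for unrecognized names.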
1931 static unsigned getSpecialRegForName(StringRef RegName) {
1932   return StringSwitch<unsigned>(RegName)
1933     .Case("exec", AMDGPU::EXEC)
1934     .Case("vcc", AMDGPU::VCC)
1935     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1936     .Case("xnack_mask", AMDGPU::XNACK_MASK)
1937     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
1938     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
1939     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1940     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1941     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
1942     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
1943     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1944     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1945     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1946     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1947     .Case("lds_direct", AMDGPU::LDS_DIRECT)
1948     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1949     .Case("m0", AMDGPU::M0)
1950     .Case("vccz", AMDGPU::SRC_VCCZ)
1951     .Case("src_vccz", AMDGPU::SRC_VCCZ)
1952     .Case("execz", AMDGPU::SRC_EXECZ)
1953     .Case("src_execz", AMDGPU::SRC_EXECZ)
1954     .Case("scc", AMDGPU::SRC_SCC)
1955     .Case("src_scc", AMDGPU::SRC_SCC)
1956     .Case("tba", AMDGPU::TBA)
1957     .Case("tma", AMDGPU::TMA)
1958     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1959     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1960     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1961     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1962     .Case("vcc_lo", AMDGPU::VCC_LO)
1963     .Case("vcc_hi", AMDGPU::VCC_HI)
1964     .Case("exec_lo", AMDGPU::EXEC_LO)
1965     .Case("exec_hi", AMDGPU::EXEC_HI)
1966     .Case("tma_lo", AMDGPU::TMA_LO)
1967     .Case("tma_hi", AMDGPU::TMA_HI)
1968     .Case("tba_lo", AMDGPU::TBA_LO)
1969     .Case("tba_hi", AMDGPU::TBA_HI)
1970     .Case("null", AMDGPU::SGPR_NULL)
1971     .Default(AMDGPU::NoRegister);
1972 }
1973 
1974 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1975                                     SMLoc &EndLoc) {
1976   auto R = parseRegister();
1977   if (!R) return true;
1978   assert(R->isReg());
1979   RegNo = R->getReg();
1980   StartLoc = R->getStartLoc();
1981   EndLoc = R->getEndLoc();
1982   return false;
1983 }
1984 
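// Extend the register span collected from a register list with one more
// element: Reg1 must be the next consecutive 32-bit register, or the matching
// high half of a special register pair (e.g. vcc_lo, vcc_hi folds into vcc).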
1985 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1986                                             RegisterKind RegKind, unsigned Reg1) {
1987   switch (RegKind) {
1988   case IS_SPECIAL:
1989     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1990       Reg = AMDGPU::EXEC;
1991       RegWidth = 2;
1992       return true;
1993     }
1994     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1995       Reg = AMDGPU::FLAT_SCR;
1996       RegWidth = 2;
1997       return true;
1998     }
1999     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2000       Reg = AMDGPU::XNACK_MASK;
2001       RegWidth = 2;
2002       return true;
2003     }
2004     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2005       Reg = AMDGPU::VCC;
2006       RegWidth = 2;
2007       return true;
2008     }
2009     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2010       Reg = AMDGPU::TBA;
2011       RegWidth = 2;
2012       return true;
2013     }
2014     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2015       Reg = AMDGPU::TMA;
2016       RegWidth = 2;
2017       return true;
2018     }
2019     return false;
2020   case IS_VGPR:
2021   case IS_SGPR:
2022   case IS_AGPR:
2023   case IS_TTMP:
2024     if (Reg1 != Reg + RegWidth) {
2025       return false;
2026     }
2027     RegWidth++;
2028     return true;
2029   default:
2030     llvm_unreachable("unexpected register kind");
2031   }
2032 }
2033 
2034 struct RegInfo {
2035   StringLiteral Name;
2036   RegisterKind Kind;
2037 };
2038 
2039 static constexpr RegInfo RegularRegisters[] = {
2040   {{"v"},    IS_VGPR},
2041   {{"s"},    IS_SGPR},
2042   {{"ttmp"}, IS_TTMP},
2043   {{"acc"},  IS_AGPR},
2044   {{"a"},    IS_AGPR},
2045 };
2046 
2047 static bool isRegularReg(RegisterKind Kind) {
2048   return Kind == IS_VGPR ||
2049          Kind == IS_SGPR ||
2050          Kind == IS_TTMP ||
2051          Kind == IS_AGPR;
2052 }
2053 
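// Find the register prefix that Str starts with, e.g. "v12" and "v[0:3]" both
// match the "v" entry. "acc" is listed before "a" so the longer prefix wins.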
2054 static const RegInfo* getRegularRegInfo(StringRef Str) {
2055   for (const RegInfo &Reg : RegularRegisters)
2056     if (Str.startswith(Reg.Name))
2057       return &Reg;
2058   return nullptr;
2059 }
2060 
2061 static bool getRegNum(StringRef Str, unsigned& Num) {
2062   return !Str.getAsInteger(10, Num);
2063 }
2064 
2065 bool
2066 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2067                             const AsmToken &NextToken) const {
2068 
2069   // A list of consecutive registers: [s0,s1,s2,s3]
2070   if (Token.is(AsmToken::LBrac))
2071     return true;
2072 
2073   if (!Token.is(AsmToken::Identifier))
2074     return false;
2075 
2076   // A single register like s0 or a range of registers like s[0:1]
2077 
2078   StringRef Str = Token.getString();
2079   const RegInfo *Reg = getRegularRegInfo(Str);
2080   if (Reg) {
2081     StringRef RegName = Reg->Name;
2082     StringRef RegSuffix = Str.substr(RegName.size());
2083     if (!RegSuffix.empty()) {
2084       unsigned Num;
2085       // A single register with an index: rXX
2086       if (getRegNum(RegSuffix, Num))
2087         return true;
2088     } else {
2089       // A range of registers: r[XX:YY].
2090       if (NextToken.is(AsmToken::LBrac))
2091         return true;
2092     }
2093   }
2094 
2095   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2096 }
2097 
2098 bool
2099 AMDGPUAsmParser::isRegister()
2100 {
2101   return isRegister(getToken(), peekToken());
2102 }
2103 
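// Translate a regular register reference (kind, first dword index, width in
// dwords) into an MC register, e.g. IS_SGPR with RegNum=4 and RegWidth=2 for
// s[4:5]. SGPR and TTMP tuples must be aligned to min(RegWidth, 4) dwords.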
2104 unsigned
2105 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2106                                unsigned RegNum,
2107                                unsigned RegWidth) {
2108 
2109   assert(isRegularReg(RegKind));
2110 
2111   unsigned AlignSize = 1;
2112   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2113     // SGPR and TTMP registers must be aligned.
2114     // Max required alignment is 4 dwords.
2115     AlignSize = std::min(RegWidth, 4u);
2116   }
2117 
2118   if (RegNum % AlignSize != 0)
2119     return AMDGPU::NoRegister;
2120 
2121   unsigned RegIdx = RegNum / AlignSize;
2122   int RCID = getRegClass(RegKind, RegWidth);
2123   if (RCID == -1)
2124     return AMDGPU::NoRegister;
2125 
2126   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2127   const MCRegisterClass RC = TRI->getRegClass(RCID);
2128   if (RegIdx >= RC.getNumRegs())
2129     return AMDGPU::NoRegister;
2130 
2131   return RC.getRegister(RegIdx);
2132 }
2133 
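// Parse a bracketed index range such as "[8:11]" (Num=8, Width=4); a single
// bracketed index like "[5]" is also accepted and yields Width=1.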
2134 bool
2135 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2136   int64_t RegLo, RegHi;
2137   if (!trySkipToken(AsmToken::LBrac))
2138     return false;
2139 
2140   if (!parseExpr(RegLo))
2141     return false;
2142 
2143   if (trySkipToken(AsmToken::Colon)) {
2144     if (!parseExpr(RegHi))
2145       return false;
2146   } else {
2147     RegHi = RegLo;
2148   }
2149 
2150   if (!trySkipToken(AsmToken::RBrac))
2151     return false;
2152 
2153   if (!isUInt<32>(RegLo) || !isUInt<32>(RegHi) || RegLo > RegHi)
2154     return false;
2155 
2156   Num = static_cast<unsigned>(RegLo);
2157   Width = (RegHi - RegLo) + 1;
2158   return true;
2159 }
2160 
2161 unsigned
2162 AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2163                                  unsigned &RegNum,
2164                                  unsigned &RegWidth) {
2165   assert(isToken(AsmToken::Identifier));
2166   unsigned Reg = getSpecialRegForName(getTokenStr());
2167   if (Reg) {
2168     RegNum = 0;
2169     RegWidth = 1;
2170     RegKind = IS_SPECIAL;
2171     lex(); // skip register name
2172   }
2173   return Reg;
2174 }
2175 
2176 unsigned
2177 AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2178                                  unsigned &RegNum,
2179                                  unsigned &RegWidth) {
2180   assert(isToken(AsmToken::Identifier));
2181   StringRef RegName = getTokenStr();
2182 
2183   const RegInfo *RI = getRegularRegInfo(RegName);
2184   if (!RI)
2185     return AMDGPU::NoRegister;
2186   lex(); // skip register name
2187 
2188   RegKind = RI->Kind;
2189   StringRef RegSuffix = RegName.substr(RI->Name.size());
2190   if (!RegSuffix.empty()) {
2191     // Single 32-bit register: vXX.
2192     if (!getRegNum(RegSuffix, RegNum))
2193       return AMDGPU::NoRegister;
2194     RegWidth = 1;
2195   } else {
2196     // Range of registers: v[XX:YY]. ":YY" is optional.
2197     if (!ParseRegRange(RegNum, RegWidth))
2198       return AMDGPU::NoRegister;
2199   }
2200 
2201   return getRegularReg(RegKind, RegNum, RegWidth);
2202 }
2203 
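// Parse a bracketed list of single registers, e.g. [s0,s1,s2,s3] or
// [exec_lo,exec_hi]. Entries must be consecutive registers of the same kind;
// the result is the equivalent wide register (s[0:3] and exec respectively).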
2204 unsigned
2205 AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
2206                               unsigned &RegNum,
2207                               unsigned &RegWidth) {
2208   unsigned Reg = AMDGPU::NoRegister;
2209 
2210   if (!trySkipToken(AsmToken::LBrac))
2211     return AMDGPU::NoRegister;
2212 
2213   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2214 
2215   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2216     return AMDGPU::NoRegister;
2217   if (RegWidth != 1)
2218     return AMDGPU::NoRegister;
2219 
2220   for (; trySkipToken(AsmToken::Comma); ) {
2221     RegisterKind NextRegKind;
2222     unsigned NextReg, NextRegNum, NextRegWidth;
2223 
2224     if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth))
2225       return AMDGPU::NoRegister;
2226     if (NextRegWidth != 1)
2227       return AMDGPU::NoRegister;
2228     if (NextRegKind != RegKind)
2229       return AMDGPU::NoRegister;
2230     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg))
2231       return AMDGPU::NoRegister;
2232   }
2233 
2234   if (!trySkipToken(AsmToken::RBrac))
2235     return AMDGPU::NoRegister;
2236 
2237   if (isRegularReg(RegKind))
2238     Reg = getRegularReg(RegKind, RegNum, RegWidth);
2239 
2240   return Reg;
2241 }
2242 
2243 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
2244                                           unsigned &Reg,
2245                                           unsigned &RegNum,
2246                                           unsigned &RegWidth) {
2247   Reg = AMDGPU::NoRegister;
2248 
2249   if (isToken(AsmToken::Identifier)) {
2250     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth);
2251     if (Reg == AMDGPU::NoRegister)
2252       Reg = ParseRegularReg(RegKind, RegNum, RegWidth);
2253   } else {
2254     Reg = ParseRegList(RegKind, RegNum, RegWidth);
2255   }
2256 
2257   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2258   return Reg != AMDGPU::NoRegister && subtargetHasRegister(*TRI, Reg);
2259 }
2260 
2261 Optional<StringRef>
2262 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2263   switch (RegKind) {
2264   case IS_VGPR:
2265     return StringRef(".amdgcn.next_free_vgpr");
2266   case IS_SGPR:
2267     return StringRef(".amdgcn.next_free_sgpr");
2268   default:
2269     return None;
2270   }
2271 }
2272 
2273 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2274   auto SymbolName = getGprCountSymbolName(RegKind);
2275   assert(SymbolName && "initializing invalid register kind");
2276   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2277   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2278 }
2279 
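// Bump the .amdgcn.next_free_{v,s}gpr symbol so it stays one past the highest
// dword register index used so far; e.g. a reference to v[6:7] raises
// .amdgcn.next_free_vgpr to at least 8. Returns false on error.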
2280 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2281                                             unsigned DwordRegIndex,
2282                                             unsigned RegWidth) {
2283   // Symbols are only defined for GCN targets
2284   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2285     return true;
2286 
2287   auto SymbolName = getGprCountSymbolName(RegKind);
2288   if (!SymbolName)
2289     return true;
2290   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2291 
2292   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2293   int64_t OldCount;
2294 
2295   if (!Sym->isVariable())
2296     return !Error(getParser().getTok().getLoc(),
2297                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2298   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2299     return !Error(
2300         getParser().getTok().getLoc(),
2301         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2302 
2303   if (OldCount <= NewMax)
2304     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2305 
2306   return true;
2307 }
2308 
2309 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
2310   const auto &Tok = Parser.getTok();
2311   SMLoc StartLoc = Tok.getLoc();
2312   SMLoc EndLoc = Tok.getEndLoc();
2313   RegisterKind RegKind;
2314   unsigned Reg, RegNum, RegWidth;
2315 
2316   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2317     // FIXME: improve error messages (bug 41303).
2318     Error(StartLoc, "not a valid operand.");
2319     return nullptr;
2320   }
2321   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2322     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2323       return nullptr;
2324   } else
2325     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2326   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2327 }
2328 
2329 OperandMatchResultTy
2330 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2331   // TODO: add syntactic sugar for 1/(2*PI)
2332 
2333   assert(!isRegister());
2334   assert(!isModifier());
2335 
2336   const auto& Tok = getToken();
2337   const auto& NextTok = peekToken();
2338   bool IsReal = Tok.is(AsmToken::Real);
2339   SMLoc S = getLoc();
2340   bool Negate = false;
2341 
2342   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2343     lex();
2344     IsReal = true;
2345     Negate = true;
2346   }
2347 
2348   if (IsReal) {
2349     // Floating-point expressions are not supported;
2350     // only floating-point literals with an optional
2351     // sign are allowed here.
2352 
2353     StringRef Num = getTokenStr();
2354     lex();
2355 
2356     APFloat RealVal(APFloat::IEEEdouble());
2357     auto roundMode = APFloat::rmNearestTiesToEven;
2358     if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) {
2359       return MatchOperand_ParseFail;
2360     }
2361     if (Negate)
2362       RealVal.changeSign();
2363 
2364     Operands.push_back(
2365       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2366                                AMDGPUOperand::ImmTyNone, true));
2367 
2368     return MatchOperand_Success;
2369 
2370   } else {
2371     int64_t IntVal;
2372     const MCExpr *Expr;
2373     SMLoc S = getLoc();
2374 
2375     if (HasSP3AbsModifier) {
2376       // This is a workaround for handling expressions
2377       // as arguments of SP3 'abs' modifier, for example:
2378       //     |1.0|
2379       //     |-1|
2380       //     |1+x|
2381       // This syntax is not compatible with the syntax of standard
2382       // MC expressions (due to the trailing '|').
2383       SMLoc EndLoc;
2384       if (getParser().parsePrimaryExpr(Expr, EndLoc))
2385         return MatchOperand_ParseFail;
2386     } else {
2387       if (Parser.parseExpression(Expr))
2388         return MatchOperand_ParseFail;
2389     }
2390 
2391     if (Expr->evaluateAsAbsolute(IntVal)) {
2392       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2393     } else {
2394       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2395     }
2396 
2397     return MatchOperand_Success;
2398   }
2399 
2400   return MatchOperand_NoMatch;
2401 }
2402 
2403 OperandMatchResultTy
2404 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2405   if (!isRegister())
2406     return MatchOperand_NoMatch;
2407 
2408   if (auto R = parseRegister()) {
2409     assert(R->isReg());
2410     Operands.push_back(std::move(R));
2411     return MatchOperand_Success;
2412   }
2413   return MatchOperand_ParseFail;
2414 }
2415 
2416 OperandMatchResultTy
2417 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2418   auto res = parseReg(Operands);
2419   if (res != MatchOperand_NoMatch) {
2420     return res;
2421   } else if (isModifier()) {
2422     return MatchOperand_NoMatch;
2423   } else {
2424     return parseImm(Operands, HasSP3AbsMod);
2425   }
2426 }
2427 
2428 bool
2429 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2430   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2431     const auto &str = Token.getString();
2432     return str == "abs" || str == "neg" || str == "sext";
2433   }
2434   return false;
2435 }
2436 
2437 bool
2438 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2439   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2440 }
2441 
2442 bool
2443 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2444   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2445 }
2446 
2447 bool
2448 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2449   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2450 }
2451 
2452 // Check if this is an operand modifier or an opcode modifier
2453 // which may look like an expression but it is not. We should
2454 // avoid parsing these modifiers as expressions. Currently
2455 // recognized sequences are:
2456 //   |...|
2457 //   abs(...)
2458 //   neg(...)
2459 //   sext(...)
2460 //   -reg
2461 //   -|...|
2462 //   -abs(...)
2463 //   name:...
2464 // Note that simple opcode modifiers like 'gds' may be parsed as
2465 // expressions; this is a special case. See getExpressionAsToken.
2466 //
2467 bool
2468 AMDGPUAsmParser::isModifier() {
2469 
2470   AsmToken Tok = getToken();
2471   AsmToken NextToken[2];
2472   peekTokens(NextToken);
2473 
2474   return isOperandModifier(Tok, NextToken[0]) ||
2475          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2476          isOpcodeModifierWithVal(Tok, NextToken[0]);
2477 }
2478 
2479 // Check if the current token is an SP3 'neg' modifier.
2480 // Currently this modifier is allowed in the following context:
2481 //
2482 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2483 // 2. Before an 'abs' modifier: -abs(...)
2484 // 3. Before an SP3 'abs' modifier: -|...|
2485 //
2486 // In all other cases "-" is handled as a part
2487 // of an expression that follows the sign.
2488 //
2489 // Note: When "-" is followed by an integer literal,
2490 // this is interpreted as integer negation rather
2491 // than a floating-point NEG modifier applied to the literal.
2492 // Besides being counter-intuitive, such use of the floating-point
2493 // NEG modifier would have resulted in different meanings
2494 // of integer literals used with VOP1/2/C and VOP3,
2495 // for example:
2496 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2497 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2498 // Negative fp literals with a preceding "-" are
2499 // handled likewise, for uniformity.
2500 //
2501 bool
2502 AMDGPUAsmParser::parseSP3NegModifier() {
2503 
2504   AsmToken NextToken[2];
2505   peekTokens(NextToken);
2506 
2507   if (isToken(AsmToken::Minus) &&
2508       (isRegister(NextToken[0], NextToken[1]) ||
2509        NextToken[0].is(AsmToken::Pipe) ||
2510        isId(NextToken[0], "abs"))) {
2511     lex();
2512     return true;
2513   }
2514 
2515   return false;
2516 }
2517 
2518 OperandMatchResultTy
2519 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2520                                               bool AllowImm) {
2521   bool Neg, SP3Neg;
2522   bool Abs, SP3Abs;
2523   SMLoc Loc;
2524 
2525   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2526   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2527     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2528     return MatchOperand_ParseFail;
2529   }
2530 
2531   SP3Neg = parseSP3NegModifier();
2532 
2533   Loc = getLoc();
2534   Neg = trySkipId("neg");
2535   if (Neg && SP3Neg) {
2536     Error(Loc, "expected register or immediate");
2537     return MatchOperand_ParseFail;
2538   }
2539   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2540     return MatchOperand_ParseFail;
2541 
2542   Abs = trySkipId("abs");
2543   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2544     return MatchOperand_ParseFail;
2545 
2546   Loc = getLoc();
2547   SP3Abs = trySkipToken(AsmToken::Pipe);
2548   if (Abs && SP3Abs) {
2549     Error(Loc, "expected register or immediate");
2550     return MatchOperand_ParseFail;
2551   }
2552 
2553   OperandMatchResultTy Res;
2554   if (AllowImm) {
2555     Res = parseRegOrImm(Operands, SP3Abs);
2556   } else {
2557     Res = parseReg(Operands);
2558   }
2559   if (Res != MatchOperand_Success) {
2560     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2561   }
2562 
2563   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2564     return MatchOperand_ParseFail;
2565   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2566     return MatchOperand_ParseFail;
2567   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2568     return MatchOperand_ParseFail;
2569 
2570   AMDGPUOperand::Modifiers Mods;
2571   Mods.Abs = Abs || SP3Abs;
2572   Mods.Neg = Neg || SP3Neg;
2573 
2574   if (Mods.hasFPModifiers()) {
2575     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2576     if (Op.isExpr()) {
2577       Error(Op.getStartLoc(), "expected an absolute expression");
2578       return MatchOperand_ParseFail;
2579     }
2580     Op.setModifiers(Mods);
2581   }
2582   return MatchOperand_Success;
2583 }
2584 
2585 OperandMatchResultTy
2586 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2587                                                bool AllowImm) {
2588   bool Sext = trySkipId("sext");
2589   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2590     return MatchOperand_ParseFail;
2591 
2592   OperandMatchResultTy Res;
2593   if (AllowImm) {
2594     Res = parseRegOrImm(Operands);
2595   } else {
2596     Res = parseReg(Operands);
2597   }
2598   if (Res != MatchOperand_Success) {
2599     return Sext? MatchOperand_ParseFail : Res;
2600   }
2601 
2602   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2603     return MatchOperand_ParseFail;
2604 
2605   AMDGPUOperand::Modifiers Mods;
2606   Mods.Sext = Sext;
2607 
2608   if (Mods.hasIntModifiers()) {
2609     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2610     if (Op.isExpr()) {
2611       Error(Op.getStartLoc(), "expected an absolute expression");
2612       return MatchOperand_ParseFail;
2613     }
2614     Op.setModifiers(Mods);
2615   }
2616 
2617   return MatchOperand_Success;
2618 }
2619 
2620 OperandMatchResultTy
2621 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2622   return parseRegOrImmWithFPInputMods(Operands, false);
2623 }
2624 
2625 OperandMatchResultTy
2626 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2627   return parseRegOrImmWithIntInputMods(Operands, false);
2628 }
2629 
2630 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2631   auto Loc = getLoc();
2632   if (trySkipId("off")) {
2633     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2634                                                 AMDGPUOperand::ImmTyOff, false));
2635     return MatchOperand_Success;
2636   }
2637 
2638   if (!isRegister())
2639     return MatchOperand_NoMatch;
2640 
2641   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2642   if (Reg) {
2643     Operands.push_back(std::move(Reg));
2644     return MatchOperand_Success;
2645   }
2646 
2647   return MatchOperand_ParseFail;
2648 
2649 }
2650 
2651 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2652   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2653 
2654   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2655       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2656       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2657       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2658     return Match_InvalidOperand;
2659 
2660   if ((TSFlags & SIInstrFlags::VOP3) &&
2661       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2662       getForcedEncodingSize() != 64)
2663     return Match_PreferE32;
2664 
2665   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2666       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2667     // v_mac_f32/16 allow only dst_sel == DWORD;
2668     auto OpNum =
2669         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2670     const auto &Op = Inst.getOperand(OpNum);
2671     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2672       return Match_InvalidOperand;
2673     }
2674   }
2675 
2676   return Match_Success;
2677 }
2678 
2679 // What asm variants we should check
2680 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2681   if (getForcedEncodingSize() == 32) {
2682     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2683     return makeArrayRef(Variants);
2684   }
2685 
2686   if (isForcedVOP3()) {
2687     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2688     return makeArrayRef(Variants);
2689   }
2690 
2691   if (isForcedSDWA()) {
2692     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2693                                         AMDGPUAsmVariants::SDWA9};
2694     return makeArrayRef(Variants);
2695   }
2696 
2697   if (isForcedDPP()) {
2698     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2699     return makeArrayRef(Variants);
2700   }
2701 
2702   static const unsigned Variants[] = {
2703     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2704     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2705   };
2706 
2707   return makeArrayRef(Variants);
2708 }
2709 
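// Return the implicit SGPR (vcc, m0, flat_scratch, ...) read by a VOP
// instruction, if any; such a read counts toward the constant bus limit.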
2710 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2711   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2712   const unsigned Num = Desc.getNumImplicitUses();
2713   for (unsigned i = 0; i < Num; ++i) {
2714     unsigned Reg = Desc.ImplicitUses[i];
2715     switch (Reg) {
2716     case AMDGPU::FLAT_SCR:
2717     case AMDGPU::VCC:
2718     case AMDGPU::VCC_LO:
2719     case AMDGPU::VCC_HI:
2720     case AMDGPU::M0:
2721       return Reg;
2722     default:
2723       break;
2724     }
2725   }
2726   return AMDGPU::NoRegister;
2727 }
2728 
2729 // NB: This code is correct only when used to check constant
2730 // bus limitations because GFX7 supports no f16 inline constants.
2731 // Note that there are no cases when a GFX7 opcode violates
2732 // constant bus limitations due to the use of an f16 constant.
2733 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2734                                        unsigned OpIdx) const {
2735   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2736 
2737   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2738     return false;
2739   }
2740 
2741   const MCOperand &MO = Inst.getOperand(OpIdx);
2742 
2743   int64_t Val = MO.getImm();
2744   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2745 
2746   switch (OpSize) { // expected operand size
2747   case 8:
2748     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2749   case 4:
2750     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2751   case 2: {
2752     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2753     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2754         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2755         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2756         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2757         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
2758         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
2759       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2760     } else {
2761       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2762     }
2763   }
2764   default:
2765     llvm_unreachable("invalid operand size");
2766   }
2767 }
2768 
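// GFX10 allows most VALU encodings to read two scalar operands over the
// constant bus; earlier targets and the 64-bit shifts below are limited to one.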
2769 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
2770   if (!isGFX10())
2771     return 1;
2772 
2773   switch (Opcode) {
2774   // 64-bit shift instructions can use only one scalar value input
2775   case AMDGPU::V_LSHLREV_B64:
2776   case AMDGPU::V_LSHLREV_B64_gfx10:
2777   case AMDGPU::V_LSHL_B64:
2778   case AMDGPU::V_LSHRREV_B64:
2779   case AMDGPU::V_LSHRREV_B64_gfx10:
2780   case AMDGPU::V_LSHR_B64:
2781   case AMDGPU::V_ASHRREV_I64:
2782   case AMDGPU::V_ASHRREV_I64_gfx10:
2783   case AMDGPU::V_ASHR_I64:
2784     return 1;
2785   default:
2786     return 2;
2787   }
2788 }
2789 
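// An operand uses the constant bus if it is a literal or expression that is
// not an inline constant, or an SGPR-class register (SGPR_NULL is exempt).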
2790 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2791   const MCOperand &MO = Inst.getOperand(OpIdx);
2792   if (MO.isImm()) {
2793     return !isInlineConstant(Inst, OpIdx);
2794   } else if (MO.isReg()) {
2795     auto Reg = MO.getReg();
2796     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2797     return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL;
2798   } else {
2799     return true;
2800   }
2801 }
2802 
2803 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2804   const unsigned Opcode = Inst.getOpcode();
2805   const MCInstrDesc &Desc = MII.get(Opcode);
2806   unsigned ConstantBusUseCount = 0;
2807   unsigned NumLiterals = 0;
2808   unsigned LiteralSize;
2809 
2810   if (Desc.TSFlags &
2811       (SIInstrFlags::VOPC |
2812        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2813        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2814        SIInstrFlags::SDWA)) {
2815     // Check special imm operands (used by madmk, etc)
2816     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2817       ++ConstantBusUseCount;
2818     }
2819 
2820     SmallDenseSet<unsigned> SGPRsUsed;
2821     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2822     if (SGPRUsed != AMDGPU::NoRegister) {
2823       SGPRsUsed.insert(SGPRUsed);
2824       ++ConstantBusUseCount;
2825     }
2826 
2827     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2828     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2829     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2830 
2831     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2832 
2833     for (int OpIdx : OpIndices) {
2834       if (OpIdx == -1) break;
2835 
2836       const MCOperand &MO = Inst.getOperand(OpIdx);
2837       if (usesConstantBus(Inst, OpIdx)) {
2838         if (MO.isReg()) {
2839           const unsigned Reg = mc2PseudoReg(MO.getReg());
2840           // Pairs of registers with partial intersections like these
2841           //   s0, s[0:1]
2842           //   flat_scratch_lo, flat_scratch
2843           //   flat_scratch_lo, flat_scratch_hi
2844           // are theoretically valid but they are disabled anyway.
2845           // Note that this code mimics SIInstrInfo::verifyInstruction
2846           if (!SGPRsUsed.count(Reg)) {
2847             SGPRsUsed.insert(Reg);
2848             ++ConstantBusUseCount;
2849           }
2850         } else { // Expression or a literal
2851 
2852           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
2853             continue; // special operand like VINTERP attr_chan
2854 
2855           // An instruction may use only one literal.
2856           // This has been validated on the previous step.
2857           // See validateVOP3Literal.
2858           // This literal may be used as more than one operand.
2859           // If all these operands are of the same size,
2860           // this literal counts as one scalar value.
2861           // Otherwise it counts as 2 scalar values.
2862           // See "GFX10 Shader Programming", section 3.6.2.3.
2863 
2864           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
2865           if (Size < 4) Size = 4;
2866 
2867           if (NumLiterals == 0) {
2868             NumLiterals = 1;
2869             LiteralSize = Size;
2870           } else if (LiteralSize != Size) {
2871             NumLiterals = 2;
2872           }
2873         }
2874       }
2875     }
2876   }
2877   ConstantBusUseCount += NumLiterals;
2878 
2879   return ConstantBusUseCount <= getConstantBusLimit(Opcode);
2880 }
2881 
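// For instructions whose vdst is marked earlyclobber, reject operand
// combinations where the destination register overlaps any source register.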
2882 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2883   const unsigned Opcode = Inst.getOpcode();
2884   const MCInstrDesc &Desc = MII.get(Opcode);
2885 
2886   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2887   if (DstIdx == -1 ||
2888       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2889     return true;
2890   }
2891 
2892   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2893 
2894   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2895   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2896   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2897 
2898   assert(DstIdx != -1);
2899   const MCOperand &Dst = Inst.getOperand(DstIdx);
2900   assert(Dst.isReg());
2901   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2902 
2903   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2904 
2905   for (int SrcIdx : SrcIndices) {
2906     if (SrcIdx == -1) break;
2907     const MCOperand &Src = Inst.getOperand(SrcIdx);
2908     if (Src.isReg()) {
2909       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2910       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2911         return false;
2912       }
2913     }
2914   }
2915 
2916   return true;
2917 }
2918 
2919 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2920 
2921   const unsigned Opc = Inst.getOpcode();
2922   const MCInstrDesc &Desc = MII.get(Opc);
2923 
2924   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2925     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2926     assert(ClampIdx != -1);
2927     return Inst.getOperand(ClampIdx).getImm() == 0;
2928   }
2929 
2930   return true;
2931 }
2932 
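// Check that the vdata width (in dwords) matches the dmask popcount (4 for
// gather4) plus one dword when tfe is set; packed d16 halves the data size.
// E.g. dmask=0x7 with tfe expects a 4-dword vdata register.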
2933 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2934 
2935   const unsigned Opc = Inst.getOpcode();
2936   const MCInstrDesc &Desc = MII.get(Opc);
2937 
2938   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2939     return true;
2940 
2941   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2942   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2943   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2944 
2945   assert(VDataIdx != -1);
2946   assert(DMaskIdx != -1);
2947   assert(TFEIdx != -1);
2948 
2949   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2950   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2951   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2952   if (DMask == 0)
2953     DMask = 1;
2954 
2955   unsigned DataSize =
2956     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2957   if (hasPackedD16()) {
2958     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2959     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2960       DataSize = (DataSize + 1) / 2;
2961   }
2962 
2963   return (VDataSize / 4) == DataSize + TFESize;
2964 }
2965 
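// For GFX10 MIMG, check the number of address dwords implied by dim and the
// base opcode: with NSA each vaddr operand supplies one dword, otherwise the
// expected count is rounded up to 8 or 16 (when above 4) and must match the
// width of the single vaddr register.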
2966 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
2967   const unsigned Opc = Inst.getOpcode();
2968   const MCInstrDesc &Desc = MII.get(Opc);
2969 
2970   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
2971     return true;
2972 
2973   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
2974   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
2975       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
2976   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
2977   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
2978   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2979 
2980   assert(VAddr0Idx != -1);
2981   assert(SrsrcIdx != -1);
2982   assert(DimIdx != -1);
2983   assert(SrsrcIdx > VAddr0Idx);
2984 
2985   unsigned Dim = Inst.getOperand(DimIdx).getImm();
2986   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
2987   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
2988   unsigned VAddrSize =
2989       IsNSA ? SrsrcIdx - VAddr0Idx
2990             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
2991 
2992   unsigned AddrSize = BaseOpcode->NumExtraArgs +
2993                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
2994                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
2995                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
2996   if (!IsNSA) {
2997     if (AddrSize > 8)
2998       AddrSize = 16;
2999     else if (AddrSize > 4)
3000       AddrSize = 8;
3001   }
3002 
3003   return VAddrSize == AddrSize;
3004 }
3005 
3006 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3007 
3008   const unsigned Opc = Inst.getOpcode();
3009   const MCInstrDesc &Desc = MII.get(Opc);
3010 
3011   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3012     return true;
3013   if (!Desc.mayLoad() || !Desc.mayStore())
3014     return true; // Not atomic
3015 
3016   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3017   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3018 
3019   // This is an incomplete check because image_atomic_cmpswap
3020   // may only use 0x3 and 0xf while other atomic operations
3021   // may use 0x1 and 0x3. However these limitations are
3022   // verified when we check that dmask matches dst size.
3023   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3024 }
3025 
3026 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3027 
3028   const unsigned Opc = Inst.getOpcode();
3029   const MCInstrDesc &Desc = MII.get(Opc);
3030 
3031   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3032     return true;
3033 
3034   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3035   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3036 
3037   // GATHER4 instructions use dmask in a different fashion compared to
3038   // other MIMG instructions. The only useful DMASK values are
3039   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3040   // (red,red,red,red) etc.) The ISA document doesn't mention
3041   // this.
3042   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3043 }
3044 
3045 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3046 
3047   const unsigned Opc = Inst.getOpcode();
3048   const MCInstrDesc &Desc = MII.get(Opc);
3049 
3050   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3051     return true;
3052 
3053   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3054   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3055     if (isCI() || isSI())
3056       return false;
3057   }
3058 
3059   return true;
3060 }
3061 
3062 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3063   const unsigned Opc = Inst.getOpcode();
3064   const MCInstrDesc &Desc = MII.get(Opc);
3065 
3066   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3067     return true;
3068 
3069   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3070   if (DimIdx < 0)
3071     return true;
3072 
3073   int64_t Imm = Inst.getOperand(DimIdx).getImm();
3074   if (Imm < 0 || Imm >= 8)
3075     return false;
3076 
3077   return true;
3078 }
3079 
3080 static bool IsRevOpcode(const unsigned Opcode)
3081 {
3082   switch (Opcode) {
3083   case AMDGPU::V_SUBREV_F32_e32:
3084   case AMDGPU::V_SUBREV_F32_e64:
3085   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3086   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3087   case AMDGPU::V_SUBREV_F32_e32_vi:
3088   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3089   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3090   case AMDGPU::V_SUBREV_F32_e64_vi:
3091 
3092   case AMDGPU::V_SUBREV_I32_e32:
3093   case AMDGPU::V_SUBREV_I32_e64:
3094   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3095   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3096 
3097   case AMDGPU::V_SUBBREV_U32_e32:
3098   case AMDGPU::V_SUBBREV_U32_e64:
3099   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3100   case AMDGPU::V_SUBBREV_U32_e32_vi:
3101   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3102   case AMDGPU::V_SUBBREV_U32_e64_vi:
3103 
3104   case AMDGPU::V_SUBREV_U32_e32:
3105   case AMDGPU::V_SUBREV_U32_e64:
3106   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3107   case AMDGPU::V_SUBREV_U32_e32_vi:
3108   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3109   case AMDGPU::V_SUBREV_U32_e64_vi:
3110 
3111   case AMDGPU::V_SUBREV_F16_e32:
3112   case AMDGPU::V_SUBREV_F16_e64:
3113   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3114   case AMDGPU::V_SUBREV_F16_e32_vi:
3115   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3116   case AMDGPU::V_SUBREV_F16_e64_vi:
3117 
3118   case AMDGPU::V_SUBREV_U16_e32:
3119   case AMDGPU::V_SUBREV_U16_e64:
3120   case AMDGPU::V_SUBREV_U16_e32_vi:
3121   case AMDGPU::V_SUBREV_U16_e64_vi:
3122 
3123   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3124   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3125   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3126 
3127   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3128   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3129 
3130   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3131   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3132 
3133   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3134   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3135 
3136   case AMDGPU::V_LSHRREV_B32_e32:
3137   case AMDGPU::V_LSHRREV_B32_e64:
3138   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3139   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3140   case AMDGPU::V_LSHRREV_B32_e32_vi:
3141   case AMDGPU::V_LSHRREV_B32_e64_vi:
3142   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3143   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3144 
3145   case AMDGPU::V_ASHRREV_I32_e32:
3146   case AMDGPU::V_ASHRREV_I32_e64:
3147   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3148   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3149   case AMDGPU::V_ASHRREV_I32_e32_vi:
3150   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3151   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3152   case AMDGPU::V_ASHRREV_I32_e64_vi:
3153 
3154   case AMDGPU::V_LSHLREV_B32_e32:
3155   case AMDGPU::V_LSHLREV_B32_e64:
3156   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3157   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3158   case AMDGPU::V_LSHLREV_B32_e32_vi:
3159   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3160   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3161   case AMDGPU::V_LSHLREV_B32_e64_vi:
3162 
3163   case AMDGPU::V_LSHLREV_B16_e32:
3164   case AMDGPU::V_LSHLREV_B16_e64:
3165   case AMDGPU::V_LSHLREV_B16_e32_vi:
3166   case AMDGPU::V_LSHLREV_B16_e64_vi:
3167   case AMDGPU::V_LSHLREV_B16_gfx10:
3168 
3169   case AMDGPU::V_LSHRREV_B16_e32:
3170   case AMDGPU::V_LSHRREV_B16_e64:
3171   case AMDGPU::V_LSHRREV_B16_e32_vi:
3172   case AMDGPU::V_LSHRREV_B16_e64_vi:
3173   case AMDGPU::V_LSHRREV_B16_gfx10:
3174 
3175   case AMDGPU::V_ASHRREV_I16_e32:
3176   case AMDGPU::V_ASHRREV_I16_e64:
3177   case AMDGPU::V_ASHRREV_I16_e32_vi:
3178   case AMDGPU::V_ASHRREV_I16_e64_vi:
3179   case AMDGPU::V_ASHRREV_I16_gfx10:
3180 
3181   case AMDGPU::V_LSHLREV_B64:
3182   case AMDGPU::V_LSHLREV_B64_gfx10:
3183   case AMDGPU::V_LSHLREV_B64_vi:
3184 
3185   case AMDGPU::V_LSHRREV_B64:
3186   case AMDGPU::V_LSHRREV_B64_gfx10:
3187   case AMDGPU::V_LSHRREV_B64_vi:
3188 
3189   case AMDGPU::V_ASHRREV_I64:
3190   case AMDGPU::V_ASHRREV_I64_gfx10:
3191   case AMDGPU::V_ASHRREV_I64_vi:
3192 
3193   case AMDGPU::V_PK_LSHLREV_B16:
3194   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3195   case AMDGPU::V_PK_LSHLREV_B16_vi:
3196 
3197   case AMDGPU::V_PK_LSHRREV_B16:
3198   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3199   case AMDGPU::V_PK_LSHRREV_B16_vi:
3200   case AMDGPU::V_PK_ASHRREV_I16:
3201   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3202   case AMDGPU::V_PK_ASHRREV_I16_vi:
3203     return true;
3204   default:
3205     return false;
3206   }
3207 }
3208 
3209 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3210 
3211   using namespace SIInstrFlags;
3212   const unsigned Opcode = Inst.getOpcode();
3213   const MCInstrDesc &Desc = MII.get(Opcode);
3214 
3215   // The lds_direct register is defined so that it can be used
3216   // with 9-bit operands only. Ignore encodings which do not accept these.
3217   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3218     return true;
3219 
3220   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3221   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3222   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3223 
3224   const int SrcIndices[] = { Src1Idx, Src2Idx };
3225 
3226   // lds_direct cannot be specified as either src1 or src2.
3227   for (int SrcIdx : SrcIndices) {
3228     if (SrcIdx == -1) break;
3229     const MCOperand &Src = Inst.getOperand(SrcIdx);
3230     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3231       return false;
3232     }
3233   }
3234 
3235   if (Src0Idx == -1)
3236     return true;
3237 
3238   const MCOperand &Src = Inst.getOperand(Src0Idx);
3239   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3240     return true;
3241 
3242   // lds_direct is specified as src0. Check additional limitations.
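  // For example (illustrative), "v_mov_b32 v0, lds_direct" would be accepted
  // by this check, while an SDWA form or a *rev* opcode reading lds_direct
  // as src0 is not.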
3243   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3244 }
3245 
3246 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3247   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3248     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3249     if (Op.isFlatOffset())
3250       return Op.getStartLoc();
3251   }
3252   return getLoc();
3253 }
3254 
3255 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3256                                          const OperandVector &Operands) {
3257   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3258   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3259     return true;
3260 
3261   auto Opcode = Inst.getOpcode();
3262   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3263   assert(OpNum != -1);
3264 
3265   const auto &Op = Inst.getOperand(OpNum);
3266   if (!hasFlatOffsets() && Op.getImm() != 0) {
3267     Error(getFlatOffsetLoc(Operands),
3268           "flat offset modifier is not supported on this GPU");
3269     return false;
3270   }
3271 
3272   // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
3273   // For the FLAT segment the offset must be non-negative;
3274   // the MSB is ignored and forced to zero.
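  // Derived from the checks below (illustrative): on GFX9 global/scratch
  // accept offsets in [-4096, 4095] and flat accepts [0, 4095]; on GFX10
  // the ranges are [-2048, 2047] and [0, 2047] respectively.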
3275   unsigned OffsetSize = isGFX9() ? 13 : 12;
3276   if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
3277     if (!isIntN(OffsetSize, Op.getImm())) {
3278       Error(getFlatOffsetLoc(Operands),
3279             isGFX9() ? "expected a 13-bit signed offset" :
3280                        "expected a 12-bit signed offset");
3281       return false;
3282     }
3283   } else {
3284     if (!isUIntN(OffsetSize - 1, Op.getImm())) {
3285       Error(getFlatOffsetLoc(Operands),
3286             isGFX9() ? "expected a 12-bit unsigned offset" :
3287                        "expected an 11-bit unsigned offset");
3288       return false;
3289     }
3290   }
3291 
3292   return true;
3293 }
3294 
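// SOP2/SOPC instructions can encode at most one 32-bit literal; count
// distinct literal values and relocatable expressions among the sources.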
3295 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3296   unsigned Opcode = Inst.getOpcode();
3297   const MCInstrDesc &Desc = MII.get(Opcode);
3298   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3299     return true;
3300 
3301   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3302   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3303 
3304   const int OpIndices[] = { Src0Idx, Src1Idx };
3305 
3306   unsigned NumExprs = 0;
3307   unsigned NumLiterals = 0;
3308   uint32_t LiteralValue;
3309 
3310   for (int OpIdx : OpIndices) {
3311     if (OpIdx == -1) break;
3312 
3313     const MCOperand &MO = Inst.getOperand(OpIdx);
3314     // Exclude special imm operands (like the one used by s_set_gpr_idx_on)
3315     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3316       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3317         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3318         if (NumLiterals == 0 || LiteralValue != Value) {
3319           LiteralValue = Value;
3320           ++NumLiterals;
3321         }
3322       } else if (MO.isExpr()) {
3323         ++NumExprs;
3324       }
3325     }
3326   }
3327 
3328   return NumLiterals + NumExprs <= 1;
3329 }
3330 
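// v_permlane16/v_permlanex16 only use the two low op_sel bits; any other
// bit set is invalid.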
3331 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3332   const unsigned Opc = Inst.getOpcode();
3333   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3334       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3335     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3336     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3337 
3338     if (OpSel & ~3)
3339       return false;
3340   }
3341   return true;
3342 }
3343 
3344 // Check if the VCC register matches the wavefront size
3345 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3346   auto FB = getFeatureBits();
3347   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3348     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3349 }
3350 
3351 // A VOP3 literal is only allowed on GFX10+, and at most one can be used
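// For example (illustrative), "v_add3_u32 v0, v1, 0x11223344, v2" requires
// FeatureVOP3Literal, and a second distinct literal in the same instruction
// would be rejected.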
3352 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3353   unsigned Opcode = Inst.getOpcode();
3354   const MCInstrDesc &Desc = MII.get(Opcode);
3355   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3356     return true;
3357 
3358   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3359   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3360   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3361 
3362   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3363 
3364   unsigned NumExprs = 0;
3365   unsigned NumLiterals = 0;
3366   uint32_t LiteralValue;
3367 
3368   for (int OpIdx : OpIndices) {
3369     if (OpIdx == -1) break;
3370 
3371     const MCOperand &MO = Inst.getOperand(OpIdx);
3372     if (!MO.isImm() && !MO.isExpr())
3373       continue;
3374     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3375       continue;
3376 
3377     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3378         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug])
3379       return false;
3380 
3381     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3382       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3383       if (NumLiterals == 0 || LiteralValue != Value) {
3384         LiteralValue = Value;
3385         ++NumLiterals;
3386       }
3387     } else if (MO.isExpr()) {
3388       ++NumExprs;
3389     }
3390   }
3391   NumLiterals += NumExprs;
3392 
3393   return !NumLiterals ||
3394          (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3395 }
3396 
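// Run target-specific semantic checks that the generic matcher cannot
// express; report the first failure at IDLoc.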
3397 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3398                                           const SMLoc &IDLoc,
3399                                           const OperandVector &Operands) {
3400   if (!validateLdsDirect(Inst)) {
3401     Error(IDLoc,
3402       "invalid use of lds_direct");
3403     return false;
3404   }
3405   if (!validateSOPLiteral(Inst)) {
3406     Error(IDLoc,
3407       "only one literal operand is allowed");
3408     return false;
3409   }
3410   if (!validateVOP3Literal(Inst)) {
3411     Error(IDLoc,
3412       "invalid literal operand");
3413     return false;
3414   }
3415   if (!validateConstantBusLimitations(Inst)) {
3416     Error(IDLoc,
3417       "invalid operand (violates constant bus restrictions)");
3418     return false;
3419   }
3420   if (!validateEarlyClobberLimitations(Inst)) {
3421     Error(IDLoc,
3422       "destination must be different than all sources");
3423     return false;
3424   }
3425   if (!validateIntClampSupported(Inst)) {
3426     Error(IDLoc,
3427       "integer clamping is not supported on this GPU");
3428     return false;
3429   }
3430   if (!validateOpSel(Inst)) {
3431     Error(IDLoc,
3432       "invalid op_sel operand");
3433     return false;
3434   }
3435   // For MUBUF/MTBUF, d16 is part of the opcode, so there is nothing to validate.
3436   if (!validateMIMGD16(Inst)) {
3437     Error(IDLoc,
3438       "d16 modifier is not supported on this GPU");
3439     return false;
3440   }
3441   if (!validateMIMGDim(Inst)) {
3442     Error(IDLoc, "dim modifier is required on this GPU");
3443     return false;
3444   }
3445   if (!validateMIMGDataSize(Inst)) {
3446     Error(IDLoc,
3447       "image data size does not match dmask and tfe");
3448     return false;
3449   }
3450   if (!validateMIMGAddrSize(Inst)) {
3451     Error(IDLoc,
3452       "image address size does not match dim and a16");
3453     return false;
3454   }
3455   if (!validateMIMGAtomicDMask(Inst)) {
3456     Error(IDLoc,
3457       "invalid atomic image dmask");
3458     return false;
3459   }
3460   if (!validateMIMGGatherDMask(Inst)) {
3461     Error(IDLoc,
3462       "invalid image_gather dmask: only one bit must be set");
3463     return false;
3464   }
3465   if (!validateFlatOffset(Inst, Operands)) {
3466     return false;
3467   }
3468 
3469   return true;
3470 }
3471 
3472 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3473                                             const FeatureBitset &FBS,
3474                                             unsigned VariantID = 0);
3475 
3476 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3477                                               OperandVector &Operands,
3478                                               MCStreamer &Out,
3479                                               uint64_t &ErrorInfo,
3480                                               bool MatchingInlineAsm) {
3481   MCInst Inst;
3482   unsigned Result = Match_Success;
3483   for (auto Variant : getMatchedVariants()) {
3484     uint64_t EI;
3485     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3486                                   Variant);
3487     // We order match statuses from least to most specific and keep the most
3488     // specific status seen so far as the result:
3489     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3490     if ((R == Match_Success) ||
3491         (R == Match_PreferE32) ||
3492         (R == Match_MissingFeature && Result != Match_PreferE32) ||
3493         (R == Match_InvalidOperand && Result != Match_MissingFeature
3494                                    && Result != Match_PreferE32) ||
3495         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
3496                                    && Result != Match_MissingFeature
3497                                    && Result != Match_PreferE32)) {
3498       Result = R;
3499       ErrorInfo = EI;
3500     }
3501     if (R == Match_Success)
3502       break;
3503   }
3504 
3505   switch (Result) {
3506   default: break;
3507   case Match_Success:
3508     if (!validateInstruction(Inst, IDLoc, Operands)) {
3509       return true;
3510     }
3511     Inst.setLoc(IDLoc);
3512     Out.EmitInstruction(Inst, getSTI());
3513     return false;
3514 
3515   case Match_MissingFeature:
3516     return Error(IDLoc, "instruction not supported on this GPU");
3517 
3518   case Match_MnemonicFail: {
3519     FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3520     std::string Suggestion = AMDGPUMnemonicSpellCheck(
3521         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
3522     return Error(IDLoc, "invalid instruction" + Suggestion,
3523                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
3524   }
3525 
3526   case Match_InvalidOperand: {
3527     SMLoc ErrorLoc = IDLoc;
3528     if (ErrorInfo != ~0ULL) {
3529       if (ErrorInfo >= Operands.size()) {
3530         return Error(IDLoc, "too few operands for instruction");
3531       }
3532       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3533       if (ErrorLoc == SMLoc())
3534         ErrorLoc = IDLoc;
3535     }
3536     return Error(ErrorLoc, "invalid operand for instruction");
3537   }
3538 
3539   case Match_PreferE32:
3540     return Error(IDLoc, "internal error: instruction without _e64 suffix "
3541                         "should be encoded as e32");
3542   }
3543   llvm_unreachable("Implement any new match types added!");
3544 }
3545 
3546 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3547   int64_t Tmp = -1;
3548   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3549     return true;
3550   }
3551   if (getParser().parseAbsoluteExpression(Tmp)) {
3552     return true;
3553   }
3554   Ret = static_cast<uint32_t>(Tmp);
3555   return false;
3556 }
3557 
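// Parse a "major, minor" version pair, as used by e.g. (illustrative)
// ".hsa_code_object_version 2,1".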
3558 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3559                                                uint32_t &Minor) {
3560   if (ParseAsAbsoluteExpression(Major))
3561     return TokError("invalid major version");
3562 
3563   if (getLexer().isNot(AsmToken::Comma))
3564     return TokError("minor version number required, comma expected");
3565   Lex();
3566 
3567   if (ParseAsAbsoluteExpression(Minor))
3568     return TokError("invalid minor version");
3569 
3570   return false;
3571 }
3572 
3573 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3574   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3575     return TokError("directive only supported for amdgcn architecture");
3576 
3577   std::string Target;
3578 
3579   SMLoc TargetStart = getTok().getLoc();
3580   if (getParser().parseEscapedString(Target))
3581     return true;
3582   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3583 
3584   std::string ExpectedTarget;
3585   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3586   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3587 
3588   if (Target != ExpectedTargetOS.str())
3589     return getParser().Error(TargetRange.Start, "target must match options",
3590                              TargetRange);
3591 
3592   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3593   return false;
3594 }
3595 
3596 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3597   return getParser().Error(Range.Start, "value out of range", Range);
3598 }
3599 
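// Convert raw VGPR/SGPR counts into the granulated block counts encoded in
// compute_pgm_rsrc1, accounting for SGPRs reserved for VCC, flat_scratch and
// XNACK as well as target-specific limits.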
3600 bool AMDGPUAsmParser::calculateGPRBlocks(
3601     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3602     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
3603     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
3604     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
3605   // TODO(scott.linder): These calculations are duplicated from
3606   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3607   IsaVersion Version = getIsaVersion(getSTI().getCPU());
3608 
3609   unsigned NumVGPRs = NextFreeVGPR;
3610   unsigned NumSGPRs = NextFreeSGPR;
3611 
3612   if (Version.Major >= 10)
3613     NumSGPRs = 0;
3614   else {
3615     unsigned MaxAddressableNumSGPRs =
3616         IsaInfo::getAddressableNumSGPRs(&getSTI());
3617 
3618     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3619         NumSGPRs > MaxAddressableNumSGPRs)
3620       return OutOfRangeError(SGPRRange);
3621 
3622     NumSGPRs +=
3623         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3624 
3625     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3626         NumSGPRs > MaxAddressableNumSGPRs)
3627       return OutOfRangeError(SGPRRange);
3628 
3629     if (Features.test(FeatureSGPRInitBug))
3630       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3631   }
3632 
3633   VGPRBlocks =
3634       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
3635   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3636 
3637   return false;
3638 }
3639 
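// Parse the body of a .amdhsa_kernel ... .end_amdhsa_kernel block and emit
// the resulting kernel descriptor through the target streamer.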
3640 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3641   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3642     return TokError("directive only supported for amdgcn architecture");
3643 
3644   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3645     return TokError("directive only supported for amdhsa OS");
3646 
3647   StringRef KernelName;
3648   if (getParser().parseIdentifier(KernelName))
3649     return true;
3650 
3651   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3652 
3653   StringSet<> Seen;
3654 
3655   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3656 
3657   SMRange VGPRRange;
3658   uint64_t NextFreeVGPR = 0;
3659   SMRange SGPRRange;
3660   uint64_t NextFreeSGPR = 0;
3661   unsigned UserSGPRCount = 0;
3662   bool ReserveVCC = true;
3663   bool ReserveFlatScr = true;
3664   bool ReserveXNACK = hasXNACK();
3665   Optional<bool> EnableWavefrontSize32;
3666 
3667   while (true) {
3668     while (getLexer().is(AsmToken::EndOfStatement))
3669       Lex();
3670 
3671     if (getLexer().isNot(AsmToken::Identifier))
3672       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3673 
3674     StringRef ID = getTok().getIdentifier();
3675     SMRange IDRange = getTok().getLocRange();
3676     Lex();
3677 
3678     if (ID == ".end_amdhsa_kernel")
3679       break;
3680 
3681     if (Seen.find(ID) != Seen.end())
3682       return TokError(".amdhsa_ directives cannot be repeated");
3683     Seen.insert(ID);
3684 
3685     SMLoc ValStart = getTok().getLoc();
3686     int64_t IVal;
3687     if (getParser().parseAbsoluteExpression(IVal))
3688       return true;
3689     SMLoc ValEnd = getTok().getLoc();
3690     SMRange ValRange = SMRange(ValStart, ValEnd);
3691 
3692     if (IVal < 0)
3693       return OutOfRangeError(ValRange);
3694 
3695     uint64_t Val = IVal;
3696 
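// Range-check VALUE against the bit width of ENTRY and set the corresponding
// bits in FIELD.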
3697 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
3698   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
3699     return OutOfRangeError(RANGE);                                             \
3700   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3701 
3702     if (ID == ".amdhsa_group_segment_fixed_size") {
3703       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3704         return OutOfRangeError(ValRange);
3705       KD.group_segment_fixed_size = Val;
3706     } else if (ID == ".amdhsa_private_segment_fixed_size") {
3707       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3708         return OutOfRangeError(ValRange);
3709       KD.private_segment_fixed_size = Val;
3710     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3711       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3712                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3713                        Val, ValRange);
3714       if (Val)
3715         UserSGPRCount += 4;
3716     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3717       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3718                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3719                        ValRange);
3720       if (Val)
3721         UserSGPRCount += 2;
3722     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3723       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3724                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3725                        ValRange);
3726       if (Val)
3727         UserSGPRCount += 2;
3728     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3729       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3730                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3731                        Val, ValRange);
3732       if (Val)
3733         UserSGPRCount += 2;
3734     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3735       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3736                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3737                        ValRange);
3738       if (Val)
3739         UserSGPRCount += 2;
3740     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3741       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3742                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3743                        ValRange);
3744       if (Val)
3745         UserSGPRCount += 2;
3746     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3747       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3748                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3749                        Val, ValRange);
3750       if (Val)
3751         UserSGPRCount += 1;
3752     } else if (ID == ".amdhsa_wavefront_size32") {
3753       if (IVersion.Major < 10)
3754         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3755                                  IDRange);
3756       EnableWavefrontSize32 = Val;
3757       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3758                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
3759                        Val, ValRange);
3760     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3761       PARSE_BITS_ENTRY(
3762           KD.compute_pgm_rsrc2,
3763           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3764           ValRange);
3765     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3766       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3767                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3768                        ValRange);
3769     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3770       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3771                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3772                        ValRange);
3773     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3774       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3775                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3776                        ValRange);
3777     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3778       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3779                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3780                        ValRange);
3781     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3782       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3783                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3784                        ValRange);
3785     } else if (ID == ".amdhsa_next_free_vgpr") {
3786       VGPRRange = ValRange;
3787       NextFreeVGPR = Val;
3788     } else if (ID == ".amdhsa_next_free_sgpr") {
3789       SGPRRange = ValRange;
3790       NextFreeSGPR = Val;
3791     } else if (ID == ".amdhsa_reserve_vcc") {
3792       if (!isUInt<1>(Val))
3793         return OutOfRangeError(ValRange);
3794       ReserveVCC = Val;
3795     } else if (ID == ".amdhsa_reserve_flat_scratch") {
3796       if (IVersion.Major < 7)
3797         return getParser().Error(IDRange.Start, "directive requires gfx7+",
3798                                  IDRange);
3799       if (!isUInt<1>(Val))
3800         return OutOfRangeError(ValRange);
3801       ReserveFlatScr = Val;
3802     } else if (ID == ".amdhsa_reserve_xnack_mask") {
3803       if (IVersion.Major < 8)
3804         return getParser().Error(IDRange.Start, "directive requires gfx8+",
3805                                  IDRange);
3806       if (!isUInt<1>(Val))
3807         return OutOfRangeError(ValRange);
3808       ReserveXNACK = Val;
3809     } else if (ID == ".amdhsa_float_round_mode_32") {
3810       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3811                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
3812     } else if (ID == ".amdhsa_float_round_mode_16_64") {
3813       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3814                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
3815     } else if (ID == ".amdhsa_float_denorm_mode_32") {
3816       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3817                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
3818     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
3819       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3820                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
3821                        ValRange);
3822     } else if (ID == ".amdhsa_dx10_clamp") {
3823       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3824                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
3825     } else if (ID == ".amdhsa_ieee_mode") {
3826       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
3827                        Val, ValRange);
3828     } else if (ID == ".amdhsa_fp16_overflow") {
3829       if (IVersion.Major < 9)
3830         return getParser().Error(IDRange.Start, "directive requires gfx9+",
3831                                  IDRange);
3832       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
3833                        ValRange);
3834     } else if (ID == ".amdhsa_workgroup_processor_mode") {
3835       if (IVersion.Major < 10)
3836         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3837                                  IDRange);
3838       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
3839                        ValRange);
3840     } else if (ID == ".amdhsa_memory_ordered") {
3841       if (IVersion.Major < 10)
3842         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3843                                  IDRange);
3844       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
3845                        ValRange);
3846     } else if (ID == ".amdhsa_forward_progress") {
3847       if (IVersion.Major < 10)
3848         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3849                                  IDRange);
3850       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
3851                        ValRange);
3852     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
3853       PARSE_BITS_ENTRY(
3854           KD.compute_pgm_rsrc2,
3855           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
3856           ValRange);
3857     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
3858       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3859                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
3860                        Val, ValRange);
3861     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
3862       PARSE_BITS_ENTRY(
3863           KD.compute_pgm_rsrc2,
3864           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
3865           ValRange);
3866     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
3867       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3868                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
3869                        Val, ValRange);
3870     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
3871       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3872                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
3873                        Val, ValRange);
3874     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
3875       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3876                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
3877                        Val, ValRange);
3878     } else if (ID == ".amdhsa_exception_int_div_zero") {
3879       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3880                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
3881                        Val, ValRange);
3882     } else {
3883       return getParser().Error(IDRange.Start,
3884                                "unknown .amdhsa_kernel directive", IDRange);
3885     }
3886 
3887 #undef PARSE_BITS_ENTRY
3888   }
3889 
3890   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
3891     return TokError(".amdhsa_next_free_vgpr directive is required");
3892 
3893   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
3894     return TokError(".amdhsa_next_free_sgpr directive is required");
3895 
3896   unsigned VGPRBlocks;
3897   unsigned SGPRBlocks;
3898   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
3899                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
3900                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
3901                          SGPRBlocks))
3902     return true;
3903 
3904   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
3905           VGPRBlocks))
3906     return OutOfRangeError(VGPRRange);
3907   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3908                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
3909 
3910   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
3911           SGPRBlocks))
3912     return OutOfRangeError(SGPRRange);
3913   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3914                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
3915                   SGPRBlocks);
3916 
3917   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
3918     return TokError("too many user SGPRs enabled");
3919   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
3920                   UserSGPRCount);
3921 
3922   getTargetStreamer().EmitAmdhsaKernelDescriptor(
3923       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
3924       ReserveFlatScr, ReserveXNACK);
3925   return false;
3926 }
3927 
3928 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
3929   uint32_t Major;
3930   uint32_t Minor;
3931 
3932   if (ParseDirectiveMajorMinor(Major, Minor))
3933     return true;
3934 
3935   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
3936   return false;
3937 }
3938 
3939 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
3940   uint32_t Major;
3941   uint32_t Minor;
3942   uint32_t Stepping;
3943   StringRef VendorName;
3944   StringRef ArchName;
3945 
3946   // If this directive has no arguments, then use the ISA version for the
3947   // targeted GPU.
3948   if (getLexer().is(AsmToken::EndOfStatement)) {
3949     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3950     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
3951                                                       ISA.Stepping,
3952                                                       "AMD", "AMDGPU");
3953     return false;
3954   }
3955 
3956   if (ParseDirectiveMajorMinor(Major, Minor))
3957     return true;
3958 
3959   if (getLexer().isNot(AsmToken::Comma))
3960     return TokError("stepping version number required, comma expected");
3961   Lex();
3962 
3963   if (ParseAsAbsoluteExpression(Stepping))
3964     return TokError("invalid stepping version");
3965 
3966   if (getLexer().isNot(AsmToken::Comma))
3967     return TokError("vendor name required, comma expected");
3968   Lex();
3969 
3970   if (getLexer().isNot(AsmToken::String))
3971     return TokError("invalid vendor name");
3972 
3973   VendorName = getLexer().getTok().getStringContents();
3974   Lex();
3975 
3976   if (getLexer().isNot(AsmToken::Comma))
3977     return TokError("arch name required, comma expected");
3978   Lex();
3979 
3980   if (getLexer().isNot(AsmToken::String))
3981     return TokError("invalid arch name");
3982 
3983   ArchName = getLexer().getTok().getStringContents();
3984   Lex();
3985 
3986   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
3987                                                     VendorName, ArchName);
3988   return false;
3989 }
3990 
3991 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
3992                                                amd_kernel_code_t &Header) {
3993   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
3994   // assembly for backwards compatibility.
3995   if (ID == "max_scratch_backing_memory_byte_size") {
3996     Parser.eatToEndOfStatement();
3997     return false;
3998   }
3999 
4000   SmallString<40> ErrStr;
4001   raw_svector_ostream Err(ErrStr);
4002   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4003     return TokError(Err.str());
4004   }
4005   Lex();
4006 
4007   if (ID == "enable_wavefront_size32") {
4008     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4009       if (!isGFX10())
4010         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4011       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4012         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4013     } else {
4014       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4015         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4016     }
4017   }
4018 
4019   if (ID == "wavefront_size") {
4020     if (Header.wavefront_size == 5) {
4021       if (!isGFX10())
4022         return TokError("wavefront_size=5 is only allowed on GFX10+");
4023       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4024         return TokError("wavefront_size=5 requires +WavefrontSize32");
4025     } else if (Header.wavefront_size == 6) {
4026       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4027         return TokError("wavefront_size=6 requires +WavefrontSize64");
4028     }
4029   }
4030 
4031   if (ID == "enable_wgp_mode") {
4032     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
4033       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4034   }
4035 
4036   if (ID == "enable_mem_ordered") {
4037     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
4038       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4039   }
4040 
4041   if (ID == "enable_fwd_progress") {
4042     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
4043       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4044   }
4045 
4046   return false;
4047 }
4048 
4049 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4050   amd_kernel_code_t Header;
4051   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4052 
4053   while (true) {
4054     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4055     // will set the current token to EndOfStatement.
4056     while(getLexer().is(AsmToken::EndOfStatement))
4057       Lex();
4058 
4059     if (getLexer().isNot(AsmToken::Identifier))
4060       return TokError("expected value identifier or .end_amd_kernel_code_t");
4061 
4062     StringRef ID = getLexer().getTok().getIdentifier();
4063     Lex();
4064 
4065     if (ID == ".end_amd_kernel_code_t")
4066       break;
4067 
4068     if (ParseAMDKernelCodeTValue(ID, Header))
4069       return true;
4070   }
4071 
4072   getTargetStreamer().EmitAMDKernelCodeT(Header);
4073 
4074   return false;
4075 }
4076 
4077 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4078   if (getLexer().isNot(AsmToken::Identifier))
4079     return TokError("expected symbol name");
4080 
4081   StringRef KernelName = Parser.getTok().getString();
4082 
4083   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4084                                            ELF::STT_AMDGPU_HSA_KERNEL);
4085   Lex();
4086   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
4087     KernelScope.initialize(getContext());
4088   return false;
4089 }
4090 
4091 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4092   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4093     return Error(getParser().getTok().getLoc(),
4094                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4095                  "architectures");
4096   }
4097 
4098   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
4099 
4100   std::string ISAVersionStringFromSTI;
4101   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4102   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4103 
4104   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4105     return Error(getParser().getTok().getLoc(),
4106                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4107                  "arguments specified through the command line");
4108   }
4109 
4110   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4111   Lex();
4112 
4113   return false;
4114 }
4115 
4116 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4117   const char *AssemblerDirectiveBegin;
4118   const char *AssemblerDirectiveEnd;
4119   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4120       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
4121           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4122                             HSAMD::V3::AssemblerDirectiveEnd)
4123           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4124                             HSAMD::AssemblerDirectiveEnd);
4125 
4126   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4127     return Error(getParser().getTok().getLoc(),
4128                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4129                  "not available on non-amdhsa OSes")).str());
4130   }
4131 
4132   std::string HSAMetadataString;
4133   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4134                           HSAMetadataString))
4135     return true;
4136 
4137   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
4138     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4139       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4140   } else {
4141     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4142       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4143   }
4144 
4145   return false;
4146 }
4147 
4148 /// Common code to parse out a block of text (typically YAML) between start and
4149 /// end directives.
4150 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4151                                           const char *AssemblerDirectiveEnd,
4152                                           std::string &CollectString) {
4153 
4154   raw_string_ostream CollectStream(CollectString);
4155 
4156   getLexer().setSkipSpace(false);
4157 
4158   bool FoundEnd = false;
4159   while (!getLexer().is(AsmToken::Eof)) {
4160     while (getLexer().is(AsmToken::Space)) {
4161       CollectStream << getLexer().getTok().getString();
4162       Lex();
4163     }
4164 
4165     if (getLexer().is(AsmToken::Identifier)) {
4166       StringRef ID = getLexer().getTok().getIdentifier();
4167       if (ID == AssemblerDirectiveEnd) {
4168         Lex();
4169         FoundEnd = true;
4170         break;
4171       }
4172     }
4173 
4174     CollectStream << Parser.parseStringToEndOfStatement()
4175                   << getContext().getAsmInfo()->getSeparatorString();
4176 
4177     Parser.eatToEndOfStatement();
4178   }
4179 
4180   getLexer().setSkipSpace(true);
4181 
4182   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
4183     return TokError(Twine("expected directive ") +
4184                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4185   }
4186 
4187   CollectStream.flush();
4188   return false;
4189 }
4190 
4191 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4192 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4193   std::string String;
4194   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4195                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4196     return true;
4197 
4198   auto PALMetadata = getTargetStreamer().getPALMetadata();
4199   if (!PALMetadata->setFromString(String))
4200     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4201   return false;
4202 }
4203 
4204 /// Parse the assembler directive for old linear-format PAL metadata.
4205 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4206   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4207     return Error(getParser().getTok().getLoc(),
4208                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4209                  "not available on non-amdpal OSes")).str());
4210   }
4211 
4212   auto PALMetadata = getTargetStreamer().getPALMetadata();
4213   PALMetadata->setLegacy();
4214   for (;;) {
4215     uint32_t Key, Value;
4216     if (ParseAsAbsoluteExpression(Key)) {
4217       return TokError(Twine("invalid value in ") +
4218                       Twine(PALMD::AssemblerDirective));
4219     }
4220     if (getLexer().isNot(AsmToken::Comma)) {
4221       return TokError(Twine("expected an even number of values in ") +
4222                       Twine(PALMD::AssemblerDirective));
4223     }
4224     Lex();
4225     if (ParseAsAbsoluteExpression(Value)) {
4226       return TokError(Twine("invalid value in ") +
4227                       Twine(PALMD::AssemblerDirective));
4228     }
4229     PALMetadata->setRegister(Key, Value);
4230     if (getLexer().isNot(AsmToken::Comma))
4231       break;
4232     Lex();
4233   }
4234   return false;
4235 }
4236 
4237 /// ParseDirectiveAMDGPULDS
4238 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
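///  e.g. (illustrative): .amdgpu_lds lds_buffer, 4096, 16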
4239 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4240   if (getParser().checkForValidSection())
4241     return true;
4242 
4243   StringRef Name;
4244   SMLoc NameLoc = getLexer().getLoc();
4245   if (getParser().parseIdentifier(Name))
4246     return TokError("expected identifier in directive");
4247 
4248   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4249   if (parseToken(AsmToken::Comma, "expected ','"))
4250     return true;
4251 
4252   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4253 
4254   int64_t Size;
4255   SMLoc SizeLoc = getLexer().getLoc();
4256   if (getParser().parseAbsoluteExpression(Size))
4257     return true;
4258   if (Size < 0)
4259     return Error(SizeLoc, "size must be non-negative");
4260   if (Size > LocalMemorySize)
4261     return Error(SizeLoc, "size is too large");
4262 
4263   int64_t Align = 4;
4264   if (getLexer().is(AsmToken::Comma)) {
4265     Lex();
4266     SMLoc AlignLoc = getLexer().getLoc();
4267     if (getParser().parseAbsoluteExpression(Align))
4268       return true;
4269     if (Align < 0 || !isPowerOf2_64(Align))
4270       return Error(AlignLoc, "alignment must be a power of two");
4271 
4272     // Alignment larger than the size of LDS is possible in theory, as long
4273     // as the linker manages to place the symbol at address 0, but we do want
4274     // to make sure the alignment fits nicely into a 32-bit integer.
4275     if (Align >= 1u << 31)
4276       return Error(AlignLoc, "alignment is too large");
4277   }
4278 
4279   if (parseToken(AsmToken::EndOfStatement,
4280                  "unexpected token in '.amdgpu_lds' directive"))
4281     return true;
4282 
4283   Symbol->redefineIfPossible();
4284   if (!Symbol->isUndefined())
4285     return Error(NameLoc, "invalid symbol redefinition");
4286 
4287   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align);
4288   return false;
4289 }
4290 
4291 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4292   StringRef IDVal = DirectiveID.getString();
4293 
4294   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
4295     if (IDVal == ".amdgcn_target")
4296       return ParseDirectiveAMDGCNTarget();
4297 
4298     if (IDVal == ".amdhsa_kernel")
4299       return ParseDirectiveAMDHSAKernel();
4300 
4301     // TODO: Restructure/combine with PAL metadata directive.
4302     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4303       return ParseDirectiveHSAMetadata();
4304   } else {
4305     if (IDVal == ".hsa_code_object_version")
4306       return ParseDirectiveHSACodeObjectVersion();
4307 
4308     if (IDVal == ".hsa_code_object_isa")
4309       return ParseDirectiveHSACodeObjectISA();
4310 
4311     if (IDVal == ".amd_kernel_code_t")
4312       return ParseDirectiveAMDKernelCodeT();
4313 
4314     if (IDVal == ".amdgpu_hsa_kernel")
4315       return ParseDirectiveAMDGPUHsaKernel();
4316 
4317     if (IDVal == ".amd_amdgpu_isa")
4318       return ParseDirectiveISAVersion();
4319 
4320     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4321       return ParseDirectiveHSAMetadata();
4322   }
4323 
4324   if (IDVal == ".amdgpu_lds")
4325     return ParseDirectiveAMDGPULDS();
4326 
4327   if (IDVal == PALMD::AssemblerDirectiveBegin)
4328     return ParseDirectivePALMetadataBegin();
4329 
4330   if (IDVal == PALMD::AssemblerDirective)
4331     return ParseDirectivePALMetadata();
4332 
4333   return true;
4334 }
4335 
4336 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4337                                            unsigned RegNo) const {
4338 
4339   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4340        R.isValid(); ++R) {
4341     if (*R == RegNo)
4342       return isGFX9() || isGFX10();
4343   }
4344 
4345   // GFX10 has 2 more SGPRs 104 and 105.
4346   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4347        R.isValid(); ++R) {
4348     if (*R == RegNo)
4349       return hasSGPR104_SGPR105();
4350   }
4351 
4352   switch (RegNo) {
4353   case AMDGPU::SRC_SHARED_BASE:
4354   case AMDGPU::SRC_SHARED_LIMIT:
4355   case AMDGPU::SRC_PRIVATE_BASE:
4356   case AMDGPU::SRC_PRIVATE_LIMIT:
4357   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4358     return !isCI() && !isSI() && !isVI();
4359   case AMDGPU::TBA:
4360   case AMDGPU::TBA_LO:
4361   case AMDGPU::TBA_HI:
4362   case AMDGPU::TMA:
4363   case AMDGPU::TMA_LO:
4364   case AMDGPU::TMA_HI:
4365     return !isGFX9() && !isGFX10();
4366   case AMDGPU::XNACK_MASK:
4367   case AMDGPU::XNACK_MASK_LO:
4368   case AMDGPU::XNACK_MASK_HI:
4369     return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4370   case AMDGPU::SGPR_NULL:
4371     return isGFX10();
4372   default:
4373     break;
4374   }
4375 
4376   if (isCI())
4377     return true;
4378 
4379   if (isSI() || isGFX10()) {
4380     // No flat_scr on SI.
4381     // On GFX10 flat scratch is not a valid register operand and can only be
4382     // accessed with s_setreg/s_getreg.
4383     switch (RegNo) {
4384     case AMDGPU::FLAT_SCR:
4385     case AMDGPU::FLAT_SCR_LO:
4386     case AMDGPU::FLAT_SCR_HI:
4387       return false;
4388     default:
4389       return true;
4390     }
4391   }
4392 
4393   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4394   // SI/CI have.
4395   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4396        R.isValid(); ++R) {
4397     if (*R == RegNo)
4398       return hasSGPR102_SGPR103();
4399   }
4400 
4401   return true;
4402 }
4403 
4404 OperandMatchResultTy
4405 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4406                               OperandMode Mode) {
4407   // Try to parse with a custom parser
4408   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4409 
4410   // If we successfully parsed the operand or if there was an error parsing,
4411   // we are done.
4412   //
4413   // If we are parsing after we reach EndOfStatement then this means we
4414   // are appending default values to the Operands list.  This is only done
4415   // by a custom parser, so we shouldn't continue on to the generic parsing.
4416   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4417       getLexer().is(AsmToken::EndOfStatement))
4418     return ResTy;
4419 
4420   if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4421     unsigned Prefix = Operands.size();
4422     SMLoc LBraceLoc = getTok().getLoc();
4423     Parser.Lex(); // eat the '['
4424 
4425     for (;;) {
4426       ResTy = parseReg(Operands);
4427       if (ResTy != MatchOperand_Success)
4428         return ResTy;
4429 
4430       if (getLexer().is(AsmToken::RBrac))
4431         break;
4432 
4433       if (getLexer().isNot(AsmToken::Comma))
4434         return MatchOperand_ParseFail;
4435       Parser.Lex();
4436     }
4437 
4438     if (Operands.size() - Prefix > 1) {
4439       Operands.insert(Operands.begin() + Prefix,
4440                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4441       Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4442                                                     getTok().getLoc()));
4443     }
4444 
4445     Parser.Lex(); // eat the ']'
4446     return MatchOperand_Success;
4447   }
4448 
4449   return parseRegOrImm(Operands);
4450 }
4451 
4452 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4453   // Clear any forced encodings from the previous instruction.
4454   setForcedEncodingSize(0);
4455   setForcedDPP(false);
4456   setForcedSDWA(false);
4457 
4458   if (Name.endswith("_e64")) {
4459     setForcedEncodingSize(64);
4460     return Name.substr(0, Name.size() - 4);
4461   } else if (Name.endswith("_e32")) {
4462     setForcedEncodingSize(32);
4463     return Name.substr(0, Name.size() - 4);
4464   } else if (Name.endswith("_dpp")) {
4465     setForcedDPP(true);
4466     return Name.substr(0, Name.size() - 4);
4467   } else if (Name.endswith("_sdwa")) {
4468     setForcedSDWA(true);
4469     return Name.substr(0, Name.size() - 5);
4470   }
4471   return Name;
4472 }
4473 
4474 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4475                                        StringRef Name,
4476                                        SMLoc NameLoc, OperandVector &Operands) {
4477   // Add the instruction mnemonic
4478   Name = parseMnemonicSuffix(Name);
4479   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4480 
4481   bool IsMIMG = Name.startswith("image_");
4482 
4483   while (!getLexer().is(AsmToken::EndOfStatement)) {
4484     OperandMode Mode = OperandMode_Default;
4485     if (IsMIMG && isGFX10() && Operands.size() == 2)
4486       Mode = OperandMode_NSA;
4487     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4488 
4489     // Eat the comma or space if there is one.
4490     if (getLexer().is(AsmToken::Comma))
4491       Parser.Lex();
4492 
4493     switch (Res) {
4494       case MatchOperand_Success: break;
4495       case MatchOperand_ParseFail:
4496         // FIXME: use real operand location rather than the current location.
4497         Error(getLexer().getLoc(), "failed parsing operand.");
4498         while (!getLexer().is(AsmToken::EndOfStatement)) {
4499           Parser.Lex();
4500         }
4501         return true;
4502       case MatchOperand_NoMatch:
4503         // FIXME: use real operand location rather than the current location.
4504         Error(getLexer().getLoc(), "not a valid operand.");
4505         while (!getLexer().is(AsmToken::EndOfStatement)) {
4506           Parser.Lex();
4507         }
4508         return true;
4509     }
4510   }
4511 
4512   return false;
4513 }
4514 
4515 //===----------------------------------------------------------------------===//
4516 // Utility functions
4517 //===----------------------------------------------------------------------===//
4518 
4519 OperandMatchResultTy
4520 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4521 
4522   if (!trySkipId(Prefix, AsmToken::Colon))
4523     return MatchOperand_NoMatch;
4524 
4525   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4526 }
4527 
4528 OperandMatchResultTy
4529 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4530                                     AMDGPUOperand::ImmTy ImmTy,
4531                                     bool (*ConvertResult)(int64_t&)) {
4532   SMLoc S = getLoc();
4533   int64_t Value = 0;
4534 
4535   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4536   if (Res != MatchOperand_Success)
4537     return Res;
4538 
4539   if (ConvertResult && !ConvertResult(Value)) {
4540     Error(S, "invalid " + StringRef(Prefix) + " value.");
4541   }
4542 
4543   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4544   return MatchOperand_Success;
4545 }
4546 
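// Parse a prefixed bit array such as "op_sel:[0,1,0]" (illustrative) into a
// packed immediate, one bit per element starting at the LSB.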
4547 OperandMatchResultTy
4548 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4549                                              OperandVector &Operands,
4550                                              AMDGPUOperand::ImmTy ImmTy,
4551                                              bool (*ConvertResult)(int64_t&)) {
4552   SMLoc S = getLoc();
4553   if (!trySkipId(Prefix, AsmToken::Colon))
4554     return MatchOperand_NoMatch;
4555 
4556   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4557     return MatchOperand_ParseFail;
4558 
4559   unsigned Val = 0;
4560   const unsigned MaxSize = 4;
4561 
4562   // FIXME: How to verify the number of elements matches the number of src
4563   // operands?
4564   for (int I = 0; ; ++I) {
4565     int64_t Op;
4566     SMLoc Loc = getLoc();
4567     if (!parseExpr(Op))
4568       return MatchOperand_ParseFail;
4569 
4570     if (Op != 0 && Op != 1) {
4571       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4572       return MatchOperand_ParseFail;
4573     }
4574 
4575     Val |= (Op << I);
4576 
4577     if (trySkipToken(AsmToken::RBrac))
4578       break;
4579 
4580     if (I + 1 == MaxSize) {
4581       Error(getLoc(), "expected a closing square bracket");
4582       return MatchOperand_ParseFail;
4583     }
4584 
4585     if (!skipToken(AsmToken::Comma, "expected a comma"))
4586       return MatchOperand_ParseFail;
4587   }
4588 
4589   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4590   return MatchOperand_Success;
4591 }
4592 
4593 OperandMatchResultTy
4594 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4595                                AMDGPUOperand::ImmTy ImmTy) {
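  // The bare modifier name (e.g. "glc") sets the bit; a matching
  // "no"-prefixed token (e.g. "noglc") clears it. If the modifier is
  // omitted, the default value 0 is used.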
4596   int64_t Bit = 0;
4597   SMLoc S = Parser.getTok().getLoc();
4598 
  // If we are at the end of the statement, this modifier was omitted, so the
  // default value is used; otherwise try to parse it below.
4601   if (getLexer().isNot(AsmToken::EndOfStatement)) {
4602     switch(getLexer().getKind()) {
4603       case AsmToken::Identifier: {
4604         StringRef Tok = Parser.getTok().getString();
4605         if (Tok == Name) {
4606           if (Tok == "r128" && isGFX9())
4607             Error(S, "r128 modifier is not supported on this GPU");
4608           if (Tok == "a16" && !isGFX9() && !isGFX10())
4609             Error(S, "a16 modifier is not supported on this GPU");
4610           Bit = 1;
4611           Parser.Lex();
4612         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4613           Bit = 0;
4614           Parser.Lex();
4615         } else {
4616           return MatchOperand_NoMatch;
4617         }
4618         break;
4619       }
4620       default:
4621         return MatchOperand_NoMatch;
4622     }
4623   }
4624 
4625   if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4626     return MatchOperand_ParseFail;
4627 
4628   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4629   return MatchOperand_Success;
4630 }
4631 
4632 static void addOptionalImmOperand(
4633   MCInst& Inst, const OperandVector& Operands,
4634   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4635   AMDGPUOperand::ImmTy ImmT,
4636   int64_t Default = 0) {
4637   auto i = OptionalIdx.find(ImmT);
4638   if (i != OptionalIdx.end()) {
4639     unsigned Idx = i->second;
4640     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4641   } else {
4642     Inst.addOperand(MCOperand::createImm(Default));
4643   }
4644 }
4645 
4646 OperandMatchResultTy
4647 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4648   if (getLexer().isNot(AsmToken::Identifier)) {
4649     return MatchOperand_NoMatch;
4650   }
4651   StringRef Tok = Parser.getTok().getString();
4652   if (Tok != Prefix) {
4653     return MatchOperand_NoMatch;
4654   }
4655 
4656   Parser.Lex();
4657   if (getLexer().isNot(AsmToken::Colon)) {
4658     return MatchOperand_ParseFail;
4659   }
4660 
4661   Parser.Lex();
4662   if (getLexer().isNot(AsmToken::Identifier)) {
4663     return MatchOperand_ParseFail;
4664   }
4665 
4666   Value = Parser.getTok().getString();
4667   return MatchOperand_Success;
4668 }
4669 
4670 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
4671 // values to live in a joint format operand in the MCInst encoding.
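// For example (illustrative), "dfmt:1, nfmt:2" and "nfmt:2, dfmt:1" are both
// accepted and are encoded as Dfmt | (Nfmt << 4).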
4672 OperandMatchResultTy
4673 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
4674   SMLoc S = Parser.getTok().getLoc();
4675   int64_t Dfmt = 0, Nfmt = 0;
4676   // dfmt and nfmt can appear in either order, and each is optional.
4677   bool GotDfmt = false, GotNfmt = false;
4678   while (!GotDfmt || !GotNfmt) {
4679     if (!GotDfmt) {
4680       auto Res = parseIntWithPrefix("dfmt", Dfmt);
4681       if (Res != MatchOperand_NoMatch) {
4682         if (Res != MatchOperand_Success)
4683           return Res;
4684         if (Dfmt >= 16) {
4685           Error(Parser.getTok().getLoc(), "out of range dfmt");
4686           return MatchOperand_ParseFail;
4687         }
4688         GotDfmt = true;
4689         Parser.Lex();
4690         continue;
4691       }
4692     }
4693     if (!GotNfmt) {
4694       auto Res = parseIntWithPrefix("nfmt", Nfmt);
4695       if (Res != MatchOperand_NoMatch) {
4696         if (Res != MatchOperand_Success)
4697           return Res;
4698         if (Nfmt >= 8) {
4699           Error(Parser.getTok().getLoc(), "out of range nfmt");
4700           return MatchOperand_ParseFail;
4701         }
4702         GotNfmt = true;
4703         Parser.Lex();
4704         continue;
4705       }
4706     }
4707     break;
4708   }
4709   if (!GotDfmt && !GotNfmt)
4710     return MatchOperand_NoMatch;
4711   auto Format = Dfmt | Nfmt << 4;
4712   Operands.push_back(
4713       AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
4714   return MatchOperand_Success;
4715 }
4716 
4717 //===----------------------------------------------------------------------===//
4718 // ds
4719 //===----------------------------------------------------------------------===//
4720 
4721 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
4722                                     const OperandVector &Operands) {
4723   OptionalImmIndexMap OptionalIdx;
4724 
4725   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4726     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4727 
4728     // Add the register arguments
4729     if (Op.isReg()) {
4730       Op.addRegOperands(Inst, 1);
4731       continue;
4732     }
4733 
4734     // Handle optional arguments
4735     OptionalIdx[Op.getImmTy()] = i;
4736   }
4737 
4738   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
4739   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
4740   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4741 
4742   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4743 }
4744 
4745 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
4746                                 bool IsGdsHardcoded) {
4747   OptionalImmIndexMap OptionalIdx;
4748 
4749   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4750     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4751 
4752     // Add the register arguments
4753     if (Op.isReg()) {
4754       Op.addRegOperands(Inst, 1);
4755       continue;
4756     }
4757 
4758     if (Op.isToken() && Op.getToken() == "gds") {
4759       IsGdsHardcoded = true;
4760       continue;
4761     }
4762 
4763     // Handle optional arguments
4764     OptionalIdx[Op.getImmTy()] = i;
4765   }
4766 
4767   AMDGPUOperand::ImmTy OffsetType =
4768     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
4769      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
4770      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
4771                                                       AMDGPUOperand::ImmTyOffset;
4772 
4773   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
4774 
4775   if (!IsGdsHardcoded) {
4776     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4777   }
4778   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4779 }
4780 
4781 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
4782   OptionalImmIndexMap OptionalIdx;
4783 
4784   unsigned OperandIdx[4];
4785   unsigned EnMask = 0;
4786   int SrcIdx = 0;
4787 
4788   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4789     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4790 
4791     // Add the register arguments
4792     if (Op.isReg()) {
4793       assert(SrcIdx < 4);
4794       OperandIdx[SrcIdx] = Inst.size();
4795       Op.addRegOperands(Inst, 1);
4796       ++SrcIdx;
4797       continue;
4798     }
4799 
4800     if (Op.isOff()) {
4801       assert(SrcIdx < 4);
4802       OperandIdx[SrcIdx] = Inst.size();
4803       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
4804       ++SrcIdx;
4805       continue;
4806     }
4807 
4808     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
4809       Op.addImmOperands(Inst, 1);
4810       continue;
4811     }
4812 
4813     if (Op.isToken() && Op.getToken() == "done")
4814       continue;
4815 
4816     // Handle optional arguments
4817     OptionalIdx[Op.getImmTy()] = i;
4818   }
4819 
4820   assert(SrcIdx == 4);
4821 
4822   bool Compr = false;
4823   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
4824     Compr = true;
4825     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
4826     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
4827     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
4828   }
4829 
4830   for (auto i = 0; i < SrcIdx; ++i) {
4831     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
4832       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
4833     }
4834   }
4835 
4836   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
4837   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
4838 
4839   Inst.addOperand(MCOperand::createImm(EnMask));
4840 }
4841 
4842 //===----------------------------------------------------------------------===//
4843 // s_waitcnt
4844 //===----------------------------------------------------------------------===//
4845 
4846 static bool
4847 encodeCnt(
4848   const AMDGPU::IsaVersion ISA,
4849   int64_t &IntVal,
4850   int64_t CntVal,
4851   bool Saturate,
4852   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
4853   unsigned (*decode)(const IsaVersion &Version, unsigned))
4854 {
4855   bool Failed = false;
4856 
4857   IntVal = encode(ISA, IntVal, CntVal);
4858   if (CntVal != decode(ISA, IntVal)) {
4859     if (Saturate) {
4860       IntVal = encode(ISA, IntVal, -1);
4861     } else {
4862       Failed = true;
4863     }
4864   }
4865   return Failed;
4866 }
4867 
4868 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
4869 
4870   SMLoc CntLoc = getLoc();
4871   StringRef CntName = getTokenStr();
4872 
4873   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
4874       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
4875     return false;
4876 
4877   int64_t CntVal;
4878   SMLoc ValLoc = getLoc();
4879   if (!parseExpr(CntVal))
4880     return false;
4881 
4882   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4883 
4884   bool Failed = true;
4885   bool Sat = CntName.endswith("_sat");
4886 
4887   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
4888     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
4889   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
4890     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
4891   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
4892     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
4893   } else {
4894     Error(CntLoc, "invalid counter name " + CntName);
4895     return false;
4896   }
4897 
4898   if (Failed) {
4899     Error(ValLoc, "too large value for " + CntName);
4900     return false;
4901   }
4902 
4903   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
4904     return false;
4905 
4906   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
4907     if (isToken(AsmToken::EndOfStatement)) {
4908       Error(getLoc(), "expected a counter name");
4909       return false;
4910     }
4911   }
4912 
4913   return true;
4914 }
4915 
4916 OperandMatchResultTy
4917 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
4918   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4919   int64_t Waitcnt = getWaitcntBitMask(ISA);
4920   SMLoc S = getLoc();
4921 
  // If parsing failed, do not return an error code
  // to avoid excessive error messages.
4924   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
4925     while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
4926   } else {
4927     parseExpr(Waitcnt);
4928   }
4929 
4930   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
4931   return MatchOperand_Success;
4932 }
4933 
4934 bool
4935 AMDGPUOperand::isSWaitCnt() const {
4936   return isImm();
4937 }
4938 
4939 //===----------------------------------------------------------------------===//
4940 // hwreg
4941 //===----------------------------------------------------------------------===//
4942 
4943 bool
4944 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
4945                                 int64_t &Offset,
4946                                 int64_t &Width) {
4947   using namespace llvm::AMDGPU::Hwreg;
4948 
4949   // The register may be specified by name or using a numeric code
4950   if (isToken(AsmToken::Identifier) &&
4951       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
4952     HwReg.IsSymbolic = true;
    lex(); // skip register name
4954   } else if (!parseExpr(HwReg.Id)) {
4955     return false;
4956   }
4957 
4958   if (trySkipToken(AsmToken::RParen))
4959     return true;
4960 
4961   // parse optional params
4962   return
4963     skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
4964     parseExpr(Offset) &&
4965     skipToken(AsmToken::Comma, "expected a comma") &&
4966     parseExpr(Width) &&
4967     skipToken(AsmToken::RParen, "expected a closing parenthesis");
4968 }
4969 
4970 bool
4971 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
4972                                const int64_t Offset,
4973                                const int64_t Width,
4974                                const SMLoc Loc) {
4975 
4976   using namespace llvm::AMDGPU::Hwreg;
4977 
4978   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
4979     Error(Loc, "specified hardware register is not supported on this GPU");
4980     return false;
4981   } else if (!isValidHwreg(HwReg.Id)) {
4982     Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
4983     return false;
4984   } else if (!isValidHwregOffset(Offset)) {
4985     Error(Loc, "invalid bit offset: only 5-bit values are legal");
4986     return false;
4987   } else if (!isValidHwregWidth(Width)) {
4988     Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
4989     return false;
4990   }
4991   return true;
4992 }
4993 
4994 OperandMatchResultTy
4995 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
4996   using namespace llvm::AMDGPU::Hwreg;
4997 
4998   int64_t ImmVal = 0;
4999   SMLoc Loc = getLoc();
5000 
  // If parsing failed, do not return an error code
  // to avoid excessive error messages.
5003   if (trySkipId("hwreg", AsmToken::LParen)) {
5004     OperandInfoTy HwReg(ID_UNKNOWN_);
5005     int64_t Offset = OFFSET_DEFAULT_;
5006     int64_t Width = WIDTH_DEFAULT_;
5007     if (parseHwregBody(HwReg, Offset, Width) &&
5008         validateHwreg(HwReg, Offset, Width, Loc)) {
5009       ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
5010     }
5011   } else if (parseExpr(ImmVal)) {
5012     if (ImmVal < 0 || !isUInt<16>(ImmVal))
5013       Error(Loc, "invalid immediate: only 16-bit values are legal");
5014   }
5015 
5016   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
5017   return MatchOperand_Success;
5018 }
5019 
5020 bool AMDGPUOperand::isHwreg() const {
5021   return isImmTy(ImmTyHwreg);
5022 }
5023 
5024 //===----------------------------------------------------------------------===//
5025 // sendmsg
5026 //===----------------------------------------------------------------------===//
5027 
5028 bool
5029 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
5030                                   OperandInfoTy &Op,
5031                                   OperandInfoTy &Stream) {
5032   using namespace llvm::AMDGPU::SendMsg;
5033 
5034   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
5035     Msg.IsSymbolic = true;
5036     lex(); // skip message name
5037   } else if (!parseExpr(Msg.Id)) {
5038     return false;
5039   }
5040 
5041   if (trySkipToken(AsmToken::Comma)) {
5042     Op.IsDefined = true;
5043     if (isToken(AsmToken::Identifier) &&
5044         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
5045       lex(); // skip operation name
5046     } else if (!parseExpr(Op.Id)) {
5047       return false;
5048     }
5049 
5050     if (trySkipToken(AsmToken::Comma)) {
5051       Stream.IsDefined = true;
5052       if (!parseExpr(Stream.Id))
5053         return false;
5054     }
5055   }
5056 
5057   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
5058 }
5059 
5060 bool
5061 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
5062                                  const OperandInfoTy &Op,
5063                                  const OperandInfoTy &Stream,
5064                                  const SMLoc S) {
5065   using namespace llvm::AMDGPU::SendMsg;
5066 
  // Validation strictness depends on whether the message is specified
  // in a symbolic or in a numeric form. In the latter case
  // only the encoding possibility is checked.
5070   bool Strict = Msg.IsSymbolic;
5071 
5072   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
5073     Error(S, "invalid message id");
5074     return false;
5075   } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
5076     Error(S, Op.IsDefined ?
5077              "message does not support operations" :
5078              "missing message operation");
5079     return false;
5080   } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
5081     Error(S, "invalid operation id");
5082     return false;
5083   } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
5084     Error(S, "message operation does not support streams");
5085     return false;
5086   } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
5087     Error(S, "invalid message stream id");
5088     return false;
5089   }
5090   return true;
5091 }
5092 
5093 OperandMatchResultTy
5094 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
5095   using namespace llvm::AMDGPU::SendMsg;
5096 
5097   int64_t ImmVal = 0;
5098   SMLoc Loc = getLoc();
5099 
  // If parsing failed, do not return an error code
  // to avoid excessive error messages.
5102   if (trySkipId("sendmsg", AsmToken::LParen)) {
5103     OperandInfoTy Msg(ID_UNKNOWN_);
5104     OperandInfoTy Op(OP_NONE_);
5105     OperandInfoTy Stream(STREAM_ID_NONE_);
5106     if (parseSendMsgBody(Msg, Op, Stream) &&
5107         validateSendMsg(Msg, Op, Stream, Loc)) {
5108       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5109     }
5110   } else if (parseExpr(ImmVal)) {
5111     if (ImmVal < 0 || !isUInt<16>(ImmVal))
5112       Error(Loc, "invalid immediate: only 16-bit values are legal");
5113   }
5114 
5115   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5116   return MatchOperand_Success;
5117 }
5118 
5119 bool AMDGPUOperand::isSendMsg() const {
5120   return isImmTy(ImmTySendMsg);
5121 }
5122 
5123 //===----------------------------------------------------------------------===//
5124 // v_interp
5125 //===----------------------------------------------------------------------===//
5126 
5127 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5128   if (getLexer().getKind() != AsmToken::Identifier)
5129     return MatchOperand_NoMatch;
5130 
5131   StringRef Str = Parser.getTok().getString();
5132   int Slot = StringSwitch<int>(Str)
5133     .Case("p10", 0)
5134     .Case("p20", 1)
5135     .Case("p0", 2)
5136     .Default(-1);
5137 
5138   SMLoc S = Parser.getTok().getLoc();
5139   if (Slot == -1)
5140     return MatchOperand_ParseFail;
5141 
5142   Parser.Lex();
5143   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5144                                               AMDGPUOperand::ImmTyInterpSlot));
5145   return MatchOperand_Success;
5146 }
5147 
5148 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5149   if (getLexer().getKind() != AsmToken::Identifier)
5150     return MatchOperand_NoMatch;
5151 
5152   StringRef Str = Parser.getTok().getString();
5153   if (!Str.startswith("attr"))
5154     return MatchOperand_NoMatch;
5155 
5156   StringRef Chan = Str.take_back(2);
5157   int AttrChan = StringSwitch<int>(Chan)
5158     .Case(".x", 0)
5159     .Case(".y", 1)
5160     .Case(".z", 2)
5161     .Case(".w", 3)
5162     .Default(-1);
5163   if (AttrChan == -1)
5164     return MatchOperand_ParseFail;
5165 
5166   Str = Str.drop_back(2).drop_front(4);
5167 
5168   uint8_t Attr;
5169   if (Str.getAsInteger(10, Attr))
5170     return MatchOperand_ParseFail;
5171 
5172   SMLoc S = Parser.getTok().getLoc();
5173   Parser.Lex();
5174   if (Attr > 63) {
5175     Error(S, "out of bounds attr");
5176     return MatchOperand_Success;
5177   }
5178 
5179   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5180 
5181   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5182                                               AMDGPUOperand::ImmTyInterpAttr));
5183   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5184                                               AMDGPUOperand::ImmTyAttrChan));
5185   return MatchOperand_Success;
5186 }
5187 
5188 //===----------------------------------------------------------------------===//
5189 // exp
5190 //===----------------------------------------------------------------------===//
5191 
5192 void AMDGPUAsmParser::errorExpTgt() {
5193   Error(Parser.getTok().getLoc(), "invalid exp target");
5194 }
5195 
5196 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5197                                                       uint8_t &Val) {
5198   if (Str == "null") {
5199     Val = 9;
5200     return MatchOperand_Success;
5201   }
5202 
5203   if (Str.startswith("mrt")) {
5204     Str = Str.drop_front(3);
5205     if (Str == "z") { // == mrtz
5206       Val = 8;
5207       return MatchOperand_Success;
5208     }
5209 
5210     if (Str.getAsInteger(10, Val))
5211       return MatchOperand_ParseFail;
5212 
5213     if (Val > 7)
5214       errorExpTgt();
5215 
5216     return MatchOperand_Success;
5217   }
5218 
5219   if (Str.startswith("pos")) {
5220     Str = Str.drop_front(3);
5221     if (Str.getAsInteger(10, Val))
5222       return MatchOperand_ParseFail;
5223 
5224     if (Val > 4 || (Val == 4 && !isGFX10()))
5225       errorExpTgt();
5226 
5227     Val += 12;
5228     return MatchOperand_Success;
5229   }
5230 
5231   if (isGFX10() && Str == "prim") {
5232     Val = 20;
5233     return MatchOperand_Success;
5234   }
5235 
5236   if (Str.startswith("param")) {
5237     Str = Str.drop_front(5);
5238     if (Str.getAsInteger(10, Val))
5239       return MatchOperand_ParseFail;
5240 
5241     if (Val >= 32)
5242       errorExpTgt();
5243 
5244     Val += 32;
5245     return MatchOperand_Success;
5246   }
5247 
5248   if (Str.startswith("invalid_target_")) {
5249     Str = Str.drop_front(15);
5250     if (Str.getAsInteger(10, Val))
5251       return MatchOperand_ParseFail;
5252 
5253     errorExpTgt();
5254     return MatchOperand_Success;
5255   }
5256 
5257   return MatchOperand_NoMatch;
5258 }
5259 
5260 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5261   uint8_t Val;
5262   StringRef Str = Parser.getTok().getString();
5263 
5264   auto Res = parseExpTgtImpl(Str, Val);
5265   if (Res != MatchOperand_Success)
5266     return Res;
5267 
5268   SMLoc S = Parser.getTok().getLoc();
5269   Parser.Lex();
5270 
5271   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5272                                               AMDGPUOperand::ImmTyExpTgt));
5273   return MatchOperand_Success;
5274 }
5275 
5276 //===----------------------------------------------------------------------===//
5277 // parser helpers
5278 //===----------------------------------------------------------------------===//
5279 
5280 bool
5281 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5282   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5283 }
5284 
5285 bool
5286 AMDGPUAsmParser::isId(const StringRef Id) const {
5287   return isId(getToken(), Id);
5288 }
5289 
5290 bool
5291 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5292   return getTokenKind() == Kind;
5293 }
5294 
5295 bool
5296 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5297   if (isId(Id)) {
5298     lex();
5299     return true;
5300   }
5301   return false;
5302 }
5303 
5304 bool
5305 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5306   if (isId(Id) && peekToken().is(Kind)) {
5307     lex();
5308     lex();
5309     return true;
5310   }
5311   return false;
5312 }
5313 
5314 bool
5315 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5316   if (isToken(Kind)) {
5317     lex();
5318     return true;
5319   }
5320   return false;
5321 }
5322 
5323 bool
5324 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5325                            const StringRef ErrMsg) {
5326   if (!trySkipToken(Kind)) {
5327     Error(getLoc(), ErrMsg);
5328     return false;
5329   }
5330   return true;
5331 }
5332 
5333 bool
5334 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5335   return !getParser().parseAbsoluteExpression(Imm);
5336 }
5337 
5338 bool
5339 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
5340   SMLoc S = getLoc();
5341 
5342   const MCExpr *Expr;
5343   if (Parser.parseExpression(Expr))
5344     return false;
5345 
5346   int64_t IntVal;
5347   if (Expr->evaluateAsAbsolute(IntVal)) {
5348     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
5349   } else {
5350     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
5351   }
5352   return true;
5353 }
5354 
5355 bool
5356 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5357   if (isToken(AsmToken::String)) {
5358     Val = getToken().getStringContents();
5359     lex();
5360     return true;
5361   } else {
5362     Error(getLoc(), ErrMsg);
5363     return false;
5364   }
5365 }
5366 
5367 AsmToken
5368 AMDGPUAsmParser::getToken() const {
5369   return Parser.getTok();
5370 }
5371 
5372 AsmToken
5373 AMDGPUAsmParser::peekToken() {
5374   return getLexer().peekTok();
5375 }
5376 
5377 void
5378 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
5379   auto TokCount = getLexer().peekTokens(Tokens);
5380 
5381   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
5382     Tokens[Idx] = AsmToken(AsmToken::Error, "");
5383 }
5384 
5385 AsmToken::TokenKind
5386 AMDGPUAsmParser::getTokenKind() const {
5387   return getLexer().getKind();
5388 }
5389 
5390 SMLoc
5391 AMDGPUAsmParser::getLoc() const {
5392   return getToken().getLoc();
5393 }
5394 
5395 StringRef
5396 AMDGPUAsmParser::getTokenStr() const {
5397   return getToken().getString();
5398 }
5399 
5400 void
5401 AMDGPUAsmParser::lex() {
5402   Parser.Lex();
5403 }
5404 
5405 //===----------------------------------------------------------------------===//
5406 // swizzle
5407 //===----------------------------------------------------------------------===//
5408 
5409 LLVM_READNONE
5410 static unsigned
5411 encodeBitmaskPerm(const unsigned AndMask,
5412                   const unsigned OrMask,
5413                   const unsigned XorMask) {
5414   using namespace llvm::AMDGPU::Swizzle;
5415 
5416   return BITMASK_PERM_ENC |
5417          (AndMask << BITMASK_AND_SHIFT) |
5418          (OrMask  << BITMASK_OR_SHIFT)  |
5419          (XorMask << BITMASK_XOR_SHIFT);
5420 }
5421 
5422 bool
5423 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5424                                       const unsigned MinVal,
5425                                       const unsigned MaxVal,
5426                                       const StringRef ErrMsg) {
5427   for (unsigned i = 0; i < OpNum; ++i) {
5428     if (!skipToken(AsmToken::Comma, "expected a comma")){
5429       return false;
5430     }
5431     SMLoc ExprLoc = Parser.getTok().getLoc();
5432     if (!parseExpr(Op[i])) {
5433       return false;
5434     }
5435     if (Op[i] < MinVal || Op[i] > MaxVal) {
5436       Error(ExprLoc, ErrMsg);
5437       return false;
5438     }
5439   }
5440 
5441   return true;
5442 }
5443 
5444 bool
5445 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5446   using namespace llvm::AMDGPU::Swizzle;
5447 
5448   int64_t Lane[LANE_NUM];
5449   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5450                            "expected a 2-bit lane id")) {
5451     Imm = QUAD_PERM_ENC;
5452     for (unsigned I = 0; I < LANE_NUM; ++I) {
5453       Imm |= Lane[I] << (LANE_SHIFT * I);
5454     }
5455     return true;
5456   }
5457   return false;
5458 }
5459 
5460 bool
5461 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5462   using namespace llvm::AMDGPU::Swizzle;
5463 
5464   SMLoc S = Parser.getTok().getLoc();
5465   int64_t GroupSize;
5466   int64_t LaneIdx;
5467 
5468   if (!parseSwizzleOperands(1, &GroupSize,
5469                             2, 32,
5470                             "group size must be in the interval [2,32]")) {
5471     return false;
5472   }
5473   if (!isPowerOf2_64(GroupSize)) {
5474     Error(S, "group size must be a power of two");
5475     return false;
5476   }
5477   if (parseSwizzleOperands(1, &LaneIdx,
5478                            0, GroupSize - 1,
5479                            "lane id must be in the interval [0,group size - 1]")) {
5480     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5481     return true;
5482   }
5483   return false;
5484 }
5485 
5486 bool
5487 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
5488   using namespace llvm::AMDGPU::Swizzle;
5489 
5490   SMLoc S = Parser.getTok().getLoc();
5491   int64_t GroupSize;
5492 
5493   if (!parseSwizzleOperands(1, &GroupSize,
5494       2, 32, "group size must be in the interval [2,32]")) {
5495     return false;
5496   }
5497   if (!isPowerOf2_64(GroupSize)) {
5498     Error(S, "group size must be a power of two");
5499     return false;
5500   }
5501 
5502   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
5503   return true;
5504 }
5505 
5506 bool
5507 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5508   using namespace llvm::AMDGPU::Swizzle;
5509 
5510   SMLoc S = Parser.getTok().getLoc();
5511   int64_t GroupSize;
5512 
5513   if (!parseSwizzleOperands(1, &GroupSize,
5514       1, 16, "group size must be in the interval [1,16]")) {
5515     return false;
5516   }
5517   if (!isPowerOf2_64(GroupSize)) {
5518     Error(S, "group size must be a power of two");
5519     return false;
5520   }
5521 
5522   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5523   return true;
5524 }
5525 
5526 bool
5527 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5528   using namespace llvm::AMDGPU::Swizzle;
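
  // The control string has one character per lane-id bit, most significant
  // bit first: '0' forces the bit to 0, '1' forces it to 1, 'p' preserves
  // it, and 'i' inverts it.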
5529 
5530   if (!skipToken(AsmToken::Comma, "expected a comma")) {
5531     return false;
5532   }
5533 
5534   StringRef Ctl;
5535   SMLoc StrLoc = Parser.getTok().getLoc();
5536   if (!parseString(Ctl)) {
5537     return false;
5538   }
5539   if (Ctl.size() != BITMASK_WIDTH) {
5540     Error(StrLoc, "expected a 5-character mask");
5541     return false;
5542   }
5543 
5544   unsigned AndMask = 0;
5545   unsigned OrMask = 0;
5546   unsigned XorMask = 0;
5547 
5548   for (size_t i = 0; i < Ctl.size(); ++i) {
5549     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5550     switch(Ctl[i]) {
5551     default:
5552       Error(StrLoc, "invalid mask");
5553       return false;
5554     case '0':
5555       break;
5556     case '1':
5557       OrMask |= Mask;
5558       break;
5559     case 'p':
5560       AndMask |= Mask;
5561       break;
5562     case 'i':
5563       AndMask |= Mask;
5564       XorMask |= Mask;
5565       break;
5566     }
5567   }
5568 
5569   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
5570   return true;
5571 }
5572 
5573 bool
5574 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5575 
5576   SMLoc OffsetLoc = Parser.getTok().getLoc();
5577 
5578   if (!parseExpr(Imm)) {
5579     return false;
5580   }
5581   if (!isUInt<16>(Imm)) {
5582     Error(OffsetLoc, "expected a 16-bit offset");
5583     return false;
5584   }
5585   return true;
5586 }
5587 
5588 bool
5589 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
5590   using namespace llvm::AMDGPU::Swizzle;
5591 
  if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
5593 
5594     SMLoc ModeLoc = Parser.getTok().getLoc();
5595     bool Ok = false;
5596 
5597     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
5598       Ok = parseSwizzleQuadPerm(Imm);
5599     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
5600       Ok = parseSwizzleBitmaskPerm(Imm);
5601     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
5602       Ok = parseSwizzleBroadcast(Imm);
5603     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
5604       Ok = parseSwizzleSwap(Imm);
5605     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
5606       Ok = parseSwizzleReverse(Imm);
5607     } else {
5608       Error(ModeLoc, "expected a swizzle mode");
5609     }
5610 
    return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
5612   }
5613 
5614   return false;
5615 }
5616 
5617 OperandMatchResultTy
5618 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
5619   SMLoc S = Parser.getTok().getLoc();
5620   int64_t Imm = 0;
5621 
5622   if (trySkipId("offset")) {
5623 
5624     bool Ok = false;
5625     if (skipToken(AsmToken::Colon, "expected a colon")) {
5626       if (trySkipId("swizzle")) {
5627         Ok = parseSwizzleMacro(Imm);
5628       } else {
5629         Ok = parseSwizzleOffset(Imm);
5630       }
5631     }
5632 
5633     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
5634 
5635     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
5636   } else {
5637     // Swizzle "offset" operand is optional.
5638     // If it is omitted, try parsing other optional operands.
5639     return parseOptionalOpr(Operands);
5640   }
5641 }
5642 
5643 bool
5644 AMDGPUOperand::isSwizzle() const {
5645   return isImmTy(ImmTySwizzle);
5646 }
5647 
5648 //===----------------------------------------------------------------------===//
5649 // VGPR Index Mode
5650 //===----------------------------------------------------------------------===//
5651 
5652 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
5653 
5654   using namespace llvm::AMDGPU::VGPRIndexMode;
5655 
5656   if (trySkipToken(AsmToken::RParen)) {
5657     return OFF;
5658   }
5659 
5660   int64_t Imm = 0;
5661 
5662   while (true) {
5663     unsigned Mode = 0;
5664     SMLoc S = Parser.getTok().getLoc();
5665 
5666     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
5667       if (trySkipId(IdSymbolic[ModeId])) {
5668         Mode = 1 << ModeId;
5669         break;
5670       }
5671     }
5672 
5673     if (Mode == 0) {
5674       Error(S, (Imm == 0)?
5675                "expected a VGPR index mode or a closing parenthesis" :
5676                "expected a VGPR index mode");
5677       break;
5678     }
5679 
5680     if (Imm & Mode) {
5681       Error(S, "duplicate VGPR index mode");
5682       break;
5683     }
5684     Imm |= Mode;
5685 
5686     if (trySkipToken(AsmToken::RParen))
5687       break;
5688     if (!skipToken(AsmToken::Comma,
5689                    "expected a comma or a closing parenthesis"))
5690       break;
5691   }
5692 
5693   return Imm;
5694 }
5695 
5696 OperandMatchResultTy
5697 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
5698 
5699   int64_t Imm = 0;
5700   SMLoc S = Parser.getTok().getLoc();
5701 
5702   if (getLexer().getKind() == AsmToken::Identifier &&
5703       Parser.getTok().getString() == "gpr_idx" &&
5704       getLexer().peekTok().is(AsmToken::LParen)) {
5705 
5706     Parser.Lex();
5707     Parser.Lex();
5708 
    // If parsing fails, an error is reported inside parseGPRIdxMacro, but we
    // do not return an error code to avoid excessive error messages.
5711     Imm = parseGPRIdxMacro();
5712 
5713   } else {
5714     if (getParser().parseAbsoluteExpression(Imm))
5715       return MatchOperand_NoMatch;
5716     if (Imm < 0 || !isUInt<4>(Imm)) {
5717       Error(S, "invalid immediate: only 4-bit values are legal");
5718     }
5719   }
5720 
5721   Operands.push_back(
5722       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
5723   return MatchOperand_Success;
5724 }
5725 
5726 bool AMDGPUOperand::isGPRIdxMode() const {
5727   return isImmTy(ImmTyGprIdxMode);
5728 }
5729 
5730 //===----------------------------------------------------------------------===//
5731 // sopp branch targets
5732 //===----------------------------------------------------------------------===//
5733 
5734 OperandMatchResultTy
5735 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
5736 
5737   // Make sure we are not parsing something
5738   // that looks like a label or an expression but is not.
5739   // This will improve error messages.
5740   if (isRegister() || isModifier())
5741     return MatchOperand_NoMatch;
5742 
5743   if (parseExpr(Operands)) {
5744 
5745     AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
5746     assert(Opr.isImm() || Opr.isExpr());
5747     SMLoc Loc = Opr.getStartLoc();
5748 
5749     // Currently we do not support arbitrary expressions as branch targets.
5750     // Only labels and absolute expressions are accepted.
5751     if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
5752       Error(Loc, "expected an absolute expression or a label");
5753     } else if (Opr.isImm() && !Opr.isS16Imm()) {
5754       Error(Loc, "expected a 16-bit signed jump offset");
5755     }
5756   }
5757 
5758   return MatchOperand_Success; // avoid excessive error messages
5759 }
5760 
5761 //===----------------------------------------------------------------------===//
5762 // Boolean holding registers
5763 //===----------------------------------------------------------------------===//
5764 
5765 OperandMatchResultTy
5766 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
5767   return parseReg(Operands);
5768 }
5769 
5770 //===----------------------------------------------------------------------===//
5771 // mubuf
5772 //===----------------------------------------------------------------------===//
5773 
5774 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
5775   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
5776 }
5777 
5778 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
5779   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
5780 }
5781 
5782 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
5783   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
5784 }
5785 
5786 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
5787                                const OperandVector &Operands,
5788                                bool IsAtomic,
5789                                bool IsAtomicReturn,
5790                                bool IsLds) {
5791   bool IsLdsOpcode = IsLds;
5792   bool HasLdsModifier = false;
5793   OptionalImmIndexMap OptionalIdx;
5794   assert(IsAtomicReturn ? IsAtomic : true);
5795   unsigned FirstOperandIdx = 1;
5796 
5797   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
5798     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5799 
5800     // Add the register arguments
5801     if (Op.isReg()) {
5802       Op.addRegOperands(Inst, 1);
5803       // Insert a tied src for atomic return dst.
5804       // This cannot be postponed as subsequent calls to
      // addImmOperands rely on the correct number of MC operands.
5806       if (IsAtomicReturn && i == FirstOperandIdx)
5807         Op.addRegOperands(Inst, 1);
5808       continue;
5809     }
5810 
5811     // Handle the case where soffset is an immediate
5812     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5813       Op.addImmOperands(Inst, 1);
5814       continue;
5815     }
5816 
5817     HasLdsModifier |= Op.isLDS();
5818 
5819     // Handle tokens like 'offen' which are sometimes hard-coded into the
5820     // asm string.  There are no MCInst operands for these.
5821     if (Op.isToken()) {
5822       continue;
5823     }
5824     assert(Op.isImm());
5825 
5826     // Handle optional arguments
5827     OptionalIdx[Op.getImmTy()] = i;
5828   }
5829 
  // This is a workaround for an llvm quirk which may result in an
  // incorrect instruction selection. Lds and non-lds versions of
  // MUBUF instructions are identical except that lds versions
  // have a mandatory 'lds' modifier. However, this modifier follows
  // the optional modifiers, so the llvm asm matcher regards 'lds'
  // as an optional modifier too. As a result, an lds version
  // of an opcode may be selected even if it has no 'lds' modifier.
5837   if (IsLdsOpcode && !HasLdsModifier) {
5838     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
5839     if (NoLdsOpcode != -1) { // Got lds version - correct it.
5840       Inst.setOpcode(NoLdsOpcode);
5841       IsLdsOpcode = false;
5842     }
5843   }
5844 
5845   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
5846   if (!IsAtomic) { // glc is hard-coded.
5847     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5848   }
5849   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5850 
5851   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
5852     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5853   }
5854 
5855   if (isGFX10())
5856     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5857 }
5858 
5859 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
5860   OptionalImmIndexMap OptionalIdx;
5861 
5862   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5863     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5864 
5865     // Add the register arguments
5866     if (Op.isReg()) {
5867       Op.addRegOperands(Inst, 1);
5868       continue;
5869     }
5870 
5871     // Handle the case where soffset is an immediate
5872     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5873       Op.addImmOperands(Inst, 1);
5874       continue;
5875     }
5876 
5877     // Handle tokens like 'offen' which are sometimes hard-coded into the
5878     // asm string.  There are no MCInst operands for these.
5879     if (Op.isToken()) {
5880       continue;
5881     }
5882     assert(Op.isImm());
5883 
5884     // Handle optional arguments
5885     OptionalIdx[Op.getImmTy()] = i;
5886   }
5887 
5888   addOptionalImmOperand(Inst, Operands, OptionalIdx,
5889                         AMDGPUOperand::ImmTyOffset);
5890   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
5891   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5892   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5893   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5894 
5895   if (isGFX10())
5896     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5897 }
5898 
5899 //===----------------------------------------------------------------------===//
5900 // mimg
5901 //===----------------------------------------------------------------------===//
5902 
5903 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
5904                               bool IsAtomic) {
5905   unsigned I = 1;
5906   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5907   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5908     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5909   }
5910 
5911   if (IsAtomic) {
5912     // Add src, same as dst
5913     assert(Desc.getNumDefs() == 1);
5914     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
5915   }
5916 
5917   OptionalImmIndexMap OptionalIdx;
5918 
5919   for (unsigned E = Operands.size(); I != E; ++I) {
5920     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5921 
5922     // Add the register arguments
5923     if (Op.isReg()) {
5924       Op.addRegOperands(Inst, 1);
5925     } else if (Op.isImmModifier()) {
5926       OptionalIdx[Op.getImmTy()] = I;
5927     } else if (!Op.isToken()) {
5928       llvm_unreachable("unexpected operand type");
5929     }
5930   }
5931 
5932   bool IsGFX10 = isGFX10();
5933 
5934   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
5935   if (IsGFX10)
5936     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
5937   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
5938   if (IsGFX10)
5939     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5940   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5941   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5942   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
5943   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5944   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
5945   if (!IsGFX10)
5946     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
5947   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
5948 }
5949 
5950 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
5951   cvtMIMG(Inst, Operands, true);
5952 }
5953 
5954 //===----------------------------------------------------------------------===//
5955 // smrd
5956 //===----------------------------------------------------------------------===//
5957 
5958 bool AMDGPUOperand::isSMRDOffset8() const {
5959   return isImm() && isUInt<8>(getImm());
5960 }
5961 
5962 bool AMDGPUOperand::isSMRDOffset20() const {
5963   return isImm() && isUInt<20>(getImm());
5964 }
5965 
5966 bool AMDGPUOperand::isSMRDLiteralOffset() const {
  // 32-bit literals are only supported on CI, and we only want to use them
  // when the offset does not fit in 8 bits.
5969   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
5970 }
5971 
5972 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
5973   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5974 }
5975 
5976 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
5977   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5978 }
5979 
5980 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
5981   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5982 }
5983 
5984 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
5985   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5986 }
5987 
5988 //===----------------------------------------------------------------------===//
5989 // vop3
5990 //===----------------------------------------------------------------------===//
5991 
5992 static bool ConvertOmodMul(int64_t &Mul) {
5993   if (Mul != 1 && Mul != 2 && Mul != 4)
5994     return false;
5995 
5996   Mul >>= 1;
5997   return true;
5998 }
5999 
6000 static bool ConvertOmodDiv(int64_t &Div) {
6001   if (Div == 1) {
6002     Div = 0;
6003     return true;
6004   }
6005 
6006   if (Div == 2) {
6007     Div = 3;
6008     return true;
6009   }
6010 
6011   return false;
6012 }
6013 
6014 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
6015   if (BoundCtrl == 0) {
6016     BoundCtrl = 1;
6017     return true;
6018   }
6019 
6020   if (BoundCtrl == -1) {
6021     BoundCtrl = 0;
6022     return true;
6023   }
6024 
6025   return false;
6026 }
6027 
6028 // Note: the order in this table matches the order of operands in AsmString.
6029 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
6030   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
6031   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
6032   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
6033   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
6034   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
6035   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
6036   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
6037   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
6038   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
6039   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
6040   {"format",  AMDGPUOperand::ImmTyFORMAT, false, nullptr},
6041   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
6042   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
6043   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
6044   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
6045   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
6046   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
6047   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
6048   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
6049   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
6050   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
6051   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
6052   {"a16",     AMDGPUOperand::ImmTyR128A16,  true, nullptr},
6053   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
6054   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
6055   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
6056   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
6057   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
6058   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
6059   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
6060   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
6061   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
6062   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
6063   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
6064   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
6065   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
6066   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
6067   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
6068   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
6069   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
6070   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
6071   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
6072   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
6073   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
6074 };
6075 
6076 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
6077   unsigned size = Operands.size();
6078   assert(size > 0);
6079 
6080   OperandMatchResultTy res = parseOptionalOpr(Operands);
6081 
  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // The current design assumes that all operands after the first optional one
  // are also optional. However, the implementation of some instructions
  // violates this rule (see e.g. flat/global atomics, which have a hardcoded
  // 'glc' operand).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure the autogenerated parser of custom operands never hits a
  // hardcoded mandatory operand.
6092 
6093   if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
6094 
6095     // We have parsed the first optional operand.
6096     // Parse as many operands as necessary to skip all mandatory operands.
6097 
6098     for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
6099       if (res != MatchOperand_Success ||
6100           getLexer().is(AsmToken::EndOfStatement)) break;
6101       if (getLexer().is(AsmToken::Comma)) Parser.Lex();
6102       res = parseOptionalOpr(Operands);
6103     }
6104   }
6105 
6106   return res;
6107 }
6108 
6109 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6110   OperandMatchResultTy res;
6111   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6112     // try to parse any optional operand here
6113     if (Op.IsBit) {
6114       res = parseNamedBit(Op.Name, Operands, Op.Type);
6115     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6116       res = parseOModOperand(Operands);
6117     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6118                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6119                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6120       res = parseSDWASel(Operands, Op.Name, Op.Type);
6121     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6122       res = parseSDWADstUnused(Operands);
6123     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6124                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6125                Op.Type == AMDGPUOperand::ImmTyNegLo ||
6126                Op.Type == AMDGPUOperand::ImmTyNegHi) {
6127       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6128                                         Op.ConvertResult);
6129     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6130       res = parseDim(Operands);
6131     } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
6132       res = parseDfmtNfmt(Operands);
6133     } else {
6134       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6135     }
6136     if (res != MatchOperand_NoMatch) {
6137       return res;
6138     }
6139   }
6140   return MatchOperand_NoMatch;
6141 }
6142 
6143 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6144   StringRef Name = Parser.getTok().getString();
6145   if (Name == "mul") {
6146     return parseIntWithPrefix("mul", Operands,
6147                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6148   }
6149 
6150   if (Name == "div") {
6151     return parseIntWithPrefix("div", Operands,
6152                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6153   }
6154 
6155   return MatchOperand_NoMatch;
6156 }
6157 
6158 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6159   cvtVOP3P(Inst, Operands);
6160 
6161   int Opc = Inst.getOpcode();
6162 
6163   int SrcNum;
6164   const int Ops[] = { AMDGPU::OpName::src0,
6165                       AMDGPU::OpName::src1,
6166                       AMDGPU::OpName::src2 };
6167   for (SrcNum = 0;
6168        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6169        ++SrcNum);
6170   assert(SrcNum > 0);
6171 
6172   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6173   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6174 
6175   if ((OpSel & (1 << SrcNum)) != 0) {
6176     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6177     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6178     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6179   }
6180 }
6181 
6182 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
      // 1. This operand is an input-modifiers operand
  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not the last operand
      && Desc.NumOperands > (OpNum + 1)
      // 3. The next operand is a register class
      && Desc.OpInfo[OpNum + 1].RegClass != -1
      // 4. The next operand is not tied to any other operand
      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6191 }
6192 
6193 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6194 {
6195   OptionalImmIndexMap OptionalIdx;
6196   unsigned Opc = Inst.getOpcode();
6197 
6198   unsigned I = 1;
6199   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6200   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6201     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6202   }
6203 
6204   for (unsigned E = Operands.size(); I != E; ++I) {
6205     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6206     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6207       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6208     } else if (Op.isInterpSlot() ||
6209                Op.isInterpAttr() ||
6210                Op.isAttrChan()) {
6211       Inst.addOperand(MCOperand::createImm(Op.getImm()));
6212     } else if (Op.isImmModifier()) {
6213       OptionalIdx[Op.getImmTy()] = I;
6214     } else {
6215       llvm_unreachable("unhandled operand type");
6216     }
6217   }
6218 
6219   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6220     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6221   }
6222 
6223   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6224     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6225   }
6226 
6227   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6228     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6229   }
6230 }
6231 
6232 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
6233                               OptionalImmIndexMap &OptionalIdx) {
6234   unsigned Opc = Inst.getOpcode();
6235 
6236   unsigned I = 1;
6237   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6238   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6239     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6240   }
6241 
6242   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
6243     // This instruction has src modifiers
6244     for (unsigned E = Operands.size(); I != E; ++I) {
6245       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6246       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6247         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6248       } else if (Op.isImmModifier()) {
6249         OptionalIdx[Op.getImmTy()] = I;
6250       } else if (Op.isRegOrImm()) {
6251         Op.addRegOrImmOperands(Inst, 1);
6252       } else {
6253         llvm_unreachable("unhandled operand type");
6254       }
6255     }
6256   } else {
6257     // No src modifiers
6258     for (unsigned E = Operands.size(); I != E; ++I) {
6259       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6260       if (Op.isMod()) {
6261         OptionalIdx[Op.getImmTy()] = I;
6262       } else {
6263         Op.addRegOrImmOperands(Inst, 1);
6264       }
6265     }
6266   }
6267 
6268   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6269     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6270   }
6271 
6272   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6273     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6274   }
6275 
6276   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
6277   // they have a src2 register operand that is tied to the dst operand.
6278   // The assembler does not allow modifiers for this operand, so
6279   // src2_modifiers must be 0.
6280   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
6281       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
6282       Opc == AMDGPU::V_MAC_F32_e64_vi ||
6283       Opc == AMDGPU::V_MAC_F16_e64_vi ||
6284       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
6285       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
6286       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
6287     auto it = Inst.begin();
6288     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
6289     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
6290     ++it;
6291     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6292   }
6293 }
6294 
6295 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
6296   OptionalImmIndexMap OptionalIdx;
6297   cvtVOP3(Inst, Operands, OptionalIdx);
6298 }
6299 
6300 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
6301                                const OperandVector &Operands) {
6302   OptionalImmIndexMap OptIdx;
6303   const int Opc = Inst.getOpcode();
6304   const MCInstrDesc &Desc = MII.get(Opc);
6305 
6306   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
6307 
6308   cvtVOP3(Inst, Operands, OptIdx);
6309 
6310   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
6311     assert(!IsPacked);
6312     Inst.addOperand(Inst.getOperand(0));
6313   }
6314 
6315   // FIXME: This is messy. Parse the modifiers as if this were a normal VOP3
6316   // instruction, and then figure out where to actually put the modifiers.
6317 
6318   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
6319 
6320   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
6321   if (OpSelHiIdx != -1) {
6322     int DefaultVal = IsPacked ? -1 : 0;
6323     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
6324                           DefaultVal);
6325   }
6326 
6327   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
6328   if (NegLoIdx != -1) {
6329     assert(IsPacked);
6330     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
6331     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
6332   }
6333 
6334   const int Ops[] = { AMDGPU::OpName::src0,
6335                       AMDGPU::OpName::src1,
6336                       AMDGPU::OpName::src2 };
6337   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
6338                          AMDGPU::OpName::src1_modifiers,
6339                          AMDGPU::OpName::src2_modifiers };
6340 
6341   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6342 
6343   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6344   unsigned OpSelHi = 0;
6345   unsigned NegLo = 0;
6346   unsigned NegHi = 0;
6347 
6348   if (OpSelHiIdx != -1) {
6349     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
6350   }
6351 
6352   if (NegLoIdx != -1) {
6353     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
6354     NegLo = Inst.getOperand(NegLoIdx).getImm();
6355     NegHi = Inst.getOperand(NegHiIdx).getImm();
6356   }
6357 
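  // Fold the parsed op_sel/op_sel_hi/neg_lo/neg_hi bit for each source into the
  // corresponding src*_modifiers operand.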
6358   for (int J = 0; J < 3; ++J) {
6359     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
6360     if (OpIdx == -1)
6361       break;
6362 
6363     uint32_t ModVal = 0;
6364 
6365     if ((OpSel & (1 << J)) != 0)
6366       ModVal |= SISrcMods::OP_SEL_0;
6367 
6368     if ((OpSelHi & (1 << J)) != 0)
6369       ModVal |= SISrcMods::OP_SEL_1;
6370 
6371     if ((NegLo & (1 << J)) != 0)
6372       ModVal |= SISrcMods::NEG;
6373 
6374     if ((NegHi & (1 << J)) != 0)
6375       ModVal |= SISrcMods::NEG_HI;
6376 
6377     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
6378 
6379     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
6380   }
6381 }
6382 
6383 //===----------------------------------------------------------------------===//
6384 // dpp
6385 //===----------------------------------------------------------------------===//
6386 
6387 bool AMDGPUOperand::isDPP8() const {
6388   return isImmTy(ImmTyDPP8);
6389 }
6390 
6391 bool AMDGPUOperand::isDPPCtrl() const {
6392   using namespace AMDGPU::DPP;
6393 
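  // dpp_ctrl is a 9-bit field; accept only immediates that fall into one of the
  // known control ranges checked below.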
6394   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
6395   if (result) {
6396     int64_t Imm = getImm();
6397     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
6398            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
6399            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
6400            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
6401            (Imm == DppCtrl::WAVE_SHL1) ||
6402            (Imm == DppCtrl::WAVE_ROL1) ||
6403            (Imm == DppCtrl::WAVE_SHR1) ||
6404            (Imm == DppCtrl::WAVE_ROR1) ||
6405            (Imm == DppCtrl::ROW_MIRROR) ||
6406            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
6407            (Imm == DppCtrl::BCAST15) ||
6408            (Imm == DppCtrl::BCAST31) ||
6409            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
6410            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
6411   }
6412   return false;
6413 }
6414 
6415 //===----------------------------------------------------------------------===//
6416 // mAI
6417 //===----------------------------------------------------------------------===//
6418 
6419 bool AMDGPUOperand::isBLGP() const {
6420   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
6421 }
6422 
6423 bool AMDGPUOperand::isCBSZ() const {
6424   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
6425 }
6426 
6427 bool AMDGPUOperand::isABID() const {
6428   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
6429 }
6430 
6431 bool AMDGPUOperand::isS16Imm() const {
6432   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
6433 }
6434 
6435 bool AMDGPUOperand::isU16Imm() const {
6436   return isImm() && isUInt<16>(getImm());
6437 }
6438 
6439 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
6440   if (!isGFX10())
6441     return MatchOperand_NoMatch;
6442 
6443   SMLoc S = Parser.getTok().getLoc();
6444 
6445   if (getLexer().isNot(AsmToken::Identifier))
6446     return MatchOperand_NoMatch;
6447   if (getLexer().getTok().getString() != "dim")
6448     return MatchOperand_NoMatch;
6449 
6450   Parser.Lex();
6451   if (getLexer().isNot(AsmToken::Colon))
6452     return MatchOperand_ParseFail;
6453 
6454   Parser.Lex();
6455 
6456   // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
6457   // integer.
6458   std::string Token;
6459   if (getLexer().is(AsmToken::Integer)) {
6460     SMLoc Loc = getLexer().getTok().getEndLoc();
6461     Token = getLexer().getTok().getString();
6462     Parser.Lex();
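    // The following identifier must start exactly where the integer ended
    // (e.g. the "D" in "1D"); any gap means this is not a valid dim value.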
6463     if (getLexer().getTok().getLoc() != Loc)
6464       return MatchOperand_ParseFail;
6465   }
6466   if (getLexer().isNot(AsmToken::Identifier))
6467     return MatchOperand_ParseFail;
6468   Token += getLexer().getTok().getString();
6469 
6470   StringRef DimId = Token;
6471   if (DimId.startswith("SQ_RSRC_IMG_"))
6472     DimId = DimId.substr(12);
6473 
6474   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
6475   if (!DimInfo)
6476     return MatchOperand_ParseFail;
6477 
6478   Parser.Lex();
6479 
6480   Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
6481                                               AMDGPUOperand::ImmTyDim));
6482   return MatchOperand_Success;
6483 }
6484 
6485 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
6486   SMLoc S = Parser.getTok().getLoc();
6487   StringRef Prefix;
6488 
6489   if (getLexer().getKind() == AsmToken::Identifier) {
6490     Prefix = Parser.getTok().getString();
6491   } else {
6492     return MatchOperand_NoMatch;
6493   }
6494 
6495   if (Prefix != "dpp8")
6496     return parseDPPCtrl(Operands);
6497   if (!isGFX10())
6498     return MatchOperand_NoMatch;
6499 
6500   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
6501 
6502   int64_t Sels[8];
6503 
6504   Parser.Lex();
6505   if (getLexer().isNot(AsmToken::Colon))
6506     return MatchOperand_ParseFail;
6507 
6508   Parser.Lex();
6509   if (getLexer().isNot(AsmToken::LBrac))
6510     return MatchOperand_ParseFail;
6511 
6512   Parser.Lex();
6513   if (getParser().parseAbsoluteExpression(Sels[0]))
6514     return MatchOperand_ParseFail;
6515   if (0 > Sels[0] || 7 < Sels[0])
6516     return MatchOperand_ParseFail;
6517 
6518   for (size_t i = 1; i < 8; ++i) {
6519     if (getLexer().isNot(AsmToken::Comma))
6520       return MatchOperand_ParseFail;
6521 
6522     Parser.Lex();
6523     if (getParser().parseAbsoluteExpression(Sels[i]))
6524       return MatchOperand_ParseFail;
6525     if (0 > Sels[i] || 7 < Sels[i])
6526       return MatchOperand_ParseFail;
6527   }
6528 
6529   if (getLexer().isNot(AsmToken::RBrac))
6530     return MatchOperand_ParseFail;
6531   Parser.Lex();
6532 
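  // Pack the eight 3-bit lane selects into a single immediate: lane i occupies
  // bits [3*i+2 : 3*i].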
6533   unsigned DPP8 = 0;
6534   for (size_t i = 0; i < 8; ++i)
6535     DPP8 |= (Sels[i] << (i * 3));
6536 
6537   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
6538   return MatchOperand_Success;
6539 }
6540 
6541 OperandMatchResultTy
6542 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
6543   using namespace AMDGPU::DPP;
6544 
6545   SMLoc S = Parser.getTok().getLoc();
6546   StringRef Prefix;
6547   int64_t Int;
6548 
6549   if (getLexer().getKind() == AsmToken::Identifier) {
6550     Prefix = Parser.getTok().getString();
6551   } else {
6552     return MatchOperand_NoMatch;
6553   }
6554 
6555   if (Prefix == "row_mirror") {
6556     Int = DppCtrl::ROW_MIRROR;
6557     Parser.Lex();
6558   } else if (Prefix == "row_half_mirror") {
6559     Int = DppCtrl::ROW_HALF_MIRROR;
6560     Parser.Lex();
6561   } else {
6562     // Check to prevent parseDPPCtrl from consuming invalid tokens.
6563     if (Prefix != "quad_perm"
6564         && Prefix != "row_shl"
6565         && Prefix != "row_shr"
6566         && Prefix != "row_ror"
6567         && Prefix != "wave_shl"
6568         && Prefix != "wave_rol"
6569         && Prefix != "wave_shr"
6570         && Prefix != "wave_ror"
6571         && Prefix != "row_bcast"
6572         && Prefix != "row_share"
6573         && Prefix != "row_xmask") {
6574       return MatchOperand_NoMatch;
6575     }
6576 
6577     if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
6578       return MatchOperand_NoMatch;
6579 
6580     if (!isVI() && !isGFX9() &&
6581         (Prefix == "wave_shl" || Prefix == "wave_shr" ||
6582          Prefix == "wave_rol" || Prefix == "wave_ror" ||
6583          Prefix == "row_bcast"))
6584       return MatchOperand_NoMatch;
6585 
6586     Parser.Lex();
6587     if (getLexer().isNot(AsmToken::Colon))
6588       return MatchOperand_ParseFail;
6589 
6590     if (Prefix == "quad_perm") {
6591       // quad_perm:[%d,%d,%d,%d]
6592       Parser.Lex();
6593       if (getLexer().isNot(AsmToken::LBrac))
6594         return MatchOperand_ParseFail;
6595       Parser.Lex();
6596 
6597       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
6598         return MatchOperand_ParseFail;
6599 
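      // The remaining three lane selects are 2 bits each and are packed at bit
      // positions 2, 4 and 6 above the first select.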
6600       for (int i = 0; i < 3; ++i) {
6601         if (getLexer().isNot(AsmToken::Comma))
6602           return MatchOperand_ParseFail;
6603         Parser.Lex();
6604 
6605         int64_t Temp;
6606         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
6607           return MatchOperand_ParseFail;
6608         const int shift = i*2 + 2;
6609         Int += (Temp << shift);
6610       }
6611 
6612       if (getLexer().isNot(AsmToken::RBrac))
6613         return MatchOperand_ParseFail;
6614       Parser.Lex();
6615     } else {
6616       // sel:%d
6617       Parser.Lex();
6618       if (getParser().parseAbsoluteExpression(Int))
6619         return MatchOperand_ParseFail;
6620 
6621       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
6622         Int |= DppCtrl::ROW_SHL0;
6623       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
6624         Int |= DppCtrl::ROW_SHR0;
6625       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
6626         Int |= DppCtrl::ROW_ROR0;
6627       } else if (Prefix == "wave_shl" && 1 == Int) {
6628         Int = DppCtrl::WAVE_SHL1;
6629       } else if (Prefix == "wave_rol" && 1 == Int) {
6630         Int = DppCtrl::WAVE_ROL1;
6631       } else if (Prefix == "wave_shr" && 1 == Int) {
6632         Int = DppCtrl::WAVE_SHR1;
6633       } else if (Prefix == "wave_ror" && 1 == Int) {
6634         Int = DppCtrl::WAVE_ROR1;
6635       } else if (Prefix == "row_bcast") {
6636         if (Int == 15) {
6637           Int = DppCtrl::BCAST15;
6638         } else if (Int == 31) {
6639           Int = DppCtrl::BCAST31;
6640         } else {
6641           return MatchOperand_ParseFail;
6642         }
6643       } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
6644         Int |= DppCtrl::ROW_SHARE_FIRST;
6645       } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
6646         Int |= DppCtrl::ROW_XMASK_FIRST;
6647       } else {
6648         return MatchOperand_ParseFail;
6649       }
6650     }
6651   }
6652 
6653   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
6654   return MatchOperand_Success;
6655 }
6656 
6657 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
6658   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
6659 }
6660 
6661 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
6662   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
6663 }
6664 
6665 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
6666   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
6667 }
6668 
6669 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
6670   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
6671 }
6672 
6673 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
6674   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
6675 }
6676 
6677 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
6678   OptionalImmIndexMap OptionalIdx;
6679 
6680   unsigned I = 1;
6681   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6682   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6683     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6684   }
6685 
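  // For DPP8, the FI ("fetch inactive") flag is collected here and appended as
  // an immediate operand after all other operands.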
6686   int Fi = 0;
6687   for (unsigned E = Operands.size(); I != E; ++I) {
6688     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
6689                                             MCOI::TIED_TO);
6690     if (TiedTo != -1) {
6691       assert((unsigned)TiedTo < Inst.getNumOperands());
6692       // Handle the tied 'old' or src2 operand for MAC instructions.
6693       Inst.addOperand(Inst.getOperand(TiedTo));
6694     }
6695     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6696     // Add the register arguments
6697     if (Op.isReg() && validateVccOperand(Op.getReg())) {
6698       // VOP2b (v_add_u32, v_sub_u32, ...) DPP uses the "vcc" token.
6699       // Skip it.
6700       continue;
6701     }
6702 
6703     if (IsDPP8) {
6704       if (Op.isDPP8()) {
6705         Op.addImmOperands(Inst, 1);
6706       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6707         Op.addRegWithFPInputModsOperands(Inst, 2);
6708       } else if (Op.isFI()) {
6709         Fi = Op.getImm();
6710       } else if (Op.isReg()) {
6711         Op.addRegOperands(Inst, 1);
6712       } else {
6713         llvm_unreachable("Invalid operand type");
6714       }
6715     } else {
6716       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6717         Op.addRegWithFPInputModsOperands(Inst, 2);
6718       } else if (Op.isDPPCtrl()) {
6719         Op.addImmOperands(Inst, 1);
6720       } else if (Op.isImm()) {
6721         // Handle optional arguments
6722         OptionalIdx[Op.getImmTy()] = I;
6723       } else {
6724         llvm_unreachable("Invalid operand type");
6725       }
6726     }
6727   }
6728 
6729   if (IsDPP8) {
6730     using namespace llvm::AMDGPU::DPP;
6731     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
6732   } else {
6733     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
6734     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
6735     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
6736     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
6737       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
6738     }
6739   }
6740 }
6741 
6742 //===----------------------------------------------------------------------===//
6743 // sdwa
6744 //===----------------------------------------------------------------------===//
6745 
6746 OperandMatchResultTy
6747 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
6748                               AMDGPUOperand::ImmTy Type) {
6749   using namespace llvm::AMDGPU::SDWA;
6750 
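  // Expected syntax: <prefix>:<sel>, e.g. dst_sel:WORD_1 or src0_sel:BYTE_0.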
6751   SMLoc S = Parser.getTok().getLoc();
6752   StringRef Value;
6753   OperandMatchResultTy res;
6754 
6755   res = parseStringWithPrefix(Prefix, Value);
6756   if (res != MatchOperand_Success) {
6757     return res;
6758   }
6759 
6760   int64_t Int;
6761   Int = StringSwitch<int64_t>(Value)
6762         .Case("BYTE_0", SdwaSel::BYTE_0)
6763         .Case("BYTE_1", SdwaSel::BYTE_1)
6764         .Case("BYTE_2", SdwaSel::BYTE_2)
6765         .Case("BYTE_3", SdwaSel::BYTE_3)
6766         .Case("WORD_0", SdwaSel::WORD_0)
6767         .Case("WORD_1", SdwaSel::WORD_1)
6768         .Case("DWORD", SdwaSel::DWORD)
6769         .Default(0xffffffff);
6770   Parser.Lex(); // eat last token
6771 
6772   if (Int == 0xffffffff) {
6773     return MatchOperand_ParseFail;
6774   }
6775 
6776   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
6777   return MatchOperand_Success;
6778 }
6779 
6780 OperandMatchResultTy
6781 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
6782   using namespace llvm::AMDGPU::SDWA;
6783 
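  // Expected syntax: dst_unused:<mode>, where <mode> is UNUSED_PAD, UNUSED_SEXT
  // or UNUSED_PRESERVE.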
6784   SMLoc S = Parser.getTok().getLoc();
6785   StringRef Value;
6786   OperandMatchResultTy res;
6787 
6788   res = parseStringWithPrefix("dst_unused", Value);
6789   if (res != MatchOperand_Success) {
6790     return res;
6791   }
6792 
6793   int64_t Int;
6794   Int = StringSwitch<int64_t>(Value)
6795         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
6796         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
6797         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
6798         .Default(0xffffffff);
6799   Parser.Lex(); // eat last token
6800 
6801   if (Int == 0xffffffff) {
6802     return MatchOperand_ParseFail;
6803   }
6804 
6805   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
6806   return MatchOperand_Success;
6807 }
6808 
6809 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
6810   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
6811 }
6812 
6813 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
6814   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
6815 }
6816 
6817 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
6818   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
6819 }
6820 
6821 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
6822   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
6823 }
6824 
6825 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
6826                               uint64_t BasicInstType, bool skipVcc) {
6827   using namespace llvm::AMDGPU::SDWA;
6828 
6829   OptionalImmIndexMap OptionalIdx;
6830   bool skippedVcc = false;
6831 
6832   unsigned I = 1;
6833   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6834   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6835     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6836   }
6837 
6838   for (unsigned E = Operands.size(); I != E; ++I) {
6839     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6840     if (skipVcc && !skippedVcc && Op.isReg() &&
6841         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
6842       // VOP2b (v_add_u32, v_sub_u32, ...) SDWA uses the "vcc" token as dst.
6843       // Skip it if it is the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
6844       // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
6845       // Skip VCC only if we didn't skip it on the previous iteration.
6846       if (BasicInstType == SIInstrFlags::VOP2 &&
6847           (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
6848         skippedVcc = true;
6849         continue;
6850       } else if (BasicInstType == SIInstrFlags::VOPC &&
6851                  Inst.getNumOperands() == 0) {
6852         skippedVcc = true;
6853         continue;
6854       }
6855     }
6856     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6857       Op.addRegOrImmWithInputModsOperands(Inst, 2);
6858     } else if (Op.isImm()) {
6859       // Handle optional arguments
6860       OptionalIdx[Op.getImmTy()] = I;
6861     } else {
6862       llvm_unreachable("Invalid operand type");
6863     }
6864     skippedVcc = false;
6865   }
6866 
6867   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
6868       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
6869       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
6870     // v_nop_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments.
6871     switch (BasicInstType) {
6872     case SIInstrFlags::VOP1:
6873       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6874       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6875         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6876       }
6877       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6878       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6879       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6880       break;
6881 
6882     case SIInstrFlags::VOP2:
6883       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6884       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6885         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6886       }
6887       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6888       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6889       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6890       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6891       break;
6892 
6893     case SIInstrFlags::VOPC:
6894       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
6895         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6896       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6897       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6898       break;
6899 
6900     default:
6901       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
6902     }
6903   }
6904 
6905   // Special case v_mac_{f16, f32}:
6906   // they have a src2 register operand that is tied to the dst operand.
6907   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
6908       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
6909     auto it = Inst.begin();
6910     std::advance(
6911       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
6912     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6913   }
6914 }
6915 
6916 //===----------------------------------------------------------------------===//
6917 // mAI
6918 //===----------------------------------------------------------------------===//
6919 
6920 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
6921   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
6922 }
6923 
6924 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
6925   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
6926 }
6927 
6928 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
6929   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
6930 }
6931 
6932 /// Force static initialization.
6933 extern "C" void LLVMInitializeAMDGPUAsmParser() {
6934   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
6935   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
6936 }
6937 
6938 #define GET_REGISTER_MATCHER
6939 #define GET_MATCHER_IMPLEMENTATION
6940 #define GET_MNEMONIC_SPELL_CHECKER
6941 #include "AMDGPUGenAsmMatcher.inc"
6942 
6943 // This function should be defined after the auto-generated include so that
6944 // the MatchClassKind enum is defined.
6945 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
6946                                                      unsigned Kind) {
6947   // Tokens like "glc" would be parsed as immediate operands in ParseOperand(),
6948   // but MatchInstructionImpl() expects to see a token and fails to validate the
6949   // operand. This method checks whether we were given an immediate operand but
6950   // expected the corresponding token.
6951   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
6952   switch (Kind) {
6953   case MCK_addr64:
6954     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
6955   case MCK_gds:
6956     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
6957   case MCK_lds:
6958     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
6959   case MCK_glc:
6960     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
6961   case MCK_idxen:
6962     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
6963   case MCK_offen:
6964     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
6965   case MCK_SSrcB32:
6966     // When operands have expression values, they will return true for isToken,
6967     // because it is not possible to distinguish between a token and an
6968     // expression at parse time. MatchInstructionImpl() always tries to match an
6969     // operand as a token when isToken returns true, and when the name of the
6970     // expression is not a valid token the match fails, so we need to handle it
6971     // here.
6972     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
6973   case MCK_SSrcF32:
6974     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
6975   case MCK_SoppBrTarget:
6976     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
6977   case MCK_VReg32OrOff:
6978     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
6979   case MCK_InterpSlot:
6980     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
6981   case MCK_Attr:
6982     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
6983   case MCK_AttrChan:
6984     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
6985   default:
6986     return Match_InvalidOperand;
6987   }
6988 }
6989 
6990 //===----------------------------------------------------------------------===//
6991 // endpgm
6992 //===----------------------------------------------------------------------===//
6993 
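// Parse the optional immediate operand of s_endpgm; it defaults to 0 when
// absent and must fit in 16 bits.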
6994 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
6995   SMLoc S = Parser.getTok().getLoc();
6996   int64_t Imm = 0;
6997 
6998   if (!parseExpr(Imm)) {
6999     // The operand is optional; if not present, default to 0.
7000     Imm = 0;
7001   }
7002 
7003   if (!isUInt<16>(Imm)) {
7004     Error(S, "expected a 16-bit value");
7005     return MatchOperand_ParseFail;
7006   }
7007 
7008   Operands.push_back(
7009       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
7010   return MatchOperand_Success;
7011 }
7012 
7013 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
7014