//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <map>
#include <memory>
#include <string>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

/// Broad classification of a parsed register operand (vector GPR, scalar
/// GPR, trap-handler temporary, or a special register such as VCC).
enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

/// A single parsed AMDGPU assembly operand: a literal token, an immediate,
/// a register, or a (possibly not-yet-resolved) expression.  Register and
/// plain immediates additionally carry source-input modifiers (abs / neg /
/// sext).  The many is*() predicates below are the match classes consumed
/// by the tablegen-generated asm matcher.
class AMDGPUOperand : public MCParsedAsmOperand {
  // Discriminator for the anonymous union of payloads below.
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  // Parser that created this operand; used by predicates that need
  // subtarget information (e.g. inline-constant checks).
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  /// Source-input modifiers parsed around an operand.  FP modifiers
  /// (abs/neg) and the integer modifier (sext) are mutually exclusive;
  /// getModifiersOperand() asserts that.
  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    // Encode FP modifiers into the SISrcMods immediate bits.
    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0;
      Operand |= Neg ? SISrcMods::NEG : 0;
      return Operand;
    }

    // Encode the integer (sign-extension) modifier into SISrcMods bits.
    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  /// Tags distinguishing the many flavors of immediate operands (named
  /// instruction modifiers, offsets, DPP/SDWA controls, etc.).
  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyGLC,
    ImmTySLC,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyHigh
  };

  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    bool IsForcedVOP3;
    Modifiers Mods;
  };

  // Payload; the active member is selected by Kind.
  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

  bool isToken() const override {
    if (Kind == Token)
      return true;

    if (Kind != Expression || !Expr)
      return false;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  // A "plain" register for the matcher: register kind with no modifiers.
  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(MVT type) const {
    return isRegKind() || isInlinableImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(MVT::f64);
  }

  // True if the operand is a register in any of the VGPR classes.
  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  // Any immediate that is a named modifier rather than a plain value.
  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128() const { return isImmTy(ImmTyR128); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  // Offsets are additionally range-checked to the encodable bit widths.
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<16>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
  bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  // SCSrc*: scalar register or inline constant, no modifiers.
  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  // SSrc*: like SCSrc* but also admits literal constants (and expressions
  // for 32-bit forms).
  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  // VCSrc*: VGPR-or-scalar source, or inline constant, no modifiers.
  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  // VSrc*: like VCSrc* but also admits literal constants.
  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcF16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isVSrcB16();
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isVSrcF16();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMRDOffset20() const;
  bool isSMRDLiteralOffset() const;
  bool isDPPCtrl() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    // See isToken(): symbol-reference expressions double as tokens.
    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  // NOTE(review): unlike getImm(), this does not assert Kind == Register;
  // callers presumably check isRegKind()/isReg() first — confirm.
  unsigned getReg() const override {
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  // Modifiers live on registers and on plain (ImmTyNone) immediates only.
  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  // Emits the modifier bits first, then the register/immediate operand.
  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  // Debug helper: human-readable name for an ImmTy.  The switch covers
  // every enumerator (no default), so adding an ImmTy without a case here
  // triggers a compiler warning.
  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128: OS << "R128"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyHigh: OS << "High"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  // NOTE(review): HasExplicitEncodingSize is not referenced in the body;
  // it appears to be an unused parameter kept for signature compatibility.
  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E,
                                      bool ForceVOP3) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->Reg.IsForcedVOP3 = ForceVOP3;
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

// Debug-print the modifier flags as raw booleans.
raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
/// Tracks the registers referenced by the current kernel and publishes the
/// running counts as the assembler symbols .kernel.sgpr_count and
/// .kernel.vgpr_count.
class KernelScopeInfo {
  // One past the highest SGPR/VGPR index seen so far; -1 before any use.
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

  // Record that SGPR index \p i is used; bump the count symbol if it grows.
  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  // Record that VGPR index \p i is used; bump the count symbol if it grows.
  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  // Reset the counters for a new kernel scope and (re)publish zero counts.
  void initialize(MCContext &Context) {
    Ctx = &Context;
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  // A register use spanning RegWidth dwords starting at DwordRegIndex.
  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
      case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
      case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
      default: break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
850 /// 851 /// \param Features [in] Target features, used for bug corrections. 852 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 853 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 854 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 855 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 856 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 857 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 858 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 859 /// \param VGPRBlocks [out] Result VGPR block count. 860 /// \param SGPRBlocks [out] Result SGPR block count. 861 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 862 bool FlatScrUsed, bool XNACKUsed, 863 unsigned NextFreeVGPR, SMRange VGPRRange, 864 unsigned NextFreeSGPR, SMRange SGPRRange, 865 unsigned &VGPRBlocks, unsigned &SGPRBlocks); 866 bool ParseDirectiveAMDGCNTarget(); 867 bool ParseDirectiveAMDHSAKernel(); 868 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 869 bool ParseDirectiveHSACodeObjectVersion(); 870 bool ParseDirectiveHSACodeObjectISA(); 871 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 872 bool ParseDirectiveAMDKernelCodeT(); 873 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const; 874 bool ParseDirectiveAMDGPUHsaKernel(); 875 876 bool ParseDirectiveISAVersion(); 877 bool ParseDirectiveHSAMetadata(); 878 bool ParseDirectivePALMetadata(); 879 880 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 881 RegisterKind RegKind, unsigned Reg1, 882 unsigned RegNum); 883 bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, 884 unsigned& RegNum, unsigned& RegWidth, 885 unsigned *DwordRegIndex); 886 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 887 void initializeGprCountSymbol(RegisterKind RegKind); 888 bool updateGprCountSymbols(RegisterKind RegKind, 
unsigned DwordRegIndex, 889 unsigned RegWidth); 890 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 891 bool IsAtomic, bool IsAtomicReturn, bool IsLds = false); 892 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 893 bool IsGdsHardcoded); 894 895 public: 896 enum AMDGPUMatchResultTy { 897 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 898 }; 899 900 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; 901 902 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, 903 const MCInstrInfo &MII, 904 const MCTargetOptions &Options) 905 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { 906 MCAsmParserExtension::Initialize(Parser); 907 908 if (getFeatureBits().none()) { 909 // Set default features. 910 copySTI().ToggleFeature("SOUTHERN_ISLANDS"); 911 } 912 913 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits())); 914 915 { 916 // TODO: make those pre-defined variables read-only. 917 // Currently there is none suitable machinery in the core llvm-mc for this. 918 // MCSymbol::isRedefinable is intended for another purpose, and 919 // AsmParser::parseDirectiveSet() cannot be specialized for specific target. 
920 AMDGPU::IsaInfo::IsaVersion ISA = 921 AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()); 922 MCContext &Ctx = getContext(); 923 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 924 MCSymbol *Sym = 925 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 926 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 927 } else { 928 MCSymbol *Sym = 929 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 930 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 931 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 932 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 933 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 934 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 935 } 936 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 937 initializeGprCountSymbol(IS_VGPR); 938 initializeGprCountSymbol(IS_SGPR); 939 } else 940 KernelScope.initialize(getContext()); 941 } 942 } 943 944 bool hasXNACK() const { 945 return AMDGPU::hasXNACK(getSTI()); 946 } 947 948 bool hasMIMG_R128() const { 949 return AMDGPU::hasMIMG_R128(getSTI()); 950 } 951 952 bool hasPackedD16() const { 953 return AMDGPU::hasPackedD16(getSTI()); 954 } 955 956 bool isSI() const { 957 return AMDGPU::isSI(getSTI()); 958 } 959 960 bool isCI() const { 961 return AMDGPU::isCI(getSTI()); 962 } 963 964 bool isVI() const { 965 return AMDGPU::isVI(getSTI()); 966 } 967 968 bool isGFX9() const { 969 return AMDGPU::isGFX9(getSTI()); 970 } 971 972 bool hasInv2PiInlineImm() const { 973 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 974 } 975 976 bool hasFlatOffsets() const { 977 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 978 } 979 980 bool hasSGPR102_SGPR103() const { 981 return !isVI(); 982 } 983 984 bool hasIntClamp() const { 985 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 986 } 987 988 AMDGPUTargetStreamer &getTargetStreamer() { 989 
    // The target streamer installed for this target is always an
    // AMDGPUTargetStreamer, so the downcast is safe.
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  // Record an encoding the user explicitly requested (see the isForced*
  // queries below); the match logic rejects candidates that conflict.
  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;

  // MCTargetAsmParser overrides and top-level parsing entry points.
  std::unique_ptr<AMDGPUOperand> parseRegister();
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  // Helpers for "prefix:value"-style named operands.
  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy parseOperandArrayWithPrefix(
    const char *Prefix,
    OperandVector &Operands,
    AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
    bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(const char *Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value);

  // Register / immediate operand parsing (with optional FP or integer
  // input modifiers such as abs/neg/sext).
  bool parseAbsoluteExpr(int64_t &Val, bool AbsMod = false);
  OperandMatchResultTy parseImm(OperandVector &Operands, bool AbsMod = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);

  // DS / EXP instruction conversion: cvtDS and cvtDSGds differ only in
  // whether the gds flag is forwarded to cvtDSImpl.
  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  // Result of parsing one named field of s_sendmsg/s_setreg-style
  // constructs: the numeric id plus whether it was given symbolically.
  struct OperandInfoTy {
    int64_t Id;
    bool IsSymbolic = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
  bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);

  void errorExpTgt();
  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);

  // Post-match semantic validation of an assembled MCInst.
  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateConstantBusLimitations(const MCInst &Inst);
  bool validateEarlyClobberLimitations(const MCInst &Inst);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGR128(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  // Small lexer utilities: the trySkip* forms consume on match only,
  // the skipToken/parse* forms report an error on failure.
  bool trySkipId(const StringRef Id);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseExpr(int64_t &Imm);

public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);

  // ds_swizzle operand parsing (offset literal or one of the
  // QUAD_PERM/BITMASK_PERM/BROADCAST/SWAP/REVERSE macros).
  bool parseSwizzleOperands(const unsigned OpNum,
                            int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  // MUBUF/MTBUF conversion wrappers; the bool arguments select the
  // atomic / atomic-with-return / LDS variants of cvtMubufImpl.
  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  // Factories for default (omitted) optional operands.
  AMDGPUOperand::Ptr defaultGLC() const;
  AMDGPUOperand::Ptr defaultSLC() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMRDOffset20() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultOffsetU12() const;
  AMDGPUOperand::Ptr defaultOffsetS13() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);

  // DPP control operand and conversion.
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands);

  // SDWA selector operands and per-format conversion wrappers.
  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType, bool skipVcc = false);
};

// Table entry describing one optional (named) operand: its mnemonic,
// immediate kind, whether it is a bare bit flag, and an optional value
// conversion callback.
struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with integer type with equivalent bitwidth.
// Map an operand size in bytes (2, 4 or 8) to the matching IEEE float
// semantics. Aborts on any other size.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

// Convenience overload: derive the byte size from an MVT.
static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

// Map an AMDGPU operand-type enum value to the IEEE semantics used when
// converting a literal for that operand. Packed 16-bit (V2INT16/V2FP16)
// operands use half semantics for the per-lane value.
static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

// Convert FPLiteral in place to the semantics of VT and report whether the
// conversion is acceptable: precision loss is tolerated, but a lossy
// conversion that overflows or underflows is not.
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to single precision
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision lost but not overflow or underflow
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow) != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

// Return true if this plain immediate can be encoded as a hardware inline
// constant for an operand of the given type. FP literal tokens are first
// checked for a lossless narrowing to the operand's semantics.
bool AMDGPUOperand::isInlinableImm(MVT type) const {
  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return AMDGPU::isInlinableLiteral16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (type.getScalarSizeInBits() == 16) {
    return AMDGPU::isInlinableLiteral16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

// Return true if this plain immediate may be encoded as a (non-inline)
// literal constant for an operand of the given type.
bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isUIntN(Size, Imm.Val) || isIntN(Size, Imm.Val);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set low 64-bits of literal to zeroes but we accept this literals
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, type);
}

// True if this operand is a register contained in the given register class.
bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}

// SDWA sources: VI accepts VGPRs only; GFX9 also accepts any register kind
// or an inlinable immediate of the matching type; other subtargets none.
bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
    return isVReg();
  else if (AsmParser->isGFX9())
    return isRegKind() || isInlinableImm(type);
  else
    return false;
}

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}

// Fold the abs/neg FP input modifiers into the raw literal bits by clearing
// or flipping the sign bit for the given operand size in bytes.
uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
{
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  assert(Size == 2 || Size == 4 || Size == 8);

  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));

  if (Imm.Mods.Abs) {
    Val &= ~FpSignMask;
  }
  if (Imm.Mods.Neg) {
    Val ^= FpSignMask;
  }

  return Val;
}

// Append this immediate to Inst, routing SI source operands through the
// literal-encoding path and everything else straight through.
void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
                             Inst.getNumOperands())) {
    // NOTE(review): '&' here is the bitwise operator, so this parses as
    // (ApplyModifiers & isImmTy(ImmTyNone)) && ... — equivalent to '&&'
    // for bool operands, but it reads like a typo; confirm intent.
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  } else {
    assert(!isImmTy(ImmTyNone) || !hasModifiers());
    Inst.addOperand(MCOperand::createImm(Imm.Val));
  }
}

// Encode Val as the immediate for the next (SI source) operand of Inst,
// choosing between inline-constant and literal encodings based on the
// operand type recorded in the instruction description.
void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        return;
      }

      // Non-inlineable
      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
          "Can't encode literal as exact 64-bit floating-point operand. "
          "Low 32-bits will be set to zero");
        }

        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
        return;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to single precision
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision lost but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      if (OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
          OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
        // Packed operands: replicate the 16-bit value into both halves.
        ImmVal |= (ImmVal << 16);
      }

      Inst.addOperand(MCOperand::createImm(ImmVal));
      return;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    return;
  }

  // We got int literal token.
  // Only sign extend inline immediates.
  // FIXME: No errors on truncation
  switch (OpTy) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    if (isInt<32>(Val) &&
        AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    if (isInt<16>(Val) &&
        AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
    return;

  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
    // Packed operands must be inline constants; replicate into both halves.
    auto LiteralVal = static_cast<uint16_t>(Literal.getLoBits(16).getZExtValue());
    assert(AMDGPU::isInlinableLiteral16(LiteralVal,
                                        AsmParser->hasInv2PiInlineImm()));

    uint32_t ImmVal = static_cast<uint32_t>(LiteralVal) << 16 |
                      static_cast<uint32_t>(LiteralVal);
    Inst.addOperand(MCOperand::createImm(ImmVal));
    return;
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}

// Append a KIMM (fixed-width literal) operand: integer tokens are truncated
// to Bitwidth; fp tokens are converted to the Bitwidth-sized semantics.
template <unsigned Bitwidth>
void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
  APInt Literal(64, Imm.Val);

  if (!Imm.IsFPImm) {
    // We got int literal token.
    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
    return;
  }

  bool Lost;
  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
                    APFloat::rmNearestTiesToEven, &Lost);
  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
}

// Append this register, translated to the subtarget's MC register number.
void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Map a register kind and width (in 32-bit registers) to the corresponding
// register class ID, or -1 if no class of that width exists.
static int getRegClass(RegisterKind Is, unsigned RegWidth) {
  if (Is == IS_VGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::VGPR_32RegClassID;
      case 2: return AMDGPU::VReg_64RegClassID;
      case 3: return AMDGPU::VReg_96RegClassID;
      case 4: return AMDGPU::VReg_128RegClassID;
      case 8: return AMDGPU::VReg_256RegClassID;
      case 16: return AMDGPU::VReg_512RegClassID;
    }
  } else if (Is == IS_TTMP) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::TTMP_32RegClassID;
      case 2: return AMDGPU::TTMP_64RegClassID;
      case 4: return AMDGPU::TTMP_128RegClassID;
      case 8: return AMDGPU::TTMP_256RegClassID;
      case 16: return AMDGPU::TTMP_512RegClassID;
    }
  } else if (Is == IS_SGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::SGPR_32RegClassID;
      case 2: return AMDGPU::SGPR_64RegClassID;
      case 4: return AMDGPU::SGPR_128RegClassID;
      case 8: return AMDGPU::SGPR_256RegClassID;
      case 16: return AMDGPU::SGPR_512RegClassID;
    }
  }
  return -1;
}
// Map a special-register name to its register number, or 0 for no match.
static unsigned getSpecialRegForName(StringRef RegName) {
  return StringSwitch<unsigned>(RegName)
    .Case("exec", AMDGPU::EXEC)
    .Case("vcc", AMDGPU::VCC)
    .Case("flat_scratch", AMDGPU::FLAT_SCR)
    .Case("xnack_mask", AMDGPU::XNACK_MASK)
    .Case("m0", AMDGPU::M0)
    .Case("scc", AMDGPU::SCC)
    .Case("tba", AMDGPU::TBA)
    .Case("tma", AMDGPU::TMA)
    .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
    .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
    .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
    .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
    .Case("vcc_lo", AMDGPU::VCC_LO)
    .Case("vcc_hi", AMDGPU::VCC_HI)
    .Case("exec_lo", AMDGPU::EXEC_LO)
    .Case("exec_hi", AMDGPU::EXEC_HI)
    .Case("tma_lo", AMDGPU::TMA_LO)
    .Case("tma_hi", AMDGPU::TMA_HI)
    .Case("tba_lo", AMDGPU::TBA_LO)
    .Case("tba_hi", AMDGPU::TBA_HI)
    .Default(0);
}

// MCTargetAsmParser hook. Returns true on failure (MC convention).
bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc) {
  auto R = parseRegister();
  if (!R) return true;
  assert(R->isReg());
  RegNo = R->getReg();
  StartLoc = R->getStartLoc();
  EndLoc = R->getEndLoc();
  return false;
}

// Extend the register list being accumulated in (Reg, RegWidth) by Reg1.
// Special registers may only pair a known LO half with its HI half;
// VGPR/SGPR/TTMP registers must be consecutive. Returns false on any
// non-contiguous or mismatched combination.
bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
                                            RegisterKind RegKind, unsigned Reg1,
                                            unsigned RegNum) {
  switch (RegKind) {
  case IS_SPECIAL:
    if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
      Reg = AMDGPU::EXEC;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
      Reg = AMDGPU::FLAT_SCR;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
      Reg = AMDGPU::XNACK_MASK;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
      Reg = AMDGPU::VCC;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
      Reg = AMDGPU::TBA;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
      Reg = AMDGPU::TMA;
      RegWidth = 2;
      return true;
    }
    return false;
  case IS_VGPR:
  case IS_SGPR:
  case IS_TTMP:
    if (Reg1 != Reg + RegWidth) {
      return false;
    }
    RegWidth++;
    return true;
  default:
    llvm_unreachable("unexpected register kind");
  }
}

// Parse one register reference in any of the accepted forms: a special
// register name, vN / sN / ttmpN, a range v[LO:HI] (":HI" optional), or a
// bracketed list of consecutive single registers [s0,s1,...]. On success
// fills RegKind/Reg/RegNum/RegWidth and, if requested, the 0-based dword
// index of the first register. Returns false on any syntax or range error.
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          unsigned *DwordRegIndex) {
  if (DwordRegIndex) { *DwordRegIndex = 0; }
  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (getLexer().is(AsmToken::Identifier)) {
    StringRef RegName = Parser.getTok().getString();
    if ((Reg = getSpecialRegForName(RegName))) {
      Parser.Lex();
      RegKind = IS_SPECIAL;
    } else {
      unsigned RegNumIndex = 0;
      if (RegName[0] == 'v') {
        RegNumIndex = 1;
        RegKind = IS_VGPR;
      } else if (RegName[0] == 's') {
        RegNumIndex = 1;
        RegKind = IS_SGPR;
      } else if (RegName.startswith("ttmp")) {
        RegNumIndex = strlen("ttmp");
        RegKind = IS_TTMP;
      } else {
        return false;
      }
      if (RegName.size() > RegNumIndex) {
        // Single 32-bit register: vXX.
        if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
          return false;
        Parser.Lex();
        RegWidth = 1;
      } else {
        // Range of registers: v[XX:YY]. ":YY" is optional.
        Parser.Lex();
        int64_t RegLo, RegHi;
        if (getLexer().isNot(AsmToken::LBrac))
          return false;
        Parser.Lex();

        if (getParser().parseAbsoluteExpression(RegLo))
          return false;

        const bool isRBrace = getLexer().is(AsmToken::RBrac);
        if (!isRBrace && getLexer().isNot(AsmToken::Colon))
          return false;
        Parser.Lex();

        if (isRBrace) {
          RegHi = RegLo;
        } else {
          if (getParser().parseAbsoluteExpression(RegHi))
            return false;

          if (getLexer().isNot(AsmToken::RBrac))
            return false;
          Parser.Lex();
        }
        RegNum = (unsigned) RegLo;
        RegWidth = (RegHi - RegLo) + 1;
      }
    }
  } else if (getLexer().is(AsmToken::LBrac)) {
    // List of consecutive registers: [s0,s1,s2,s3]
    Parser.Lex();
    if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
      return false;
    if (RegWidth != 1)
      return false;
    RegisterKind RegKind1;
    unsigned Reg1, RegNum1, RegWidth1;
    do {
      if (getLexer().is(AsmToken::Comma)) {
        Parser.Lex();
      } else if (getLexer().is(AsmToken::RBrac)) {
        Parser.Lex();
        break;
      } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
        // Each list element must be a single register of the same kind,
        // consecutive with the registers accumulated so far.
        if (RegWidth1 != 1) {
          return false;
        }
        if (RegKind1 != RegKind) {
          return false;
        }
        if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
          return false;
        }
      } else {
        return false;
      }
    } while (true);
  } else {
    return false;
  }
  switch (RegKind) {
  case IS_SPECIAL:
    RegNum = 0;
    RegWidth = 1;
    break;
  case IS_VGPR:
  case IS_SGPR:
  case IS_TTMP:
  {
    unsigned Size = 1;
    if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
      // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
      Size = std::min(RegWidth, 4u);
    }
    if (RegNum % Size != 0)
      return false;
    if (DwordRegIndex) { *DwordRegIndex = RegNum; }
    // Convert the dword index into an index within the register class.
    RegNum = RegNum / Size;
    int RCID = getRegClass(RegKind, RegWidth);
    if (RCID == -1)
      return false;
    const MCRegisterClass RC = TRI->getRegClass(RCID);
    if (RegNum >= RC.getNumRegs())
      return false;
    Reg = RC.getRegister(RegNum);
    break;
  }

  default:
    llvm_unreachable("unexpected register kind");
  }

  if (!subtargetHasRegister(*TRI, Reg))
    return false;
  return true;
}

// Name of the assembler-visible symbol tracking the next free GPR of the
// given kind, or None for kinds that are not tracked.
Optional<StringRef>
AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
  switch (RegKind) {
  case IS_VGPR:
    return StringRef(".amdgcn.next_free_vgpr");
  case IS_SGPR:
    return StringRef(".amdgcn.next_free_sgpr");
  default:
    return None;
  }
}

// Create the GPR-count symbol for RegKind and initialize it to zero.
void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
  auto SymbolName = getGprCountSymbolName(RegKind);
  assert(SymbolName && "initializing invalid register kind");
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
  Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
}

// Raise the GPR-count symbol so it covers the register range just parsed.
// Returns true on success (note: the !Error(...) idiom returns false after
// emitting a diagnostic, since Error() itself returns true).
bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
                                            unsigned DwordRegIndex,
                                            unsigned RegWidth) {
  // Symbols are only defined for GCN targets
  if (AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()).Major < 6)
    return true;

  auto SymbolName = getGprCountSymbolName(RegKind);
  if (!SymbolName)
    return true;
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);

  int64_t NewMax = DwordRegIndex + RegWidth - 1;
  int64_t OldCount;

  if (!Sym->isVariable())
    return !Error(getParser().getTok().getLoc(),
                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
    return !Error(
        getParser().getTok().getLoc(),
        ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");

  if (OldCount <= NewMax)
    Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));

  return true;
}

// Parse a register reference and wrap it in an AMDGPUOperand, recording
// GPR usage either via the code-object-v3 count symbols or KernelScope.
std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
  const auto &Tok = Parser.getTok();
  SMLoc StartLoc = Tok.getLoc();
  SMLoc EndLoc = Tok.getEndLoc();
  RegisterKind RegKind;
  unsigned Reg, RegNum, RegWidth, DwordRegIndex;

  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
    return nullptr;
  }
  if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
    if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
      return nullptr;
  } else
    KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false);
}

// Evaluate an absolute expression into Val. Returns true on failure.
bool
AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool AbsMod) {
  if (AbsMod && getLexer().peekTok().is(AsmToken::Pipe) &&
      (getLexer().getKind() == AsmToken::Integer ||
       getLexer().getKind() == AsmToken::Real)) {
    // This is a workaround for handling operands like these:
    //     |1.0|
    //     |-1|
    // This syntax is not compatible with syntax of standard
    // MC expressions (due to the trailing '|').

    SMLoc EndLoc;
    const MCExpr *Expr;

    if (getParser().parsePrimaryExpr(Expr, EndLoc)) {
      return true;
    }

    return !Expr->evaluateAsAbsolute(Val);
  }

  return getParser().parseAbsoluteExpression(Val);
}

// Parse an integer or floating-point immediate (with optional leading '-'
// that only binds to a literal token). FP values are stored as the raw
// 64-bit bit pattern of the host double.
OperandMatchResultTy
AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) {
  // TODO: add syntactic sugar for 1/(2*PI)
  bool Minus = false;
  if (getLexer().getKind() == AsmToken::Minus) {
    const AsmToken NextToken = getLexer().peekTok();
    if (!NextToken.is(AsmToken::Integer) &&
        !NextToken.is(AsmToken::Real)) {
      return MatchOperand_NoMatch;
    }
    Minus = true;
    Parser.Lex();
  }

  SMLoc S = Parser.getTok().getLoc();
  switch(getLexer().getKind()) {
  case AsmToken::Integer: {
    int64_t IntVal;
    if (parseAbsoluteExpr(IntVal, AbsMod))
      return MatchOperand_ParseFail;
    if (Minus)
      IntVal *= -1;
    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
    return MatchOperand_Success;
  }
  case AsmToken::Real: {
    int64_t IntVal;
    if (parseAbsoluteExpr(IntVal, AbsMod))
      return MatchOperand_ParseFail;

    APFloat F(BitsToDouble(IntVal));
    if (Minus)
      F.changeSign();
    Operands.push_back(
        AMDGPUOperand::CreateImm(this, F.bitcastToAPInt().getZExtValue(), S,
                                 AMDGPUOperand::ImmTyNone, true));
    return MatchOperand_Success;
  }
  default:
    return MatchOperand_NoMatch;
  }
}

// Parse a register operand, tagging it with the forced-VOP3 state.
OperandMatchResultTy
AMDGPUAsmParser::parseReg(OperandVector &Operands) {
  if (auto R = parseRegister()) {
    assert(R->isReg());
    R->Reg.IsForcedVOP3 = isForcedVOP3();
    Operands.push_back(std::move(R));
    return MatchOperand_Success;
  }
  return MatchOperand_NoMatch;
}

// Try an immediate first, then fall back to a register.
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool AbsMod) {
  auto res = parseImm(Operands, AbsMod);
  if (res != MatchOperand_NoMatch) {
    return res;
  }

  return parseReg(Operands);
}

// Parse a register-or-immediate with optional FP input modifiers in any of
// the accepted spellings: leading '-', neg(...), abs(...), or |...|.
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Negate = false, Negate2 = false, Abs = false, Abs2 = false;

  if (getLexer().getKind() == AsmToken::Minus) {
    const AsmToken NextToken = getLexer().peekTok();

    // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
    if (NextToken.is(AsmToken::Minus)) {
      Error(Parser.getTok().getLoc(), "invalid syntax, expected 'neg' modifier");
      return MatchOperand_ParseFail;
    }

    // '-' followed by an integer literal N should be interpreted as integer
    // negation rather than a floating-point NEG modifier applied to N.
    // Besides being counter-intuitive, such use of floating-point NEG modifier
    // results in different meaning of integer literals used with VOP1/2/C
    // and VOP3, for example:
    //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
    //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
    // Negative fp literals should be handled likewise for uniformity
    if (!NextToken.is(AsmToken::Integer) && !NextToken.is(AsmToken::Real)) {
      Parser.Lex();
      Negate = true;
    }
  }

  if (getLexer().getKind() == AsmToken::Identifier &&
      Parser.getTok().getString() == "neg") {
    if (Negate) {
      Error(Parser.getTok().getLoc(), "expected register or immediate");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Negate2 = true;
    if (getLexer().isNot(AsmToken::LParen)) {
      Error(Parser.getTok().getLoc(), "expected left paren after neg");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
  }

  if (getLexer().getKind() == AsmToken::Identifier &&
      Parser.getTok().getString() == "abs") {
    Parser.Lex();
    Abs2 = true;
    if (getLexer().isNot(AsmToken::LParen)) {
      Error(Parser.getTok().getLoc(), "expected left paren after abs");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
  }

  if (getLexer().getKind() == AsmToken::Pipe) {
    if (Abs2) {
      Error(Parser.getTok().getLoc(), "expected register or immediate");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Abs = true;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return Res;
  }

  // Consume the closing delimiter of whichever modifier spelling was used.
  AMDGPUOperand::Modifiers Mods;
  if (Abs) {
    if (getLexer().getKind() != AsmToken::Pipe) {
      Error(Parser.getTok().getLoc(), "expected vertical bar");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Mods.Abs = true;
  }
  if (Abs2) {
    if (getLexer().isNot(AsmToken::RParen)) {
      Error(Parser.getTok().getLoc(), "expected closing parentheses");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Mods.Abs = true;
  }

  if (Negate) {
    Mods.Neg = true;
  } else if (Negate2) {
    if (getLexer().isNot(AsmToken::RParen)) {
      Error(Parser.getTok().getLoc(), "expected closing parentheses");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Mods.Neg = true;
  }

  if (Mods.hasFPModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    Op.setModifiers(Mods);
  }
  return MatchOperand_Success;
}

// Parse a register-or-immediate with an optional sext(...) integer
// input modifier.
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
                                               bool AllowImm) {
  bool Sext = false;

  if (getLexer().getKind() == AsmToken::Identifier &&
      Parser.getTok().getString() == "sext") {
    Parser.Lex();
    Sext = true;
    if (getLexer().isNot(AsmToken::LParen)) {
      Error(Parser.getTok().getLoc(), "expected left paren after sext");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return Res;
  }

  AMDGPUOperand::Modifiers Mods;
  if (Sext) {
    if (getLexer().isNot(AsmToken::RParen)) {
      Error(Parser.getTok().getLoc(), "expected closing parentheses");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Mods.Sext = true;
  }

  if (Mods.hasIntModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    Op.setModifiers(Mods);
  }

  return MatchOperand_Success;
}

// Register-only variants: same modifier syntax, immediates rejected.
OperandMatchResultTy
AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
  return parseRegOrImmWithFPInputMods(Operands, false);
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
  return parseRegOrImmWithIntInputMods(Operands, false);
}

// Parse either a 32-bit VGPR or the keyword "off" (encoded as immediate 0).
OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
  if (Reg) {
    Operands.push_back(std::move(Reg));
    return MatchOperand_Success;
  }

  const AsmToken &Tok = Parser.getTok();
  if (Tok.getString() == "off") {
    Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Tok.getLoc(),
                                                AMDGPUOperand::ImmTyOff, false));
    Parser.Lex();
    return MatchOperand_Success;
  }

  return MatchOperand_NoMatch;
}

// Reject match candidates whose encoding flags conflict with an encoding
// the user explicitly forced (32/64-bit VOP3 size, DPP, SDWA).
unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;

  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
      (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
      (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
      (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA))
) 2149 return Match_InvalidOperand; 2150 2151 if ((TSFlags & SIInstrFlags::VOP3) && 2152 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2153 getForcedEncodingSize() != 64) 2154 return Match_PreferE32; 2155 2156 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2157 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2158 // v_mac_f32/16 allow only dst_sel == DWORD; 2159 auto OpNum = 2160 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 2161 const auto &Op = Inst.getOperand(OpNum); 2162 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 2163 return Match_InvalidOperand; 2164 } 2165 } 2166 2167 if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) { 2168 // FIXME: Produces error without correct column reported. 2169 auto OpNum = 2170 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset); 2171 const auto &Op = Inst.getOperand(OpNum); 2172 if (Op.getImm() != 0) 2173 return Match_InvalidOperand; 2174 } 2175 2176 return Match_Success; 2177 } 2178 2179 // What asm variants we should check 2180 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 2181 if (getForcedEncodingSize() == 32) { 2182 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 2183 return makeArrayRef(Variants); 2184 } 2185 2186 if (isForcedVOP3()) { 2187 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 2188 return makeArrayRef(Variants); 2189 } 2190 2191 if (isForcedSDWA()) { 2192 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 2193 AMDGPUAsmVariants::SDWA9}; 2194 return makeArrayRef(Variants); 2195 } 2196 2197 if (isForcedDPP()) { 2198 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 2199 return makeArrayRef(Variants); 2200 } 2201 2202 static const unsigned Variants[] = { 2203 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 2204 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 2205 }; 2206 2207 return makeArrayRef(Variants); 2208 } 2209 2210 unsigned 
AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 2211 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2212 const unsigned Num = Desc.getNumImplicitUses(); 2213 for (unsigned i = 0; i < Num; ++i) { 2214 unsigned Reg = Desc.ImplicitUses[i]; 2215 switch (Reg) { 2216 case AMDGPU::FLAT_SCR: 2217 case AMDGPU::VCC: 2218 case AMDGPU::M0: 2219 return Reg; 2220 default: 2221 break; 2222 } 2223 } 2224 return AMDGPU::NoRegister; 2225 } 2226 2227 // NB: This code is correct only when used to check constant 2228 // bus limitations because GFX7 support no f16 inline constants. 2229 // Note that there are no cases when a GFX7 opcode violates 2230 // constant bus limitations due to the use of an f16 constant. 2231 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2232 unsigned OpIdx) const { 2233 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2234 2235 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2236 return false; 2237 } 2238 2239 const MCOperand &MO = Inst.getOperand(OpIdx); 2240 2241 int64_t Val = MO.getImm(); 2242 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2243 2244 switch (OpSize) { // expected operand size 2245 case 8: 2246 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2247 case 4: 2248 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2249 case 2: { 2250 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2251 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2252 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) { 2253 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2254 } else { 2255 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2256 } 2257 } 2258 default: 2259 llvm_unreachable("invalid operand size"); 2260 } 2261 } 2262 2263 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2264 const MCOperand &MO = Inst.getOperand(OpIdx); 2265 if (MO.isImm()) { 2266 return !isInlineConstant(Inst, OpIdx); 2267 } 2268 return 
!MO.isReg() || 2269 isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo()); 2270 } 2271 2272 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { 2273 const unsigned Opcode = Inst.getOpcode(); 2274 const MCInstrDesc &Desc = MII.get(Opcode); 2275 unsigned ConstantBusUseCount = 0; 2276 2277 if (Desc.TSFlags & 2278 (SIInstrFlags::VOPC | 2279 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 2280 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 2281 SIInstrFlags::SDWA)) { 2282 // Check special imm operands (used by madmk, etc) 2283 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 2284 ++ConstantBusUseCount; 2285 } 2286 2287 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 2288 if (SGPRUsed != AMDGPU::NoRegister) { 2289 ++ConstantBusUseCount; 2290 } 2291 2292 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2293 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2294 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2295 2296 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2297 2298 for (int OpIdx : OpIndices) { 2299 if (OpIdx == -1) break; 2300 2301 const MCOperand &MO = Inst.getOperand(OpIdx); 2302 if (usesConstantBus(Inst, OpIdx)) { 2303 if (MO.isReg()) { 2304 const unsigned Reg = mc2PseudoReg(MO.getReg()); 2305 // Pairs of registers with a partial intersections like these 2306 // s0, s[0:1] 2307 // flat_scratch_lo, flat_scratch 2308 // flat_scratch_lo, flat_scratch_hi 2309 // are theoretically valid but they are disabled anyway. 
2310 // Note that this code mimics SIInstrInfo::verifyInstruction 2311 if (Reg != SGPRUsed) { 2312 ++ConstantBusUseCount; 2313 } 2314 SGPRUsed = Reg; 2315 } else { // Expression or a literal 2316 ++ConstantBusUseCount; 2317 } 2318 } 2319 } 2320 } 2321 2322 return ConstantBusUseCount <= 1; 2323 } 2324 2325 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 2326 const unsigned Opcode = Inst.getOpcode(); 2327 const MCInstrDesc &Desc = MII.get(Opcode); 2328 2329 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 2330 if (DstIdx == -1 || 2331 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 2332 return true; 2333 } 2334 2335 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2336 2337 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2338 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2339 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2340 2341 assert(DstIdx != -1); 2342 const MCOperand &Dst = Inst.getOperand(DstIdx); 2343 assert(Dst.isReg()); 2344 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 2345 2346 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2347 2348 for (int SrcIdx : SrcIndices) { 2349 if (SrcIdx == -1) break; 2350 const MCOperand &Src = Inst.getOperand(SrcIdx); 2351 if (Src.isReg()) { 2352 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 2353 if (isRegIntersect(DstReg, SrcReg, TRI)) { 2354 return false; 2355 } 2356 } 2357 } 2358 2359 return true; 2360 } 2361 2362 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 2363 2364 const unsigned Opc = Inst.getOpcode(); 2365 const MCInstrDesc &Desc = MII.get(Opc); 2366 2367 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 2368 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 2369 assert(ClampIdx != -1); 2370 return Inst.getOperand(ClampIdx).getImm() == 0; 2371 } 2372 2373 
return true; 2374 } 2375 2376 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 2377 2378 const unsigned Opc = Inst.getOpcode(); 2379 const MCInstrDesc &Desc = MII.get(Opc); 2380 2381 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2382 return true; 2383 2384 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 2385 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2386 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 2387 2388 assert(VDataIdx != -1); 2389 assert(DMaskIdx != -1); 2390 assert(TFEIdx != -1); 2391 2392 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 2393 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 2394 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2395 if (DMask == 0) 2396 DMask = 1; 2397 2398 unsigned DataSize = 2399 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 2400 if (hasPackedD16()) { 2401 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2402 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 2403 DataSize = (DataSize + 1) / 2; 2404 } 2405 2406 return (VDataSize / 4) == DataSize + TFESize; 2407 } 2408 2409 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 2410 2411 const unsigned Opc = Inst.getOpcode(); 2412 const MCInstrDesc &Desc = MII.get(Opc); 2413 2414 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2415 return true; 2416 if (!Desc.mayLoad() || !Desc.mayStore()) 2417 return true; // Not atomic 2418 2419 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2420 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2421 2422 // This is an incomplete check because image_atomic_cmpswap 2423 // may only use 0x3 and 0xf while other atomic operations 2424 // may use 0x1 and 0x3. However these limitations are 2425 // verified when we check that dmask matches dst size. 
2426 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 2427 } 2428 2429 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 2430 2431 const unsigned Opc = Inst.getOpcode(); 2432 const MCInstrDesc &Desc = MII.get(Opc); 2433 2434 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 2435 return true; 2436 2437 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2438 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2439 2440 // GATHER4 instructions use dmask in a different fashion compared to 2441 // other MIMG instructions. The only useful DMASK values are 2442 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 2443 // (red,red,red,red) etc.) The ISA document doesn't mention 2444 // this. 2445 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 2446 } 2447 2448 bool AMDGPUAsmParser::validateMIMGR128(const MCInst &Inst) { 2449 2450 const unsigned Opc = Inst.getOpcode(); 2451 const MCInstrDesc &Desc = MII.get(Opc); 2452 2453 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2454 return true; 2455 2456 int Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128); 2457 assert(Idx != -1); 2458 2459 bool R128 = (Inst.getOperand(Idx).getImm() != 0); 2460 2461 return !R128 || hasMIMG_R128(); 2462 } 2463 2464 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 2465 2466 const unsigned Opc = Inst.getOpcode(); 2467 const MCInstrDesc &Desc = MII.get(Opc); 2468 2469 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2470 return true; 2471 2472 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2473 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 2474 if (isCI() || isSI()) 2475 return false; 2476 } 2477 2478 return true; 2479 } 2480 2481 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 2482 const SMLoc &IDLoc) { 2483 if (!validateConstantBusLimitations(Inst)) { 2484 Error(IDLoc, 2485 "invalid operand (violates constant bus restrictions)"); 2486 return false; 2487 } 2488 if 
(!validateEarlyClobberLimitations(Inst)) {
    Error(IDLoc,
      "destination must be different than all sources");
    return false;
  }
  if (!validateIntClampSupported(Inst)) {
    Error(IDLoc,
      "integer clamping is not supported on this GPU");
    return false;
  }
  if (!validateMIMGR128(Inst)) {
    Error(IDLoc,
      "r128 modifier is not supported on this GPU");
    return false;
  }
  // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
  if (!validateMIMGD16(Inst)) {
    Error(IDLoc,
      "d16 modifier is not supported on this GPU");
    return false;
  }
  if (!validateMIMGDataSize(Inst)) {
    Error(IDLoc,
      "image data size does not match dmask and tfe");
    return false;
  }
  if (!validateMIMGAtomicDMask(Inst)) {
    Error(IDLoc,
      "invalid atomic image dmask");
    return false;
  }
  if (!validateMIMGGatherDMask(Inst)) {
    Error(IDLoc,
      "invalid image_gather dmask: only one bit must be set");
    return false;
  }

  return true;
}

static std::string AMDGPUMnemonicSpellCheck(StringRef S, uint64_t FBS,
                                            unsigned VariantID = 0);

// Try every applicable asm variant, keep the most specific failure status,
// and emit the instruction on the first successful match.
bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  MCInst Inst;
  unsigned Result = Match_Success;
  for (auto Variant : getMatchedVariants()) {
    uint64_t EI;
    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
                                  Variant);
    // We order match statuses from least to most specific. We use the most
    // specific status as the result:
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
    if ((R == Match_Success) ||
        (R == Match_PreferE32) ||
        (R == Match_MissingFeature && Result != Match_PreferE32) ||
        (R == Match_InvalidOperand && Result != Match_MissingFeature
                                   && Result != Match_PreferE32) ||
        (R == Match_MnemonicFail   && Result != Match_InvalidOperand
                                   && Result != Match_MissingFeature
                                   && Result != Match_PreferE32)) {
      Result = R;
      ErrorInfo = EI;
    }
    if (R == Match_Success)
      break;
  }

  switch (Result) {
  default: break;
  case Match_Success:
    // Target-specific semantic checks run after the table-generated matcher.
    if (!validateInstruction(Inst, IDLoc)) {
      return true;
    }
    Inst.setLoc(IDLoc);
    Out.EmitInstruction(Inst, getSTI());
    return false;

  case Match_MissingFeature:
    return Error(IDLoc, "instruction not supported on this GPU");

  case Match_MnemonicFail: {
    // Suggest a near-miss mnemonic, if any.
    uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
    std::string Suggestion = AMDGPUMnemonicSpellCheck(
        ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
    return Error(IDLoc, "invalid instruction" + Suggestion,
                 ((AMDGPUOperand &)*Operands[0]).getLocRange());
  }

  case Match_InvalidOperand: {
    SMLoc ErrorLoc = IDLoc;
    if (ErrorInfo != ~0ULL) {
      if (ErrorInfo >= Operands.size()) {
        return Error(IDLoc, "too few operands for instruction");
      }
      ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
      if (ErrorLoc == SMLoc())
        ErrorLoc = IDLoc;
    }
    return Error(ErrorLoc, "invalid operand for instruction");
  }

  case Match_PreferE32:
    return Error(IDLoc, "internal error: instruction without _e64 suffix "
                        "should be encoded as e32");
  }
  llvm_unreachable("Implement any new match types added!");
}

// Parse an absolute expression into a uint32_t; returns true on failure.
bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
int64_t Tmp = -1; 2603 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 2604 return true; 2605 } 2606 if (getParser().parseAbsoluteExpression(Tmp)) { 2607 return true; 2608 } 2609 Ret = static_cast<uint32_t>(Tmp); 2610 return false; 2611 } 2612 2613 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 2614 uint32_t &Minor) { 2615 if (ParseAsAbsoluteExpression(Major)) 2616 return TokError("invalid major version"); 2617 2618 if (getLexer().isNot(AsmToken::Comma)) 2619 return TokError("minor version number required, comma expected"); 2620 Lex(); 2621 2622 if (ParseAsAbsoluteExpression(Minor)) 2623 return TokError("invalid minor version"); 2624 2625 return false; 2626 } 2627 2628 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 2629 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 2630 return TokError("directive only supported for amdgcn architecture"); 2631 2632 std::string Target; 2633 2634 SMLoc TargetStart = getTok().getLoc(); 2635 if (getParser().parseEscapedString(Target)) 2636 return true; 2637 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 2638 2639 std::string ExpectedTarget; 2640 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 2641 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 2642 2643 if (Target != ExpectedTargetOS.str()) 2644 return getParser().Error(TargetRange.Start, "target must match options", 2645 TargetRange); 2646 2647 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 2648 return false; 2649 } 2650 2651 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 2652 return getParser().Error(Range.Start, "value out of range", Range); 2653 } 2654 2655 bool AMDGPUAsmParser::calculateGPRBlocks( 2656 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 2657 bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange, 2658 unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks, 2659 unsigned &SGPRBlocks) { 2660 // TODO(scott.linder): These 
// calculations are duplicated from
  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
  IsaInfo::IsaVersion Version = IsaInfo::getIsaVersion(Features);

  unsigned NumVGPRs = NextFreeVGPR;
  unsigned NumSGPRs = NextFreeSGPR;
  unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(Features);

  // On gfx8+ without the SGPR init bug, the explicit SGPR count is checked
  // before the extra (VCC/flat_scratch/XNACK) SGPRs are added.
  if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
      NumSGPRs > MaxAddressableNumSGPRs)
    return OutOfRangeError(SGPRRange);

  NumSGPRs +=
      IsaInfo::getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed, XNACKUsed);

  // Older targets (and targets with the init bug) check the limit after
  // including the extra SGPRs.
  if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
      NumSGPRs > MaxAddressableNumSGPRs)
    return OutOfRangeError(SGPRRange);

  if (Features.test(FeatureSGPRInitBug))
    NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;

  VGPRBlocks = IsaInfo::getNumVGPRBlocks(Features, NumVGPRs);
  SGPRBlocks = IsaInfo::getNumSGPRBlocks(Features, NumSGPRs);

  return false;
}

// Handle .amdhsa_kernel: parse the .amdhsa_* sub-directives up to
// .end_amdhsa_kernel, build a kernel_descriptor_t, and emit it through the
// target streamer. Each sub-directive may appear at most once.
bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
    return TokError("directive only supported for amdgcn architecture");

  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
    return TokError("directive only supported for amdhsa OS");

  StringRef KernelName;
  if (getParser().parseIdentifier(KernelName))
    return true;

  kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor();

  // Directives already seen, to reject repetitions and to enforce the
  // mandatory ones at the end.
  StringSet<> Seen;

  IsaInfo::IsaVersion IVersion =
      IsaInfo::getIsaVersion(getSTI().getFeatureBits());

  SMRange VGPRRange;
  uint64_t NextFreeVGPR = 0;
  SMRange SGPRRange;
  uint64_t NextFreeSGPR = 0;
  unsigned UserSGPRCount = 0;
  bool ReserveVCC = true;
  bool ReserveFlatScr = true;
  bool ReserveXNACK = hasXNACK();

  while (true) {
    while (getLexer().is(AsmToken::EndOfStatement))
      Lex();

    if (getLexer().isNot(AsmToken::Identifier))
      return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");

    StringRef ID = getTok().getIdentifier();
    SMRange IDRange = getTok().getLocRange();
    Lex();

    if (ID == ".end_amdhsa_kernel")
      break;

    if (Seen.find(ID) != Seen.end())
      return TokError(".amdhsa_ directives cannot be repeated");
    Seen.insert(ID);

    SMLoc ValStart = getTok().getLoc();
    int64_t IVal;
    if (getParser().parseAbsoluteExpression(IVal))
      return true;
    SMLoc ValEnd = getTok().getLoc();
    SMRange ValRange = SMRange(ValStart, ValEnd);

    if (IVal < 0)
      return OutOfRangeError(ValRange);

    uint64_t Val = IVal;

// Range-check VALUE against the descriptor field's bit width, then set it.
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
  if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
    return OutOfRangeError(RANGE);                                             \
  AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);

    if (ID == ".amdhsa_group_segment_fixed_size") {
      if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.group_segment_fixed_size = Val;
    } else if (ID == ".amdhsa_private_segment_fixed_size") {
      if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.private_segment_fixed_size = Val;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
                       Val, ValRange);
      UserSGPRCount++;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
                       ValRange);
      UserSGPRCount++;
    } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
                       ValRange);
      UserSGPRCount++;
    } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
                       Val, ValRange);
      UserSGPRCount++;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
                       ValRange);
      UserSGPRCount++;
    } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
                       ValRange);
      UserSGPRCount++;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
                       Val, ValRange);
      UserSGPRCount++;
    } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
          ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
                       ValRange);
    } else if (ID == ".amdhsa_next_free_vgpr") {
      VGPRRange = ValRange;
      NextFreeVGPR = Val;
    } else if (ID == ".amdhsa_next_free_sgpr") {
      SGPRRange = ValRange;
      NextFreeSGPR = Val;
    } else if (ID == ".amdhsa_reserve_vcc") {
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveVCC = Val;
    } else if (ID == ".amdhsa_reserve_flat_scratch") {
      if (IVersion.Major < 7)
        return getParser().Error(IDRange.Start, "directive requires gfx7+",
                                 IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveFlatScr = Val;
    } else if (ID == ".amdhsa_reserve_xnack_mask") {
      if (IVersion.Major < 8)
        return getParser().Error(IDRange.Start, "directive requires gfx8+",
                                 IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveXNACK = Val;
    } else if (ID == ".amdhsa_float_round_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_round_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
                       ValRange);
    } else if (ID == ".amdhsa_dx10_clamp") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
    } else if (ID == ".amdhsa_ieee_mode") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
                       Val, ValRange);
    } else if (ID == ".amdhsa_fp16_overflow") {
      if (IVersion.Major < 9)
        return getParser().Error(IDRange.Start, "directive requires gfx9+",
                                 IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
                       ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
          ValRange);
    } else if (ID == ".amdhsa_exception_fp_denorm_src") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
          ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_int_div_zero") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
                       Val, ValRange);
    } else {
      return getParser().Error(IDRange.Start,
                               "unknown .amdhsa_kernel directive", IDRange);
    }

#undef PARSE_BITS_ENTRY
  }

  if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
    return TokError(".amdhsa_next_free_vgpr directive is required");

  if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
    return TokError(".amdhsa_next_free_sgpr directive is required");

  unsigned VGPRBlocks;
  unsigned SGPRBlocks;
  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
                         ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR,
                         SGPRRange, VGPRBlocks, SGPRBlocks))
    return true;

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
          VGPRBlocks))
    return OutOfRangeError(VGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
          SGPRBlocks))
    return OutOfRangeError(SGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
                  SGPRBlocks);

  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
    return TokError("too many user SGPRs enabled");
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
                  UserSGPRCount);

  getTargetStreamer().EmitAmdhsaKernelDescriptor(
      getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
      ReserveFlatScr, ReserveXNACK);
  return false;
}

// Handle .hsa_code_object_version <major>, <minor>.
bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
  uint32_t Major;
  uint32_t Minor;

  if (ParseDirectiveMajorMinor(Major, Minor))
    return true;

  getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
  return false;
}

// Handle .hsa_code_object_isa [<major>, <minor>, <stepping>, <vendor>, <arch>].
bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
  uint32_t Major;
  uint32_t Minor;
  uint32_t Stepping;
  StringRef VendorName;
  StringRef ArchName;

  // If this directive has no arguments, then use the ISA version for the
  // targeted GPU.
  // No explicit arguments: fall back to the ISA version of the subtarget we
  // are assembling for, with the default "AMD"/"AMDGPU" vendor/arch names.
  if (getLexer().is(AsmToken::EndOfStatement)) {
    AMDGPU::IsaInfo::IsaVersion ISA =
        AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
    getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
                                                      ISA.Stepping,
                                                      "AMD", "AMDGPU");
    return false;
  }

  // Explicit form: major, minor, stepping, "vendor", "arch".
  if (ParseDirectiveMajorMinor(Major, Minor))
    return true;

  if (getLexer().isNot(AsmToken::Comma))
    return TokError("stepping version number required, comma expected");
  Lex();

  if (ParseAsAbsoluteExpression(Stepping))
    return TokError("invalid stepping version");

  if (getLexer().isNot(AsmToken::Comma))
    return TokError("vendor name required, comma expected");
  Lex();

  if (getLexer().isNot(AsmToken::String))
    return TokError("invalid vendor name");

  VendorName = getLexer().getTok().getStringContents();
  Lex();

  if (getLexer().isNot(AsmToken::Comma))
    return TokError("arch name required, comma expected");
  Lex();

  if (getLexer().isNot(AsmToken::String))
    return TokError("invalid arch name");

  ArchName = getLexer().getTok().getStringContents();
  Lex();

  getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
                                                    VendorName, ArchName);
  return false;
}

/// Parse one "key = value" entry inside an .amd_kernel_code_t block and fold
/// it into \p Header. Returns true on error (diagnostic already emitted).
bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
                                               amd_kernel_code_t &Header) {
  // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
  // assembly for backwards compatibility.
  if (ID == "max_scratch_backing_memory_byte_size") {
    Parser.eatToEndOfStatement();
    return false;
  }

  // Delegate field parsing; collect the helper's error text into a local
  // buffer so it can be reported through TokError with proper location info.
  SmallString<40> ErrStr;
  raw_svector_ostream Err(ErrStr);
  if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
    return TokError(Err.str());
  }
  Lex();
  return false;
}

/// Parse the body of an .amd_kernel_code_t directive up to the matching
/// .end_amd_kernel_code_t and emit the populated kernel code header.
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
  amd_kernel_code_t Header;
  AMDGPU::initDefaultAMDKernelCodeT(Header, getFeatureBits());

  while (true) {
    // Lex EndOfStatement. This is in a while loop, because lexing a comment
    // will set the current token to EndOfStatement.
    while(getLexer().is(AsmToken::EndOfStatement))
      Lex();

    if (getLexer().isNot(AsmToken::Identifier))
      return TokError("expected value identifier or .end_amd_kernel_code_t");

    StringRef ID = getLexer().getTok().getIdentifier();
    Lex();

    if (ID == ".end_amd_kernel_code_t")
      break;

    if (ParseAMDKernelCodeTValue(ID, Header))
      return true;
  }

  getTargetStreamer().EmitAMDKernelCodeT(Header);

  return false;
}

/// Parse .amdgpu_hsa_kernel <symbol>: mark the symbol as an HSA kernel and,
/// for pre-code-object-v3 targets, start a new kernel scope.
bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
  if (getLexer().isNot(AsmToken::Identifier))
    return TokError("expected symbol name");

  StringRef KernelName = Parser.getTok().getString();

  getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
                                           ELF::STT_AMDGPU_HSA_KERNEL);
  Lex();
  // Kernel scopes only apply to the v2 code-object flow.
  if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
    KernelScope.initialize(getContext());
  return false;
}

/// Parse .amd_amdgpu_isa "<isa string>": verify the string in the assembly
/// matches the ISA implied by the current triple/mcpu, then emit it.
bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
    return Error(getParser().getTok().getLoc(),
                 ".amd_amdgpu_isa directive is not available on non-amdgcn "
                 "architectures");
  }

  auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();

  // Render the subtarget's own ISA string for comparison.
  std::string ISAVersionStringFromSTI;
  raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
  IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);

  if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
    return Error(getParser().getTok().getLoc(),
                 ".amd_amdgpu_isa directive does not match triple and/or mcpu "
                 "arguments specified through the command line");
  }

  getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
  Lex();

  return false;
}

/// Parse the HSA metadata block (raw YAML) between the begin/end assembler
/// directives and hand it to the target streamer. Only valid on amdhsa OSes.
bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
    return Error(getParser().getTok().getLoc(),
                 (Twine(HSAMD::AssemblerDirectiveBegin) + Twine(" directive is "
                 "not available on non-amdhsa OSes")).str());
  }

  std::string HSAMetadataString;
  raw_string_ostream YamlStream(HSAMetadataString);

  // YAML is whitespace-sensitive, so stop the lexer from skipping spaces
  // while we collect the raw text; restored below.
  getLexer().setSkipSpace(false);

  bool FoundEnd = false;
  while (!getLexer().is(AsmToken::Eof)) {
    // Preserve leading whitespace on each line.
    while (getLexer().is(AsmToken::Space)) {
      YamlStream << getLexer().getTok().getString();
      Lex();
    }

    if (getLexer().is(AsmToken::Identifier)) {
      StringRef ID = getLexer().getTok().getIdentifier();
      if (ID == AMDGPU::HSAMD::AssemblerDirectiveEnd) {
        Lex();
        FoundEnd = true;
        break;
      }
    }

    // Copy the rest of the statement verbatim, re-inserting the statement
    // separator the lexer consumed.
    YamlStream << Parser.parseStringToEndOfStatement()
               << getContext().getAsmInfo()->getSeparatorString();

    Parser.eatToEndOfStatement();
  }

  getLexer().setSkipSpace(true);

  if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
    return TokError(Twine("expected directive ") +
                    Twine(HSAMD::AssemblerDirectiveEnd) + Twine(" not found"));
  }

  YamlStream.flush();

  if (!getTargetStreamer().EmitHSAMetadata(HSAMetadataString))
    return Error(getParser().getTok().getLoc(), "invalid HSA metadata");

  return false;
}

3140 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 3141 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 3142 return Error(getParser().getTok().getLoc(), 3143 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 3144 "not available on non-amdpal OSes")).str()); 3145 } 3146 3147 PALMD::Metadata PALMetadata; 3148 for (;;) { 3149 uint32_t Value; 3150 if (ParseAsAbsoluteExpression(Value)) { 3151 return TokError(Twine("invalid value in ") + 3152 Twine(PALMD::AssemblerDirective)); 3153 } 3154 PALMetadata.push_back(Value); 3155 if (getLexer().isNot(AsmToken::Comma)) 3156 break; 3157 Lex(); 3158 } 3159 getTargetStreamer().EmitPALMetadata(PALMetadata); 3160 return false; 3161 } 3162 3163 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 3164 StringRef IDVal = DirectiveID.getString(); 3165 3166 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 3167 if (IDVal == ".amdgcn_target") 3168 return ParseDirectiveAMDGCNTarget(); 3169 3170 if (IDVal == ".amdhsa_kernel") 3171 return ParseDirectiveAMDHSAKernel(); 3172 } else { 3173 if (IDVal == ".hsa_code_object_version") 3174 return ParseDirectiveHSACodeObjectVersion(); 3175 3176 if (IDVal == ".hsa_code_object_isa") 3177 return ParseDirectiveHSACodeObjectISA(); 3178 3179 if (IDVal == ".amd_kernel_code_t") 3180 return ParseDirectiveAMDKernelCodeT(); 3181 3182 if (IDVal == ".amdgpu_hsa_kernel") 3183 return ParseDirectiveAMDGPUHsaKernel(); 3184 3185 if (IDVal == ".amd_amdgpu_isa") 3186 return ParseDirectiveISAVersion(); 3187 } 3188 3189 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 3190 return ParseDirectiveHSAMetadata(); 3191 3192 if (IDVal == PALMD::AssemblerDirective) 3193 return ParseDirectivePALMetadata(); 3194 3195 return true; 3196 } 3197 3198 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 3199 unsigned RegNo) const { 3200 3201 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 3202 R.isValid(); ++R) { 3203 if (*R == RegNo) 3204 return 
isGFX9(); 3205 } 3206 3207 switch (RegNo) { 3208 case AMDGPU::TBA: 3209 case AMDGPU::TBA_LO: 3210 case AMDGPU::TBA_HI: 3211 case AMDGPU::TMA: 3212 case AMDGPU::TMA_LO: 3213 case AMDGPU::TMA_HI: 3214 return !isGFX9(); 3215 case AMDGPU::XNACK_MASK: 3216 case AMDGPU::XNACK_MASK_LO: 3217 case AMDGPU::XNACK_MASK_HI: 3218 return !isCI() && !isSI() && hasXNACK(); 3219 default: 3220 break; 3221 } 3222 3223 if (isCI()) 3224 return true; 3225 3226 if (isSI()) { 3227 // No flat_scr 3228 switch (RegNo) { 3229 case AMDGPU::FLAT_SCR: 3230 case AMDGPU::FLAT_SCR_LO: 3231 case AMDGPU::FLAT_SCR_HI: 3232 return false; 3233 default: 3234 return true; 3235 } 3236 } 3237 3238 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 3239 // SI/CI have. 3240 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 3241 R.isValid(); ++R) { 3242 if (*R == RegNo) 3243 return false; 3244 } 3245 3246 return true; 3247 } 3248 3249 OperandMatchResultTy 3250 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { 3251 // Try to parse with a custom parser 3252 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 3253 3254 // If we successfully parsed the operand or if there as an error parsing, 3255 // we are done. 3256 // 3257 // If we are parsing after we reach EndOfStatement then this means we 3258 // are appending default values to the Operands list. This is only done 3259 // by custom parser, so we shouldn't continue on to the generic parsing. 
3260 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 3261 getLexer().is(AsmToken::EndOfStatement)) 3262 return ResTy; 3263 3264 ResTy = parseRegOrImm(Operands); 3265 3266 if (ResTy == MatchOperand_Success) 3267 return ResTy; 3268 3269 const auto &Tok = Parser.getTok(); 3270 SMLoc S = Tok.getLoc(); 3271 3272 const MCExpr *Expr = nullptr; 3273 if (!Parser.parseExpression(Expr)) { 3274 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 3275 return MatchOperand_Success; 3276 } 3277 3278 // Possibly this is an instruction flag like 'gds'. 3279 if (Tok.getKind() == AsmToken::Identifier) { 3280 Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S)); 3281 Parser.Lex(); 3282 return MatchOperand_Success; 3283 } 3284 3285 return MatchOperand_NoMatch; 3286 } 3287 3288 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 3289 // Clear any forced encodings from the previous instruction. 3290 setForcedEncodingSize(0); 3291 setForcedDPP(false); 3292 setForcedSDWA(false); 3293 3294 if (Name.endswith("_e64")) { 3295 setForcedEncodingSize(64); 3296 return Name.substr(0, Name.size() - 4); 3297 } else if (Name.endswith("_e32")) { 3298 setForcedEncodingSize(32); 3299 return Name.substr(0, Name.size() - 4); 3300 } else if (Name.endswith("_dpp")) { 3301 setForcedDPP(true); 3302 return Name.substr(0, Name.size() - 4); 3303 } else if (Name.endswith("_sdwa")) { 3304 setForcedSDWA(true); 3305 return Name.substr(0, Name.size() - 5); 3306 } 3307 return Name; 3308 } 3309 3310 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 3311 StringRef Name, 3312 SMLoc NameLoc, OperandVector &Operands) { 3313 // Add the instruction mnemonic 3314 Name = parseMnemonicSuffix(Name); 3315 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 3316 3317 while (!getLexer().is(AsmToken::EndOfStatement)) { 3318 OperandMatchResultTy Res = parseOperand(Operands, Name); 3319 3320 // Eat the comma or space if there is one. 
3321 if (getLexer().is(AsmToken::Comma)) 3322 Parser.Lex(); 3323 3324 switch (Res) { 3325 case MatchOperand_Success: break; 3326 case MatchOperand_ParseFail: 3327 Error(getLexer().getLoc(), "failed parsing operand."); 3328 while (!getLexer().is(AsmToken::EndOfStatement)) { 3329 Parser.Lex(); 3330 } 3331 return true; 3332 case MatchOperand_NoMatch: 3333 Error(getLexer().getLoc(), "not a valid operand."); 3334 while (!getLexer().is(AsmToken::EndOfStatement)) { 3335 Parser.Lex(); 3336 } 3337 return true; 3338 } 3339 } 3340 3341 return false; 3342 } 3343 3344 //===----------------------------------------------------------------------===// 3345 // Utility functions 3346 //===----------------------------------------------------------------------===// 3347 3348 OperandMatchResultTy 3349 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) { 3350 switch(getLexer().getKind()) { 3351 default: return MatchOperand_NoMatch; 3352 case AsmToken::Identifier: { 3353 StringRef Name = Parser.getTok().getString(); 3354 if (!Name.equals(Prefix)) { 3355 return MatchOperand_NoMatch; 3356 } 3357 3358 Parser.Lex(); 3359 if (getLexer().isNot(AsmToken::Colon)) 3360 return MatchOperand_ParseFail; 3361 3362 Parser.Lex(); 3363 3364 bool IsMinus = false; 3365 if (getLexer().getKind() == AsmToken::Minus) { 3366 Parser.Lex(); 3367 IsMinus = true; 3368 } 3369 3370 if (getLexer().isNot(AsmToken::Integer)) 3371 return MatchOperand_ParseFail; 3372 3373 if (getParser().parseAbsoluteExpression(Int)) 3374 return MatchOperand_ParseFail; 3375 3376 if (IsMinus) 3377 Int = -Int; 3378 break; 3379 } 3380 } 3381 return MatchOperand_Success; 3382 } 3383 3384 OperandMatchResultTy 3385 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 3386 AMDGPUOperand::ImmTy ImmTy, 3387 bool (*ConvertResult)(int64_t&)) { 3388 SMLoc S = Parser.getTok().getLoc(); 3389 int64_t Value = 0; 3390 3391 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 3392 if (Res != 
MatchOperand_Success) 3393 return Res; 3394 3395 if (ConvertResult && !ConvertResult(Value)) { 3396 return MatchOperand_ParseFail; 3397 } 3398 3399 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 3400 return MatchOperand_Success; 3401 } 3402 3403 OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix( 3404 const char *Prefix, 3405 OperandVector &Operands, 3406 AMDGPUOperand::ImmTy ImmTy, 3407 bool (*ConvertResult)(int64_t&)) { 3408 StringRef Name = Parser.getTok().getString(); 3409 if (!Name.equals(Prefix)) 3410 return MatchOperand_NoMatch; 3411 3412 Parser.Lex(); 3413 if (getLexer().isNot(AsmToken::Colon)) 3414 return MatchOperand_ParseFail; 3415 3416 Parser.Lex(); 3417 if (getLexer().isNot(AsmToken::LBrac)) 3418 return MatchOperand_ParseFail; 3419 Parser.Lex(); 3420 3421 unsigned Val = 0; 3422 SMLoc S = Parser.getTok().getLoc(); 3423 3424 // FIXME: How to verify the number of elements matches the number of src 3425 // operands? 3426 for (int I = 0; I < 4; ++I) { 3427 if (I != 0) { 3428 if (getLexer().is(AsmToken::RBrac)) 3429 break; 3430 3431 if (getLexer().isNot(AsmToken::Comma)) 3432 return MatchOperand_ParseFail; 3433 Parser.Lex(); 3434 } 3435 3436 if (getLexer().isNot(AsmToken::Integer)) 3437 return MatchOperand_ParseFail; 3438 3439 int64_t Op; 3440 if (getParser().parseAbsoluteExpression(Op)) 3441 return MatchOperand_ParseFail; 3442 3443 if (Op != 0 && Op != 1) 3444 return MatchOperand_ParseFail; 3445 Val |= (Op << I); 3446 } 3447 3448 Parser.Lex(); 3449 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 3450 return MatchOperand_Success; 3451 } 3452 3453 OperandMatchResultTy 3454 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 3455 AMDGPUOperand::ImmTy ImmTy) { 3456 int64_t Bit = 0; 3457 SMLoc S = Parser.getTok().getLoc(); 3458 3459 // We are at the end of the statement, and this is a default argument, so 3460 // use a default value. 
3461 if (getLexer().isNot(AsmToken::EndOfStatement)) { 3462 switch(getLexer().getKind()) { 3463 case AsmToken::Identifier: { 3464 StringRef Tok = Parser.getTok().getString(); 3465 if (Tok == Name) { 3466 Bit = 1; 3467 Parser.Lex(); 3468 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 3469 Bit = 0; 3470 Parser.Lex(); 3471 } else { 3472 return MatchOperand_NoMatch; 3473 } 3474 break; 3475 } 3476 default: 3477 return MatchOperand_NoMatch; 3478 } 3479 } 3480 3481 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 3482 return MatchOperand_Success; 3483 } 3484 3485 static void addOptionalImmOperand( 3486 MCInst& Inst, const OperandVector& Operands, 3487 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 3488 AMDGPUOperand::ImmTy ImmT, 3489 int64_t Default = 0) { 3490 auto i = OptionalIdx.find(ImmT); 3491 if (i != OptionalIdx.end()) { 3492 unsigned Idx = i->second; 3493 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 3494 } else { 3495 Inst.addOperand(MCOperand::createImm(Default)); 3496 } 3497 } 3498 3499 OperandMatchResultTy 3500 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 3501 if (getLexer().isNot(AsmToken::Identifier)) { 3502 return MatchOperand_NoMatch; 3503 } 3504 StringRef Tok = Parser.getTok().getString(); 3505 if (Tok != Prefix) { 3506 return MatchOperand_NoMatch; 3507 } 3508 3509 Parser.Lex(); 3510 if (getLexer().isNot(AsmToken::Colon)) { 3511 return MatchOperand_ParseFail; 3512 } 3513 3514 Parser.Lex(); 3515 if (getLexer().isNot(AsmToken::Identifier)) { 3516 return MatchOperand_ParseFail; 3517 } 3518 3519 Value = Parser.getTok().getString(); 3520 return MatchOperand_Success; 3521 } 3522 3523 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 3524 // values to live in a joint format operand in the MCInst encoding. 
/// Parse the combined dfmt/nfmt tbuffer format specifiers, which may appear
/// in either order and are each optional, into a single ImmTyFORMAT operand
/// encoded as dfmt | (nfmt << 4).
OperandMatchResultTy
AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Dfmt = 0, Nfmt = 0;
  // dfmt and nfmt can appear in either order, and each is optional.
  bool GotDfmt = false, GotNfmt = false;
  while (!GotDfmt || !GotNfmt) {
    if (!GotDfmt) {
      auto Res = parseIntWithPrefix("dfmt", Dfmt);
      if (Res != MatchOperand_NoMatch) {
        if (Res != MatchOperand_Success)
          return Res;
        if (Dfmt >= 16) {
          Error(Parser.getTok().getLoc(), "out of range dfmt");
          return MatchOperand_ParseFail;
        }
        GotDfmt = true;
        Parser.Lex();
        continue;
      }
    }
    if (!GotNfmt) {
      auto Res = parseIntWithPrefix("nfmt", Nfmt);
      if (Res != MatchOperand_NoMatch) {
        if (Res != MatchOperand_Success)
          return Res;
        if (Nfmt >= 8) {
          Error(Parser.getTok().getLoc(), "out of range nfmt");
          return MatchOperand_ParseFail;
        }
        GotNfmt = true;
        Parser.Lex();
        continue;
      }
    }
    // Neither specifier matched on this iteration; stop looking.
    break;
  }
  if (!GotDfmt && !GotNfmt)
    return MatchOperand_NoMatch;
  auto Format = Dfmt | Nfmt << 4;
  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
  return MatchOperand_Success;
}

//===----------------------------------------------------------------------===//
// ds
//===----------------------------------------------------------------------===//

/// Build an MCInst for DS instructions that take the split offset0/offset1
/// pair: registers first, then offset0, offset1, gds, and the implicit m0.
void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
                                    const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);

  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
}

/// Build an MCInst for generic DS instructions. \p IsGdsHardcoded indicates
/// the opcode implies gds, so no separate gds operand is appended; a literal
/// "gds" token in the source forces it on as well.
void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                                bool IsGdsHardcoded) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    if (Op.isToken() && Op.getToken() == "gds") {
      IsGdsHardcoded = true;
      continue;
    }

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // ds_swizzle_b32 encodes its pattern in the offset field, so its immediate
  // was parsed as an ImmTySwizzle rather than a plain offset.
  AMDGPUOperand::ImmTy OffsetType =
    (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si ||
     Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
                                                      AMDGPUOperand::ImmTyOffset;

  addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);

  if (!IsGdsHardcoded) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
  }
  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
}

/// Build an MCInst for the EXP instruction: four sources (or 'off'
/// placeholders), target, vm/compr flags, and the derived enable mask.
void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  unsigned OperandIdx[4];
  unsigned EnMask = 0;
  int SrcIdx = 0;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Op.addRegOperands(Inst, 1);
      ++SrcIdx;
      continue;
    }

    // 'off' sources become NoRegister placeholders.
    if (Op.isOff()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
      ++SrcIdx;
      continue;
    }

    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // 'done' is encoded in the opcode itself; drop the token.
    if (Op.isToken() && Op.getToken() == "done")
      continue;

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  assert(SrcIdx == 4);

  // In compressed mode src1 moves into the second slot and the upper two
  // source slots are unused.
  bool Compr = false;
  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
    Compr = true;
    Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
    Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
    Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
  }

  // Each live source enables one bit (two adjacent bits when compressed).
  for (auto i = 0; i < SrcIdx; ++i) {
    if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
      EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);

  Inst.addOperand(MCOperand::createImm(EnMask));
}

//===----------------------------------------------------------------------===//
// s_waitcnt
//===----------------------------------------------------------------------===//

/// Fold one counter value into the combined s_waitcnt immediate \p IntVal.
/// Returns true on failure (value did not round-trip through the encoding and
/// saturation was not requested).
static bool
encodeCnt(
  const AMDGPU::IsaInfo::IsaVersion ISA,
  int64_t &IntVal,
  int64_t CntVal,
  bool Saturate,
  unsigned (*encode)(const IsaInfo::IsaVersion &Version, unsigned, unsigned),
  unsigned (*decode)(const IsaInfo::IsaVersion &Version, unsigned))
{
  bool Failed = false;

  IntVal = encode(ISA, IntVal, CntVal);
  // If decode(encode(x)) != x the value overflowed the counter's bit-field.
  if (CntVal != decode(ISA, IntVal)) {
    if (Saturate) {
      // Saturating variant: clamp to the field's maximum instead of failing.
      IntVal = encode(ISA, IntVal, -1);
    } else {
      Failed = true;
    }
  }
  return Failed;
}

/// Parse one "name(value)" clause of an s_waitcnt operand (vmcnt, expcnt or
/// lgkmcnt, each with an optional _sat suffix) and merge it into \p IntVal.
bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
  StringRef CntName = Parser.getTok().getString();
  int64_t CntVal;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::LParen))
    return true;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Integer))
    return true;

  SMLoc ValLoc = Parser.getTok().getLoc();
  if (getParser().parseAbsoluteExpression(CntVal))
    return true;

  AMDGPU::IsaInfo::IsaVersion ISA =
      AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());

  bool Failed = true;
  bool Sat = CntName.endswith("_sat");

  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
  }

  if (Failed) {
    Error(ValLoc, "too large value for " + CntName);
    return true;
  }

  if (getLexer().isNot(AsmToken::RParen)) {
    return true;
  }

  Parser.Lex();
  // Consume an '&' or ',' separator, but only when another counter clause
  // follows it.
  if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
    const AsmToken NextToken = getLexer().peekTok();
    if (NextToken.is(AsmToken::Identifier)) {
      Parser.Lex();
    }
  }

  return false;
}

/// Parse an s_waitcnt operand: either a raw integer or a sequence of
/// counter clauses. Counters not mentioned stay at their "no wait" maximum.
OperandMatchResultTy
AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
  AMDGPU::IsaInfo::IsaVersion ISA =
      AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
  int64_t Waitcnt = getWaitcntBitMask(ISA);
  SMLoc S = Parser.getTok().getLoc();

  switch(getLexer().getKind()) {
  default: return MatchOperand_ParseFail;
  case AsmToken::Integer:
    // The operand can be an integer value.
    if (getParser().parseAbsoluteExpression(Waitcnt))
      return MatchOperand_ParseFail;
    break;

  case AsmToken::Identifier:
    do {
      if (parseCnt(Waitcnt))
        return MatchOperand_ParseFail;
    } while(getLexer().isNot(AsmToken::EndOfStatement));
    break;
  }
  Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
  return MatchOperand_Success;
}

/// Parse "hwreg(reg[, offset, width])" where the register is either a
/// symbolic name or a raw id. Returns true on a syntax error; semantic range
/// checks are done by the caller (parseHwreg).
bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
                                          int64_t &Width) {
  using namespace llvm::AMDGPU::Hwreg;

  if (Parser.getTok().getString() != "hwreg")
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::LParen))
    return true;
  Parser.Lex();

  if (getLexer().is(AsmToken::Identifier)) {
    HwReg.IsSymbolic = true;
    HwReg.Id = ID_UNKNOWN_;
    const StringRef tok = Parser.getTok().getString();
    int Last = ID_SYMBOLIC_LAST_;
    // Pre-GFX9 subtargets do not have the GFX9-only symbolic registers.
    if (isSI() || isCI() || isVI())
      Last = ID_SYMBOLIC_FIRST_GFX9_;
    for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) {
      if (tok == IdSymbolic[i]) {
        HwReg.Id = i;
        break;
      }
    }
    Parser.Lex();
  } else {
    HwReg.IsSymbolic = false;
    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(HwReg.Id))
      return true;
  }

  if (getLexer().is(AsmToken::RParen)) {
    Parser.Lex();
    return false;
  }

  // optional params
  if (getLexer().isNot(AsmToken::Comma))
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::Integer))
    return true;
  if (getParser().parseAbsoluteExpression(Offset))
    return true;

  if (getLexer().isNot(AsmToken::Comma))
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::Integer))
    return true;
  if (getParser().parseAbsoluteExpression(Width))
    return true;

  if (getLexer().isNot(AsmToken::RParen))
    return true;
  Parser.Lex();

  return false;
}

/// Parse an s_getreg/s_setreg hwreg operand, either as a raw 16-bit value or
/// as a hwreg(...) construct, into an ImmTyHwreg immediate.
OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Hwreg;

  int64_t Imm16Val = 0;
  SMLoc S = Parser.getTok().getLoc();

  switch(getLexer().getKind()) {
  default: return MatchOperand_NoMatch;
  case AsmToken::Integer:
    // The operand can be an integer value.
    if (getParser().parseAbsoluteExpression(Imm16Val))
      return MatchOperand_NoMatch;
    if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
      Error(S, "invalid immediate: only 16-bit values are legal");
      // Do not return error code, but create an imm operand anyway and proceed
      // to the next operand, if any. That avoids unneccessary error messages.
    }
    break;

  case AsmToken::Identifier: {
      OperandInfoTy HwReg(ID_UNKNOWN_);
      int64_t Offset = OFFSET_DEFAULT_;
      int64_t Width = WIDTH_M1_DEFAULT_ + 1;
      if (parseHwregConstruct(HwReg, Offset, Width))
        return MatchOperand_ParseFail;
      // Range diagnostics below intentionally do not fail the parse; the
      // (possibly wrong) encoding is still produced so parsing can continue.
      if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) {
        if (HwReg.IsSymbolic)
          Error(S, "invalid symbolic name of hardware register");
        else
          Error(S, "invalid code of hardware register: only 6-bit values are legal");
      }
      if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset))
        Error(S, "invalid bit offset: only 5-bit values are legal");
      if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1))
        Error(S, "invalid bitfield width: only values from 1 to 32 are legal");
      Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_);
    }
    break;
  }
  Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isSWaitCnt() const {
  return isImm();
}

bool AMDGPUOperand::isHwreg() const {
  return isImmTy(ImmTyHwreg);
}

/// Parse a "sendmsg(msg[, op[, stream]])" construct for s_sendmsg.
/// Returns true on a syntax error; semantic validation happens in
/// parseSendMsgOp.
bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) {
  using namespace llvm::AMDGPU::SendMsg;

  if (Parser.getTok().getString() != "sendmsg")
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::LParen))
    return true;
  Parser.Lex();

  if (getLexer().is(AsmToken::Identifier)) {
    Msg.IsSymbolic = true;
    Msg.Id = ID_UNKNOWN_;
    const std::string tok = Parser.getTok().getString();
    // The message id space has gaps; only accept the defined ids.
    for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
      switch(i) {
        default: continue; // Omit gaps.
        case ID_INTERRUPT: case ID_GS: case ID_GS_DONE: case ID_SYSMSG: break;
      }
      if (tok == IdSymbolic[i]) {
        Msg.Id = i;
        break;
      }
    }
    Parser.Lex();
  } else {
    Msg.IsSymbolic = false;
    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(Msg.Id))
      return true;
    // NOTE(review): this second parse of a trailing integer into Msg.Id looks
    // odd but is preserved as-is; it appears to reject a stray extra integer
    // by resetting the id to unknown — confirm against upstream history.
    if (getLexer().is(AsmToken::Integer))
      if (getParser().parseAbsoluteExpression(Msg.Id))
        Msg.Id = ID_UNKNOWN_;
  }
  if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest.
    return false;

  // Only GS, GS_DONE and SYSMSG take an operation argument.
  if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) {
    if (getLexer().isNot(AsmToken::RParen))
      return true;
    Parser.Lex();
    return false;
  }

  if (getLexer().isNot(AsmToken::Comma))
    return true;
  Parser.Lex();

  assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG);
  Operation.Id = ID_UNKNOWN_;
  if (getLexer().is(AsmToken::Identifier)) {
    Operation.IsSymbolic = true;
    // Pick the symbol table and valid id range for this message type.
    const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
    const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
    const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
    const StringRef Tok = Parser.getTok().getString();
    for (int i = F; i < L; ++i) {
      if (Tok == S[i]) {
        Operation.Id = i;
        break;
      }
    }
    Parser.Lex();
  } else {
    Operation.IsSymbolic = false;
    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(Operation.Id))
      return true;
  }

  if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
    // Stream id is optional.
    if (getLexer().is(AsmToken::RParen)) {
      Parser.Lex();
      return false;
    }

    if (getLexer().isNot(AsmToken::Comma))
      return true;
    Parser.Lex();

    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(StreamId))
      return true;
  }

  if (getLexer().isNot(AsmToken::RParen))
    return true;
  Parser.Lex();
  return false;
}

/// Parse a VINTRP slot specifier (p10, p20 or p0) into an ImmTyInterpSlot
/// immediate operand.
OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
  if (getLexer().getKind() != AsmToken::Identifier)
    return MatchOperand_NoMatch;

  StringRef Str = Parser.getTok().getString();
  int Slot = StringSwitch<int>(Str)
    .Case("p10", 0)
    .Case("p20", 1)
    .Case("p0", 2)
    .Default(-1);

  SMLoc S = Parser.getTok().getLoc();
  if (Slot == -1)
    return MatchOperand_ParseFail;

  Parser.Lex();
  Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
                                              AMDGPUOperand::ImmTyInterpSlot));
  return MatchOperand_Success;
}

/// Parse a VINTRP attribute "attr<N>.<chan>" into an attribute immediate
/// (0-63) and a channel immediate (x/y/z/w -> 0-3).
OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
  if (getLexer().getKind() != AsmToken::Identifier)
    return MatchOperand_NoMatch;

  StringRef Str = Parser.getTok().getString();
  if (!Str.startswith("attr"))
    return MatchOperand_NoMatch;

  // The channel is the trailing ".x"/".y"/".z"/".w".
  StringRef Chan = Str.take_back(2);
  int AttrChan = StringSwitch<int>(Chan)
    .Case(".x", 0)
    .Case(".y", 1)
    .Case(".z", 2)
    .Case(".w", 3)
    .Default(-1);
  if (AttrChan == -1)
    return MatchOperand_ParseFail;

  // Strip the "attr" prefix and the channel suffix; the middle is the number.
  Str = Str.drop_back(2).drop_front(4);

  uint8_t Attr;
  if (Str.getAsInteger(10, Attr))
    return MatchOperand_ParseFail;

  SMLoc S = Parser.getTok().getLoc();
  Parser.Lex();
  if (Attr > 63) {
    Error(S, "out of bounds attr");
    return MatchOperand_Success;
  }

  SMLoc SChan = SMLoc::getFromPointer(Chan.data());

  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
                                              AMDGPUOperand::ImmTyInterpAttr));
  Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
                                              AMDGPUOperand::ImmTyAttrChan));
  return MatchOperand_Success;
}

/// Report an invalid EXP target at the current token location.
void AMDGPUAsmParser::errorExpTgt() {
  Error(Parser.getTok().getLoc(), "invalid exp target");
}

/// Decode an EXP target name (null, mrt<N>/mrtz, pos<N>, param<N>,
/// invalid_target_<N>) into its hardware target code in \p Val.
OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
                                                      uint8_t &Val) {
  if (Str == "null") {
    Val = 9;
    return MatchOperand_Success;
  }

  if (Str.startswith("mrt")) {
    Str = Str.drop_front(3);
    if (Str == "z") { // == mrtz
      Val = 8;
      return MatchOperand_Success;
    }

    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val > 7)
      errorExpTgt();

    return MatchOperand_Success;
  }

  if (Str.startswith("pos")) {
    Str = Str.drop_front(3);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val > 3)
      errorExpTgt();

    // pos targets occupy codes 12-15.
    Val += 12;
    return MatchOperand_Success;
  }

  if (Str.startswith("param")) {
    Str = Str.drop_front(5);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val >= 32)
      errorExpTgt();

    // param targets occupy codes 32-63.
    Val += 32;
    return MatchOperand_Success;
  }

  // Round-trip support for targets the disassembler printed as invalid.
  if (Str.startswith("invalid_target_")) {
    Str = Str.drop_front(15);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    errorExpTgt();
    return MatchOperand_Success;
  }

  return MatchOperand_NoMatch;
}

/// Parse an EXP target operand into an ImmTyExpTgt immediate.
OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
  uint8_t Val;
  StringRef Str = Parser.getTok().getString();

  auto Res = parseExpTgtImpl(Str, Val);
  if (Res != MatchOperand_Success)
    return Res;

  SMLoc S = Parser.getTok().getLoc();
  Parser.Lex();

  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
                                              AMDGPUOperand::ImmTyExpTgt));
  return MatchOperand_Success;
}

/// Parse an s_sendmsg operand: either a raw 16-bit immediate or a
/// sendmsg(...) construct, validated and encoded into an ImmTySendMsg
/// immediate. Diagnoses but does not fail on out-of-range components.
OperandMatchResultTy
AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SendMsg;

  int64_t Imm16Val = 0;
  SMLoc S = Parser.getTok().getLoc();

  switch(getLexer().getKind()) {
  default:
    return MatchOperand_NoMatch;
  case AsmToken::Integer:
    // The operand can be an integer value.
    if (getParser().parseAbsoluteExpression(Imm16Val))
      return MatchOperand_NoMatch;
    if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
      Error(S, "invalid immediate: only 16-bit values are legal");
      // Do not return error code, but create an imm operand anyway and proceed
      // to the next operand, if any. That avoids unneccessary error messages.
    }
    break;
  case AsmToken::Identifier: {
      OperandInfoTy Msg(ID_UNKNOWN_);
      OperandInfoTy Operation(OP_UNKNOWN_);
      int64_t StreamId = STREAM_ID_DEFAULT_;
      if (parseSendMsgConstruct(Msg, Operation, StreamId))
        return MatchOperand_ParseFail;
      // do/while(false) lets each validation step break out early while
      // still producing the (possibly partial) immediate below.
      do {
        // Validate and encode message ID.
        if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE)
                || Msg.Id == ID_SYSMSG)) {
          if (Msg.IsSymbolic)
            Error(S, "invalid/unsupported symbolic name of message");
          else
            Error(S, "invalid/unsupported code of message");
          break;
        }
        Imm16Val = (Msg.Id << ID_SHIFT_);
        // Validate and encode operation ID.
        if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) {
          if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) {
            if (Operation.IsSymbolic)
              Error(S, "invalid symbolic name of GS_OP");
            else
              Error(S, "invalid code of GS_OP: only 2-bit values are legal");
            break;
          }
          if (Operation.Id == OP_GS_NOP
              && Msg.Id != ID_GS_DONE) {
            Error(S, "invalid GS_OP: NOP is for GS_DONE only");
            break;
          }
          Imm16Val |= (Operation.Id << OP_SHIFT_);
        }
        if (Msg.Id == ID_SYSMSG) {
          if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) {
            if (Operation.IsSymbolic)
              Error(S, "invalid/unsupported symbolic name of SYSMSG_OP");
            else
              Error(S, "invalid/unsupported code of SYSMSG_OP");
            break;
          }
          Imm16Val |= (Operation.Id << OP_SHIFT_);
        }
        // Validate and encode stream ID.
        if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
          if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) {
            Error(S, "invalid stream id: only 2-bit values are legal");
            break;
          }
          Imm16Val |= (StreamId << STREAM_ID_SHIFT_);
        }
      } while (false);
    }
    break;
  }
  Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isSendMsg() const {
  return isImmTy(ImmTySendMsg);
}

//===----------------------------------------------------------------------===//
// parser helpers
//===----------------------------------------------------------------------===//

/// Consume the current token if it is the identifier \p Id; return whether it
/// was consumed.
bool
AMDGPUAsmParser::trySkipId(const StringRef Id) {
  if (getLexer().getKind() == AsmToken::Identifier &&
      Parser.getTok().getString() == Id) {
    Parser.Lex();
    return true;
  }
  return false;
}

/// Consume the current token if it is of kind \p Kind; return whether it was
/// consumed.
bool
AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
  if (getLexer().getKind() == Kind) {
    Parser.Lex();
    return true;
  }
  return false;
}

// Like trySkipToken, but emits ErrMsg at the current location when the
// expected token is absent. Returns true iff the token was consumed.
bool
AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
                           const StringRef ErrMsg) {
  if (!trySkipToken(Kind)) {
    Error(Parser.getTok().getLoc(), ErrMsg);
    return false;
  }
  return true;
}

// Parse an absolute expression into Imm; returns true on success
// (note: inverts the MCAsmParser convention).
bool
AMDGPUAsmParser::parseExpr(int64_t &Imm) {
  return !getParser().parseAbsoluteExpression(Imm);
}

// Parse a quoted string token into Val (contents only, no quotes);
// emits ErrMsg on failure.
bool
AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
  SMLoc S = Parser.getTok().getLoc();
  if (getLexer().getKind() == AsmToken::String) {
    Val = Parser.getTok().getStringContents();
    Parser.Lex();
    return true;
  } else {
    Error(S, ErrMsg);
    return false;
  }
}

//===----------------------------------------------------------------------===//
// swizzle
//===----------------------------------------------------------------------===//

// Pack the and/or/xor masks into a BITMASK_PERM swizzle encoding.
LLVM_READNONE
static unsigned
encodeBitmaskPerm(const unsigned AndMask,
                  const unsigned OrMask,
                  const unsigned XorMask) {
  using namespace llvm::AMDGPU::Swizzle;

  return BITMASK_PERM_ENC |
         (AndMask << BITMASK_AND_SHIFT) |
         (OrMask << BITMASK_OR_SHIFT) |
         (XorMask << BITMASK_XOR_SHIFT);
}

// Parse OpNum comma-prefixed integer operands into Op[], checking each is
// within [MinVal, MaxVal]; emits ErrMsg on a range violation.
bool
AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                                      const unsigned MinVal,
                                      const unsigned MaxVal,
                                      const StringRef ErrMsg) {
  for (unsigned i = 0; i < OpNum; ++i) {
    if (!skipToken(AsmToken::Comma, "expected a comma")){
      return false;
    }
    SMLoc ExprLoc = Parser.getTok().getLoc();
    if (!parseExpr(Op[i])) {
      return false;
    }
    if (Op[i] < MinVal || Op[i] > MaxVal) {
      Error(ExprLoc, ErrMsg);
      return false;
    }
  }

  return true;
}

// swizzle(QUAD_PERM, l0, l1, l2, l3): four 2-bit lane ids.
bool
AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  int64_t Lane[LANE_NUM];
  if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
                           "expected a 2-bit lane id")) {
    Imm = QUAD_PERM_ENC;
    for (auto i = 0; i < LANE_NUM; ++i) {
      Imm |= Lane[i] << (LANE_SHIFT * i);
    }
    return true;
  }
  return false;
}

// swizzle(BROADCAST, group_size, lane_id): broadcast one lane to its group.
bool
AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc S = Parser.getTok().getLoc();
  int64_t GroupSize;
  int64_t LaneIdx;

  if (!parseSwizzleOperands(1, &GroupSize,
                            2, 32,
                            "group size must be in the interval [2,32]")) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(S, "group size must be a power of two");
    return false;
  }
  if (parseSwizzleOperands(1, &LaneIdx,
                           0, GroupSize - 1,
                           "lane id must be in the interval [0,group size - 1]")) {
    // AND keeps the group bits, OR injects the broadcast lane.
    Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
    return true;
  }
  return false;
}

// swizzle(REVERSE, group_size): reverse lanes within each group.
bool
AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc S = Parser.getTok().getLoc();
  int64_t GroupSize;

  if (!parseSwizzleOperands(1, &GroupSize,
                            2, 32, "group size must be in the interval [2,32]")) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(S, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
  return true;
}

// swizzle(SWAP, group_size): swap adjacent groups of lanes.
bool
AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc S = Parser.getTok().getLoc();
  int64_t GroupSize;

  if (!parseSwizzleOperands(1, &GroupSize,
                            1, 16, "group size must be in the interval [1,16]")) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(S, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
  return true;
}

// swizzle(BITMASK_PERM, "mask"): 5-char mask of {0,1,p,i} per bit.
bool
AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }

  StringRef Ctl;
  SMLoc StrLoc = Parser.getTok().getLoc();
  if (!parseString(Ctl)) {
    return false;
  }
  if (Ctl.size() != BITMASK_WIDTH) {
    Error(StrLoc, "expected a 5-character mask");
    return false;
  }

  unsigned AndMask = 0;
  unsigned OrMask = 0;
  unsigned XorMask = 0;

  // Leftmost character controls the most significant bit.
  for (size_t i = 0; i < Ctl.size(); ++i) {
    unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
    switch(Ctl[i]) {
    default:
      Error(StrLoc, "invalid mask");
      return false;
    case '0': // force bit to 0
      break;
    case '1': // force bit to 1
      OrMask |= Mask;
      break;
    case 'p': // preserve bit
      AndMask |= Mask;
      break;
    case 'i': // invert bit
      AndMask |= Mask;
      XorMask |= Mask;
      break;
    }
  }

  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
  return true;
}

// Raw 16-bit swizzle offset value.
bool
AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {

  SMLoc OffsetLoc = Parser.getTok().getLoc();

  if (!parseExpr(Imm)) {
    return false;
  }
  if (!isUInt<16>(Imm)) {
    Error(OffsetLoc, "expected a 16-bit offset");
    return false;
  }
  return true;
}

// Dispatch on the swizzle mode name inside "swizzle(<mode>...)".
bool
AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (skipToken(AsmToken::LParen, "expected a left parentheses")) {

    SMLoc ModeLoc = Parser.getTok().getLoc();
    bool Ok = false;

    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
      Ok = parseSwizzleQuadPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
      Ok = parseSwizzleBitmaskPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
      Ok = parseSwizzleBroadcast(Imm);
    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
      Ok = parseSwizzleSwap(Imm);
    } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
      Ok = parseSwizzleReverse(Imm);
    } else {
      Error(ModeLoc, "expected a swizzle mode");
    }

    return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
  }

  return false;
}

// Parse "offset:<imm>" or "offset:swizzle(...)" into an ImmTySwizzle
// operand. (Function body continues in the next excerpt region.)
OperandMatchResultTy
AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (trySkipId("offset")) {

    bool Ok = false;
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (trySkipId("swizzle")) {
        Ok = parseSwizzleMacro(Imm);
      } else {
        Ok = parseSwizzleOffset(Imm);
      }
    }

    Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));

    return Ok? MatchOperand_Success : MatchOperand_ParseFail;
  } else {
    // Swizzle "offset" operand is optional.
    // If it is omitted, try parsing other optional operands.
4516 return parseOptionalOpr(Operands); 4517 } 4518 } 4519 4520 bool 4521 AMDGPUOperand::isSwizzle() const { 4522 return isImmTy(ImmTySwizzle); 4523 } 4524 4525 //===----------------------------------------------------------------------===// 4526 // sopp branch targets 4527 //===----------------------------------------------------------------------===// 4528 4529 OperandMatchResultTy 4530 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 4531 SMLoc S = Parser.getTok().getLoc(); 4532 4533 switch (getLexer().getKind()) { 4534 default: return MatchOperand_ParseFail; 4535 case AsmToken::Integer: { 4536 int64_t Imm; 4537 if (getParser().parseAbsoluteExpression(Imm)) 4538 return MatchOperand_ParseFail; 4539 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S)); 4540 return MatchOperand_Success; 4541 } 4542 4543 case AsmToken::Identifier: 4544 Operands.push_back(AMDGPUOperand::CreateExpr(this, 4545 MCSymbolRefExpr::create(getContext().getOrCreateSymbol( 4546 Parser.getTok().getString()), getContext()), S)); 4547 Parser.Lex(); 4548 return MatchOperand_Success; 4549 } 4550 } 4551 4552 //===----------------------------------------------------------------------===// 4553 // mubuf 4554 //===----------------------------------------------------------------------===// 4555 4556 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 4557 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 4558 } 4559 4560 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 4561 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 4562 } 4563 4564 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 4565 const OperandVector &Operands, 4566 bool IsAtomic, 4567 bool IsAtomicReturn, 4568 bool IsLds) { 4569 bool IsLdsOpcode = IsLds; 4570 bool HasLdsModifier = false; 4571 OptionalImmIndexMap OptionalIdx; 4572 assert(IsAtomicReturn ? 
IsAtomic : true); 4573 4574 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4575 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4576 4577 // Add the register arguments 4578 if (Op.isReg()) { 4579 Op.addRegOperands(Inst, 1); 4580 continue; 4581 } 4582 4583 // Handle the case where soffset is an immediate 4584 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 4585 Op.addImmOperands(Inst, 1); 4586 continue; 4587 } 4588 4589 HasLdsModifier = Op.isLDS(); 4590 4591 // Handle tokens like 'offen' which are sometimes hard-coded into the 4592 // asm string. There are no MCInst operands for these. 4593 if (Op.isToken()) { 4594 continue; 4595 } 4596 assert(Op.isImm()); 4597 4598 // Handle optional arguments 4599 OptionalIdx[Op.getImmTy()] = i; 4600 } 4601 4602 // This is a workaround for an llvm quirk which may result in an 4603 // incorrect instruction selection. Lds and non-lds versions of 4604 // MUBUF instructions are identical except that lds versions 4605 // have mandatory 'lds' modifier. However this modifier follows 4606 // optional modifiers and llvm asm matcher regards this 'lds' 4607 // modifier as an optional one. As a result, an lds version 4608 // of opcode may be selected even if it has no 'lds' modifier. 4609 if (IsLdsOpcode && !HasLdsModifier) { 4610 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 4611 if (NoLdsOpcode != -1) { // Got lds version - correct it. 4612 Inst.setOpcode(NoLdsOpcode); 4613 IsLdsOpcode = false; 4614 } 4615 } 4616 4617 // Copy $vdata_in operand and insert as $vdata for MUBUF_Atomic RTN insns. 4618 if (IsAtomicReturn) { 4619 MCInst::iterator I = Inst.begin(); // $vdata_in is always at the beginning. 4620 Inst.insert(I, *I); 4621 } 4622 4623 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 4624 if (!IsAtomic) { // glc is hard-coded. 
4625 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 4626 } 4627 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 4628 4629 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 4630 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 4631 } 4632 } 4633 4634 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 4635 OptionalImmIndexMap OptionalIdx; 4636 4637 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4638 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4639 4640 // Add the register arguments 4641 if (Op.isReg()) { 4642 Op.addRegOperands(Inst, 1); 4643 continue; 4644 } 4645 4646 // Handle the case where soffset is an immediate 4647 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 4648 Op.addImmOperands(Inst, 1); 4649 continue; 4650 } 4651 4652 // Handle tokens like 'offen' which are sometimes hard-coded into the 4653 // asm string. There are no MCInst operands for these. 
4654 if (Op.isToken()) { 4655 continue; 4656 } 4657 assert(Op.isImm()); 4658 4659 // Handle optional arguments 4660 OptionalIdx[Op.getImmTy()] = i; 4661 } 4662 4663 addOptionalImmOperand(Inst, Operands, OptionalIdx, 4664 AMDGPUOperand::ImmTyOffset); 4665 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 4666 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 4667 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 4668 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 4669 } 4670 4671 //===----------------------------------------------------------------------===// 4672 // mimg 4673 //===----------------------------------------------------------------------===// 4674 4675 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 4676 bool IsAtomic) { 4677 unsigned I = 1; 4678 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 4679 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 4680 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 4681 } 4682 4683 if (IsAtomic) { 4684 // Add src, same as dst 4685 assert(Desc.getNumDefs() == 1); 4686 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 4687 } 4688 4689 OptionalImmIndexMap OptionalIdx; 4690 4691 for (unsigned E = Operands.size(); I != E; ++I) { 4692 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 4693 4694 // Add the register arguments 4695 if (Op.isReg()) { 4696 Op.addRegOperands(Inst, 1); 4697 } else if (Op.isImmModifier()) { 4698 OptionalIdx[Op.getImmTy()] = I; 4699 } else { 4700 llvm_unreachable("unexpected operand type"); 4701 } 4702 } 4703 4704 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 4705 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 4706 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 4707 addOptionalImmOperand(Inst, Operands, OptionalIdx, 
AMDGPUOperand::ImmTySLC); 4708 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128); 4709 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 4710 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 4711 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 4712 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 4713 } 4714 4715 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 4716 cvtMIMG(Inst, Operands, true); 4717 } 4718 4719 //===----------------------------------------------------------------------===// 4720 // smrd 4721 //===----------------------------------------------------------------------===// 4722 4723 bool AMDGPUOperand::isSMRDOffset8() const { 4724 return isImm() && isUInt<8>(getImm()); 4725 } 4726 4727 bool AMDGPUOperand::isSMRDOffset20() const { 4728 return isImm() && isUInt<20>(getImm()); 4729 } 4730 4731 bool AMDGPUOperand::isSMRDLiteralOffset() const { 4732 // 32-bit literals are only supported on CI and we only want to use them 4733 // when the offset is > 8-bits. 
4734 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 4735 } 4736 4737 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 4738 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 4739 } 4740 4741 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const { 4742 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 4743 } 4744 4745 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 4746 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 4747 } 4748 4749 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const { 4750 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 4751 } 4752 4753 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const { 4754 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 4755 } 4756 4757 //===----------------------------------------------------------------------===// 4758 // vop3 4759 //===----------------------------------------------------------------------===// 4760 4761 static bool ConvertOmodMul(int64_t &Mul) { 4762 if (Mul != 1 && Mul != 2 && Mul != 4) 4763 return false; 4764 4765 Mul >>= 1; 4766 return true; 4767 } 4768 4769 static bool ConvertOmodDiv(int64_t &Div) { 4770 if (Div == 1) { 4771 Div = 0; 4772 return true; 4773 } 4774 4775 if (Div == 2) { 4776 Div = 3; 4777 return true; 4778 } 4779 4780 return false; 4781 } 4782 4783 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 4784 if (BoundCtrl == 0) { 4785 BoundCtrl = 1; 4786 return true; 4787 } 4788 4789 if (BoundCtrl == -1) { 4790 BoundCtrl = 0; 4791 return true; 4792 } 4793 4794 return false; 4795 } 4796 4797 // Note: the order in this table matches the order of operands in AsmString. 
4798 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 4799 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 4800 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 4801 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 4802 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 4803 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 4804 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 4805 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 4806 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 4807 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 4808 {"dfmt", AMDGPUOperand::ImmTyFORMAT, false, nullptr}, 4809 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 4810 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 4811 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 4812 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 4813 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 4814 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 4815 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 4816 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 4817 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 4818 {"r128", AMDGPUOperand::ImmTyR128, true, nullptr}, 4819 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 4820 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 4821 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 4822 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 4823 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 4824 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 4825 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 4826 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 4827 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 4828 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 4829 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 4830 {"vm", AMDGPUOperand::ImmTyExpVM, true, 
nullptr}, 4831 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 4832 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 4833 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 4834 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr} 4835 }; 4836 4837 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 4838 unsigned size = Operands.size(); 4839 assert(size > 0); 4840 4841 OperandMatchResultTy res = parseOptionalOpr(Operands); 4842 4843 // This is a hack to enable hardcoded mandatory operands which follow 4844 // optional operands. 4845 // 4846 // Current design assumes that all operands after the first optional operand 4847 // are also optional. However implementation of some instructions violates 4848 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 4849 // 4850 // To alleviate this problem, we have to (implicitly) parse extra operands 4851 // to make sure autogenerated parser of custom operands never hit hardcoded 4852 // mandatory operands. 4853 4854 if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) { 4855 4856 // We have parsed the first optional operand. 4857 // Parse as many operands as necessary to skip all mandatory operands. 
4858 4859 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 4860 if (res != MatchOperand_Success || 4861 getLexer().is(AsmToken::EndOfStatement)) break; 4862 if (getLexer().is(AsmToken::Comma)) Parser.Lex(); 4863 res = parseOptionalOpr(Operands); 4864 } 4865 } 4866 4867 return res; 4868 } 4869 4870 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 4871 OperandMatchResultTy res; 4872 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 4873 // try to parse any optional operand here 4874 if (Op.IsBit) { 4875 res = parseNamedBit(Op.Name, Operands, Op.Type); 4876 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 4877 res = parseOModOperand(Operands); 4878 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 4879 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 4880 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 4881 res = parseSDWASel(Operands, Op.Name, Op.Type); 4882 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 4883 res = parseSDWADstUnused(Operands); 4884 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 4885 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 4886 Op.Type == AMDGPUOperand::ImmTyNegLo || 4887 Op.Type == AMDGPUOperand::ImmTyNegHi) { 4888 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 4889 Op.ConvertResult); 4890 } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT) { 4891 res = parseDfmtNfmt(Operands); 4892 } else { 4893 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 4894 } 4895 if (res != MatchOperand_NoMatch) { 4896 return res; 4897 } 4898 } 4899 return MatchOperand_NoMatch; 4900 } 4901 4902 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 4903 StringRef Name = Parser.getTok().getString(); 4904 if (Name == "mul") { 4905 return parseIntWithPrefix("mul", Operands, 4906 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 4907 } 4908 4909 if (Name == "div") { 4910 return parseIntWithPrefix("div", Operands, 4911 AMDGPUOperand::ImmTyOModSI, 
ConvertOmodDiv); 4912 } 4913 4914 return MatchOperand_NoMatch; 4915 } 4916 4917 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 4918 cvtVOP3P(Inst, Operands); 4919 4920 int Opc = Inst.getOpcode(); 4921 4922 int SrcNum; 4923 const int Ops[] = { AMDGPU::OpName::src0, 4924 AMDGPU::OpName::src1, 4925 AMDGPU::OpName::src2 }; 4926 for (SrcNum = 0; 4927 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 4928 ++SrcNum); 4929 assert(SrcNum > 0); 4930 4931 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4932 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4933 4934 if ((OpSel & (1 << SrcNum)) != 0) { 4935 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 4936 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 4937 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 4938 } 4939 } 4940 4941 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 4942 // 1. This operand is input modifiers 4943 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 4944 // 2. This is not last operand 4945 && Desc.NumOperands > (OpNum + 1) 4946 // 3. Next operand is register class 4947 && Desc.OpInfo[OpNum + 1].RegClass != -1 4948 // 4. 
      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
}

// Convert a VOP3 interpolation instruction; interp slot/attr/chan operands
// are emitted as raw immediates, then optional high/clamp/omod are appended
// when the opcode has those named operands.
void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
{
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      // Source with FP input modifiers occupies two MCInst operands.
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() ||
               Op.isInterpAttr() ||
               Op.isAttrChan()) {
      Inst.addOperand(MCOperand::createImm(Op.Imm.Val));
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }
}

// Core VOP3 conversion. Records optional modifier positions in OptionalIdx,
// appends clamp/omod when present, and special-cases v_mac/v_fmac whose
// tied src2 gets a forced zero src2_modifiers and a copy of dst.
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
    // This instruction has src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
      } else if (Op.isImmModifier()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else if (Op.isRegOrImm()) {
        Op.addRegOrImmOperands(Inst, 1);
      } else {
        llvm_unreachable("unhandled operand type");
      }
    }
  } else {
    // No src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (Op.isMod()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        Op.addRegOrImmOperands(Inst, 1);
      }
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }

  // Special case v_mac_{f16, f32} and v_fmac_f32 (gfx906):
  // it has src2 register operand that is tied to dst operand
  // we don't allow modifiers for this operand in assembler so src2_modifiers
  // should be 0.
  if (Opc == AMDGPU::V_MAC_F32_e64_si ||
      Opc == AMDGPU::V_MAC_F32_e64_vi ||
      Opc == AMDGPU::V_MAC_F16_e64_vi ||
      Opc == AMDGPU::V_FMAC_F32_e64_vi) {
    auto it = Inst.begin();
    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
    ++it;
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  cvtVOP3(Inst, Operands, OptionalIdx);
}

// VOP3P conversion: runs the core VOP3 conversion, then decodes the
// op_sel/op_sel_hi/neg_lo/neg_hi immediates and folds the per-source bits
// into the corresponding srcN_modifiers operands.
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
                               const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  cvtVOP3(Inst, Operands, OptIdx);

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
    assert(!IsPacked);
    // vdst_in is mirrored from vdst (operand 0).
    Inst.addOperand(Inst.getOperand(0));
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers

  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    // Packed ops default op_sel_hi to all-ones (-1).
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    assert(IsPacked);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelHiIdx != -1) {
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
  }

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

  // Fold bit J of each field into srcJ_modifiers.
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}

//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//

// Returns true if this immediate is a valid dpp_ctrl encoding. The value must
// fit in 9 bits AND fall inside one of the defined control ranges; encoding
// gaps between the ranges are rejected.
bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;

  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
  if (result) {
    int64_t Imm = getImm();
    return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
           (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
           (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
           (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
           (Imm == DppCtrl::WAVE_SHL1) ||
           (Imm == DppCtrl::WAVE_ROL1) ||
           (Imm == DppCtrl::WAVE_SHR1) ||
           (Imm == DppCtrl::WAVE_ROR1) ||
           (Imm == DppCtrl::ROW_MIRROR) ||
           (Imm == DppCtrl::ROW_HALF_MIRROR) ||
           (Imm == DppCtrl::BCAST15) ||
           (Imm == DppCtrl::BCAST31);
  }
  return false;
}

// A GPR index mode operand is a 4-bit immediate.
bool AMDGPUOperand::isGPRIdxMode() const {
  return isImm() && isUInt<4>(getImm());
}

// 16-bit immediate, accepting either a signed or an unsigned interpretation.
bool AMDGPUOperand::isS16Imm() const {
  return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}

// Unsigned 16-bit immediate.
bool AMDGPUOperand::isU16Imm() const {
  return isImm() && isUInt<16>(getImm());
}

// Parse a dpp_ctrl operand: row_mirror, row_half_mirror,
// quad_perm:[d,d,d,d], or one of the prefix:value forms (row_shl, row_shr,
// row_ror, wave_shl, wave_rol, wave_shr, wave_ror, row_bcast). The encoded
// control value is pushed as an ImmTyDppCtrl immediate operand.
OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;
  int64_t Int;

  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix == "row_mirror") {
    Int = DppCtrl::ROW_MIRROR;
    Parser.Lex();
  } else if (Prefix == "row_half_mirror") {
    Int = DppCtrl::ROW_HALF_MIRROR;
    Parser.Lex();
  } else {
    // Check to prevent parseDPPCtrlOps from eating invalid tokens
    if (Prefix != "quad_perm"
        && Prefix != "row_shl"
        && Prefix != "row_shr"
        && Prefix != "row_ror"
        && Prefix != "wave_shl"
        && Prefix != "wave_rol"
        && Prefix != "wave_shr"
        && Prefix != "wave_ror"
        && Prefix != "row_bcast") {
      return MatchOperand_NoMatch;
    }

    Parser.Lex();
    if (getLexer().isNot(AsmToken::Colon))
      return MatchOperand_ParseFail;

    if (Prefix == "quad_perm") {
      // quad_perm:[%d,%d,%d,%d]
      Parser.Lex();
      if (getLexer().isNot(AsmToken::LBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();

      // First selector occupies bits [1:0]; each selector value is 0..3.
      if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
        return MatchOperand_ParseFail;

      // Remaining three selectors are packed 2 bits each at bits [3:2],
      // [5:4] and [7:6].
      for (int i = 0; i < 3; ++i) {
        if (getLexer().isNot(AsmToken::Comma))
          return MatchOperand_ParseFail;
        Parser.Lex();

        int64_t Temp;
        if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
          return MatchOperand_ParseFail;
        const int shift = i*2 + 2;
        Int += (Temp << shift);
      }

      if (getLexer().isNot(AsmToken::RBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();
    } else {
      // sel:%d
      Parser.Lex();
      if (getParser().parseAbsoluteExpression(Int))
        return MatchOperand_ParseFail;

      // row_* take a shift/rotate amount of 1..15 OR-ed onto the range base;
      // wave_* only accept a count of exactly 1; row_bcast accepts 15 or 31.
      if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHL0;
      } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHR0;
      } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_ROR0;
      } else if (Prefix == "wave_shl" && 1 == Int) {
        Int = DppCtrl::WAVE_SHL1;
      } else if (Prefix == "wave_rol" && 1 == Int) {
        Int = DppCtrl::WAVE_ROL1;
      } else if (Prefix == "wave_shr" && 1 == Int) {
        Int = DppCtrl::WAVE_SHR1;
      } else if (Prefix == "wave_ror" && 1 == Int) {
        Int = DppCtrl::WAVE_ROR1;
      } else if (Prefix == "row_bcast") {
        if (Int == 15) {
          Int = DppCtrl::BCAST15;
        } else if (Int == 31) {
          Int = DppCtrl::BCAST31;
        } else {
          return MatchOperand_ParseFail;
        }
      } else {
        return MatchOperand_ParseFail;
      }
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}

// Default row_mask used when the operand is omitted (0xf).
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

// Default bank_mask used when the operand is omitted (0xf).
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

// Default bound_ctrl used when the operand is omitted (0).
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}

// Convert parsed operands to a DPP MCInst, appending defaults for omitted
// optional immediates.
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  // All DPP instructions with at least one source operand have a fake "old"
  // source at the beginning that's tied to the dst operand. Handle it here.
  if (Desc.getNumOperands() >= 2)
    Inst.addOperand(Inst.getOperand(0));

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
      // Skip it.
      continue;
    } if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      // Note: the preceding branch ends in "continue", so this behaves as an
      // "else if".
      Op.addRegWithFPInputModsOperands(Inst, 2);
    } else if (Op.isDPPCtrl()) {
      Op.addImmOperands(Inst, 1);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
  }

  // Append defaults for any optional DPP immediates the user omitted.
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
}

//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

// Parse an SDWA select operand of the form <Prefix>:<BYTE_n|WORD_n|DWORD>
// and push it as an immediate operand of the given type.
OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix(Prefix, Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  // 0xffffffff is used as a sentinel for "unrecognized select name"; no
  // valid SdwaSel value collides with it.
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}

// Parse a dst_unused:<UNUSED_PAD|UNUSED_SEXT|UNUSED_PRESERVE> SDWA operand.
OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix("dst_unused", Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  // 0xffffffff is the "unrecognized name" sentinel, as in parseSDWASel.
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}

// The four cvtSdwa* entry points below funnel into cvtSDWA with the operand
// layout (VOP1/VOP2/VOPC) and the skip-vcc flag appropriate to each format.
void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}

// Convert parsed operands to an SDWA MCInst. BasicInstType selects the
// VOP1/VOP2/VOPC operand layout; when skipVcc is set, an explicit "vcc"
// register operand is dropped at the positions noted below.
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType, bool skipVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool skippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
      // VOP2b (v_add_u32,
v_sub_u32 ...) sdwa use "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
        skippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        skippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    skippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9 has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      // omod is not present on every VOP1 sdwa opcode; only add it if the
      // instruction actually has the operand.
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      // VOPC has no dst_sel/dst_unused: the destination is a condition code.
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

/// Force static initialization.
5500 extern "C" void LLVMInitializeAMDGPUAsmParser() { 5501 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget()); 5502 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 5503 } 5504 5505 #define GET_REGISTER_MATCHER 5506 #define GET_MATCHER_IMPLEMENTATION 5507 #define GET_MNEMONIC_SPELL_CHECKER 5508 #include "AMDGPUGenAsmMatcher.inc" 5509 5510 // This fuction should be defined after auto-generated include so that we have 5511 // MatchClassKind enum defined 5512 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 5513 unsigned Kind) { 5514 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 5515 // But MatchInstructionImpl() expects to meet token and fails to validate 5516 // operand. This method checks if we are given immediate operand but expect to 5517 // get corresponding token. 5518 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 5519 switch (Kind) { 5520 case MCK_addr64: 5521 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 5522 case MCK_gds: 5523 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 5524 case MCK_lds: 5525 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 5526 case MCK_glc: 5527 return Operand.isGLC() ? Match_Success : Match_InvalidOperand; 5528 case MCK_idxen: 5529 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 5530 case MCK_offen: 5531 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 5532 case MCK_SSrcB32: 5533 // When operands have expression values, they will return true for isToken, 5534 // because it is not possible to distinguish between a token and an 5535 // expression at parse time. MatchInstructionImpl() will always try to 5536 // match an operand as a token, when isToken returns true, and when the 5537 // name of the expression is not a valid token, the match will fail, 5538 // so we need to handle it here. 5539 return Operand.isSSrcB32() ? 
Match_Success : Match_InvalidOperand; 5540 case MCK_SSrcF32: 5541 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 5542 case MCK_SoppBrTarget: 5543 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; 5544 case MCK_VReg32OrOff: 5545 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 5546 case MCK_InterpSlot: 5547 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand; 5548 case MCK_Attr: 5549 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 5550 case MCK_AttrChan: 5551 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 5552 default: 5553 return Match_InvalidOperand; 5554 } 5555 } 5556