//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetParser.h"

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };
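  // Example (for exposition; assumes the usual SP3-style modifier syntax):
  // a source operand written as "-|v1|" sets both Neg and Abs, so
  // getModifiersOperand() yields SISrcMods::NEG | SISrcMods::ABS, while
  // "sext(v1)" sets only Sext and yields SISrcMods::SEXT. The assert in
  // getModifiersOperand() enforces that the two families are never combined.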
  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
    ImmTyWaitVDST,
    ImmTyWaitEXP,
  };
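  // Example (for exposition): most ImmTy values name the textual operand
  // modifier an immediate was parsed from, e.g. ImmTyOffset for "offset:16",
  // ImmTyGDS for "gds", ImmTyClampSI for "clamp", and ImmTyDppRowMask for
  // "row_mask:0xf". Recording the origin lets the cvt* converters below place
  // each immediate in the right MCInst operand slot.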
  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  bool isImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrInlineImmWithInt16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrInlineImmWithInt32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isRegOrInlineImmWithFP16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrInlineImmWithFP32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() &&
Imm.Type != ImmTyNone; 342 } 343 344 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 345 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 346 bool isDMask() const { return isImmTy(ImmTyDMask); } 347 bool isDim() const { return isImmTy(ImmTyDim); } 348 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 349 bool isDA() const { return isImmTy(ImmTyDA); } 350 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 351 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 352 bool isLWE() const { return isImmTy(ImmTyLWE); } 353 bool isOff() const { return isImmTy(ImmTyOff); } 354 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 355 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 356 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 357 bool isOffen() const { return isImmTy(ImmTyOffen); } 358 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 359 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 360 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 361 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 362 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 363 364 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 365 bool isGDS() const { return isImmTy(ImmTyGDS); } 366 bool isLDS() const { return isImmTy(ImmTyLDS); } 367 bool isCPol() const { return isImmTy(ImmTyCPol); } 368 bool isSWZ() const { return isImmTy(ImmTySWZ); } 369 bool isTFE() const { return isImmTy(ImmTyTFE); } 370 bool isD16() const { return isImmTy(ImmTyD16); } 371 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 372 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 373 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 374 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 375 bool isFI() const { return isImmTy(ImmTyDppFi); } 376 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 377 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 378 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 379 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 380 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 381 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 382 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 383 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 384 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 385 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 386 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 387 bool isHigh() const { return isImmTy(ImmTyHigh); } 388 389 bool isMod() const { 390 return isClampSI() || isOModSI(); 391 } 392 393 bool isRegOrImm() const { 394 return isReg() || isImm(); 395 } 396 397 bool isRegClass(unsigned RCID) const; 398 399 bool isInlineValue() const; 400 401 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 402 return isRegOrInline(RCID, type) && !hasModifiers(); 403 } 404 405 bool isSCSrcB16() const { 406 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 407 } 408 409 bool isSCSrcV2B16() const { 410 return isSCSrcB16(); 411 } 412 413 bool isSCSrcB32() const { 414 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 415 } 416 417 bool isSCSrcB64() const { 418 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 419 } 420 421 bool isBoolReg() const; 422 423 bool isSCSrcF16() const { 424 return 
isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 425 } 426 427 bool isSCSrcV2F16() const { 428 return isSCSrcF16(); 429 } 430 431 bool isSCSrcF32() const { 432 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 433 } 434 435 bool isSCSrcF64() const { 436 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 437 } 438 439 bool isSSrcB32() const { 440 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 441 } 442 443 bool isSSrcB16() const { 444 return isSCSrcB16() || isLiteralImm(MVT::i16); 445 } 446 447 bool isSSrcV2B16() const { 448 llvm_unreachable("cannot happen"); 449 return isSSrcB16(); 450 } 451 452 bool isSSrcB64() const { 453 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 454 // See isVSrc64(). 455 return isSCSrcB64() || isLiteralImm(MVT::i64); 456 } 457 458 bool isSSrcF32() const { 459 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 460 } 461 462 bool isSSrcF64() const { 463 return isSCSrcB64() || isLiteralImm(MVT::f64); 464 } 465 466 bool isSSrcF16() const { 467 return isSCSrcB16() || isLiteralImm(MVT::f16); 468 } 469 470 bool isSSrcV2F16() const { 471 llvm_unreachable("cannot happen"); 472 return isSSrcF16(); 473 } 474 475 bool isSSrcV2FP32() const { 476 llvm_unreachable("cannot happen"); 477 return isSSrcF32(); 478 } 479 480 bool isSCSrcV2FP32() const { 481 llvm_unreachable("cannot happen"); 482 return isSCSrcF32(); 483 } 484 485 bool isSSrcV2INT32() const { 486 llvm_unreachable("cannot happen"); 487 return isSSrcB32(); 488 } 489 490 bool isSCSrcV2INT32() const { 491 llvm_unreachable("cannot happen"); 492 return isSCSrcB32(); 493 } 494 495 bool isSSrcOrLdsB32() const { 496 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 497 isLiteralImm(MVT::i32) || isExpr(); 498 } 499 500 bool isVCSrcB32() const { 501 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 502 } 503 504 bool isVCSrcB64() const { 505 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 506 } 507 508 bool isVCSrcB16() const { 509 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 510 } 511 512 bool isVCSrcV2B16() const { 513 return isVCSrcB16(); 514 } 515 516 bool isVCSrcF32() const { 517 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 518 } 519 520 bool isVCSrcF64() const { 521 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 522 } 523 524 bool isVCSrcF16() const { 525 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 526 } 527 528 bool isVCSrcV2F16() const { 529 return isVCSrcF16(); 530 } 531 532 bool isVSrcB32() const { 533 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 534 } 535 536 bool isVSrcB64() const { 537 return isVCSrcF64() || isLiteralImm(MVT::i64); 538 } 539 540 bool isVSrcB16() const { 541 return isVCSrcB16() || isLiteralImm(MVT::i16); 542 } 543 544 bool isVSrcV2B16() const { 545 return isVSrcB16() || isLiteralImm(MVT::v2i16); 546 } 547 548 bool isVCSrcV2FP32() const { 549 return isVCSrcF64(); 550 } 551 552 bool isVSrcV2FP32() const { 553 return isVSrcF64() || isLiteralImm(MVT::v2f32); 554 } 555 556 bool isVCSrcV2INT32() const { 557 return isVCSrcB64(); 558 } 559 560 bool isVSrcV2INT32() const { 561 return isVSrcB64() || isLiteralImm(MVT::v2i32); 562 } 563 564 bool isVSrcF32() const { 565 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 566 } 567 568 bool isVSrcF64() const { 569 return isVCSrcF64() || isLiteralImm(MVT::f64); 570 } 571 572 bool isVSrcF16() const { 573 return isVCSrcF16() || 
isLiteralImm(MVT::f16); 574 } 575 576 bool isVSrcV2F16() const { 577 return isVSrcF16() || isLiteralImm(MVT::v2f16); 578 } 579 580 bool isVISrcB32() const { 581 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 582 } 583 584 bool isVISrcB16() const { 585 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 586 } 587 588 bool isVISrcV2B16() const { 589 return isVISrcB16(); 590 } 591 592 bool isVISrcF32() const { 593 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 594 } 595 596 bool isVISrcF16() const { 597 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 598 } 599 600 bool isVISrcV2F16() const { 601 return isVISrcF16() || isVISrcB32(); 602 } 603 604 bool isVISrc_64B64() const { 605 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); 606 } 607 608 bool isVISrc_64F64() const { 609 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); 610 } 611 612 bool isVISrc_64V2FP32() const { 613 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); 614 } 615 616 bool isVISrc_64V2INT32() const { 617 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); 618 } 619 620 bool isVISrc_256B64() const { 621 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); 622 } 623 624 bool isVISrc_256F64() const { 625 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); 626 } 627 628 bool isVISrc_128B16() const { 629 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); 630 } 631 632 bool isVISrc_128V2B16() const { 633 return isVISrc_128B16(); 634 } 635 636 bool isVISrc_128B32() const { 637 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); 638 } 639 640 bool isVISrc_128F32() const { 641 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); 642 } 643 644 bool isVISrc_256V2FP32() const { 645 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); 646 } 647 648 bool isVISrc_256V2INT32() const { 649 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); 650 } 651 652 bool isVISrc_512B32() const { 653 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); 654 } 655 656 bool isVISrc_512B16() const { 657 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); 658 } 659 660 bool isVISrc_512V2B16() const { 661 return isVISrc_512B16(); 662 } 663 664 bool isVISrc_512F32() const { 665 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); 666 } 667 668 bool isVISrc_512F16() const { 669 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); 670 } 671 672 bool isVISrc_512V2F16() const { 673 return isVISrc_512F16() || isVISrc_512B32(); 674 } 675 676 bool isVISrc_1024B32() const { 677 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); 678 } 679 680 bool isVISrc_1024B16() const { 681 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); 682 } 683 684 bool isVISrc_1024V2B16() const { 685 return isVISrc_1024B16(); 686 } 687 688 bool isVISrc_1024F32() const { 689 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); 690 } 691 692 bool isVISrc_1024F16() const { 693 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); 694 } 695 696 bool isVISrc_1024V2F16() const { 697 return isVISrc_1024F16() || isVISrc_1024B32(); 698 } 699 700 bool isAISrcB32() const { 701 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 702 } 703 704 bool isAISrcB16() const { 705 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 706 } 707 708 bool isAISrcV2B16() 
const { 709 return isAISrcB16(); 710 } 711 712 bool isAISrcF32() const { 713 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 714 } 715 716 bool isAISrcF16() const { 717 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 718 } 719 720 bool isAISrcV2F16() const { 721 return isAISrcF16() || isAISrcB32(); 722 } 723 724 bool isAISrc_64B64() const { 725 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); 726 } 727 728 bool isAISrc_64F64() const { 729 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); 730 } 731 732 bool isAISrc_128B32() const { 733 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 734 } 735 736 bool isAISrc_128B16() const { 737 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 738 } 739 740 bool isAISrc_128V2B16() const { 741 return isAISrc_128B16(); 742 } 743 744 bool isAISrc_128F32() const { 745 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 746 } 747 748 bool isAISrc_128F16() const { 749 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 750 } 751 752 bool isAISrc_128V2F16() const { 753 return isAISrc_128F16() || isAISrc_128B32(); 754 } 755 756 bool isVISrc_128F16() const { 757 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); 758 } 759 760 bool isVISrc_128V2F16() const { 761 return isVISrc_128F16() || isVISrc_128B32(); 762 } 763 764 bool isAISrc_256B64() const { 765 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); 766 } 767 768 bool isAISrc_256F64() const { 769 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); 770 } 771 772 bool isAISrc_512B32() const { 773 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 774 } 775 776 bool isAISrc_512B16() const { 777 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 778 } 779 780 bool isAISrc_512V2B16() const { 781 return isAISrc_512B16(); 782 } 783 784 bool isAISrc_512F32() const { 785 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 786 } 787 788 bool isAISrc_512F16() const { 789 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 790 } 791 792 bool isAISrc_512V2F16() const { 793 return isAISrc_512F16() || isAISrc_512B32(); 794 } 795 796 bool isAISrc_1024B32() const { 797 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 798 } 799 800 bool isAISrc_1024B16() const { 801 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 802 } 803 804 bool isAISrc_1024V2B16() const { 805 return isAISrc_1024B16(); 806 } 807 808 bool isAISrc_1024F32() const { 809 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 810 } 811 812 bool isAISrc_1024F16() const { 813 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 814 } 815 816 bool isAISrc_1024V2F16() const { 817 return isAISrc_1024F16() || isAISrc_1024B32(); 818 } 819 820 bool isKImmFP32() const { 821 return isLiteralImm(MVT::f32); 822 } 823 824 bool isKImmFP16() const { 825 return isLiteralImm(MVT::f16); 826 } 827 828 bool isMem() const override { 829 return false; 830 } 831 832 bool isExpr() const { 833 return Kind == Expression; 834 } 835 836 bool isSoppBrTarget() const { 837 return isExpr() || isImm(); 838 } 839 840 bool isSWaitCnt() const; 841 bool isDepCtr() const; 842 bool isSDelayAlu() const; 843 bool isHwreg() const; 844 bool isSendMsg() const; 845 bool isSwizzle() const; 846 bool isSMRDOffset8() const; 847 bool isSMEMOffset() const; 848 bool isSMRDLiteralOffset() const; 849 bool isDPP8() const; 850 bool 
isDPPCtrl() const; 851 bool isBLGP() const; 852 bool isCBSZ() const; 853 bool isABID() const; 854 bool isGPRIdxMode() const; 855 bool isS16Imm() const; 856 bool isU16Imm() const; 857 bool isEndpgm() const; 858 bool isWaitVDST() const; 859 bool isWaitEXP() const; 860 861 StringRef getExpressionAsToken() const { 862 assert(isExpr()); 863 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 864 return S->getSymbol().getName(); 865 } 866 867 StringRef getToken() const { 868 assert(isToken()); 869 870 if (Kind == Expression) 871 return getExpressionAsToken(); 872 873 return StringRef(Tok.Data, Tok.Length); 874 } 875 876 int64_t getImm() const { 877 assert(isImm()); 878 return Imm.Val; 879 } 880 881 void setImm(int64_t Val) { 882 assert(isImm()); 883 Imm.Val = Val; 884 } 885 886 ImmTy getImmTy() const { 887 assert(isImm()); 888 return Imm.Type; 889 } 890 891 unsigned getReg() const override { 892 assert(isRegKind()); 893 return Reg.RegNo; 894 } 895 896 SMLoc getStartLoc() const override { 897 return StartLoc; 898 } 899 900 SMLoc getEndLoc() const override { 901 return EndLoc; 902 } 903 904 SMRange getLocRange() const { 905 return SMRange(StartLoc, EndLoc); 906 } 907 908 Modifiers getModifiers() const { 909 assert(isRegKind() || isImmTy(ImmTyNone)); 910 return isRegKind() ? Reg.Mods : Imm.Mods; 911 } 912 913 void setModifiers(Modifiers Mods) { 914 assert(isRegKind() || isImmTy(ImmTyNone)); 915 if (isRegKind()) 916 Reg.Mods = Mods; 917 else 918 Imm.Mods = Mods; 919 } 920 921 bool hasModifiers() const { 922 return getModifiers().hasModifiers(); 923 } 924 925 bool hasFPModifiers() const { 926 return getModifiers().hasFPModifiers(); 927 } 928 929 bool hasIntModifiers() const { 930 return getModifiers().hasIntModifiers(); 931 } 932 933 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 934 935 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 936 937 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 938 939 template <unsigned Bitwidth> 940 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 941 942 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 943 addKImmFPOperands<16>(Inst, N); 944 } 945 946 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 947 addKImmFPOperands<32>(Inst, N); 948 } 949 950 void addRegOperands(MCInst &Inst, unsigned N) const; 951 952 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 953 addRegOperands(Inst, N); 954 } 955 956 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 957 if (isRegKind()) 958 addRegOperands(Inst, N); 959 else if (isExpr()) 960 Inst.addOperand(MCOperand::createExpr(Expr)); 961 else 962 addImmOperands(Inst, N); 963 } 964 965 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 966 Modifiers Mods = getModifiers(); 967 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 968 if (isRegKind()) { 969 addRegOperands(Inst, N); 970 } else { 971 addImmOperands(Inst, N, false); 972 } 973 } 974 975 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 976 assert(!hasIntModifiers()); 977 addRegOrImmWithInputModsOperands(Inst, N); 978 } 979 980 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 981 assert(!hasFPModifiers()); 982 addRegOrImmWithInputModsOperands(Inst, N); 983 } 984 985 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 986 Modifiers Mods = getModifiers(); 987 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 988 
assert(isRegKind()); 989 addRegOperands(Inst, N); 990 } 991 992 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 993 assert(!hasIntModifiers()); 994 addRegWithInputModsOperands(Inst, N); 995 } 996 997 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 998 assert(!hasFPModifiers()); 999 addRegWithInputModsOperands(Inst, N); 1000 } 1001 1002 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 1003 if (isImm()) 1004 addImmOperands(Inst, N); 1005 else { 1006 assert(isExpr()); 1007 Inst.addOperand(MCOperand::createExpr(Expr)); 1008 } 1009 } 1010 1011 static void printImmTy(raw_ostream& OS, ImmTy Type) { 1012 switch (Type) { 1013 case ImmTyNone: OS << "None"; break; 1014 case ImmTyGDS: OS << "GDS"; break; 1015 case ImmTyLDS: OS << "LDS"; break; 1016 case ImmTyOffen: OS << "Offen"; break; 1017 case ImmTyIdxen: OS << "Idxen"; break; 1018 case ImmTyAddr64: OS << "Addr64"; break; 1019 case ImmTyOffset: OS << "Offset"; break; 1020 case ImmTyInstOffset: OS << "InstOffset"; break; 1021 case ImmTyOffset0: OS << "Offset0"; break; 1022 case ImmTyOffset1: OS << "Offset1"; break; 1023 case ImmTyCPol: OS << "CPol"; break; 1024 case ImmTySWZ: OS << "SWZ"; break; 1025 case ImmTyTFE: OS << "TFE"; break; 1026 case ImmTyD16: OS << "D16"; break; 1027 case ImmTyFORMAT: OS << "FORMAT"; break; 1028 case ImmTyClampSI: OS << "ClampSI"; break; 1029 case ImmTyOModSI: OS << "OModSI"; break; 1030 case ImmTyDPP8: OS << "DPP8"; break; 1031 case ImmTyDppCtrl: OS << "DppCtrl"; break; 1032 case ImmTyDppRowMask: OS << "DppRowMask"; break; 1033 case ImmTyDppBankMask: OS << "DppBankMask"; break; 1034 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 1035 case ImmTyDppFi: OS << "FI"; break; 1036 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 1037 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 1038 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 1039 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 1040 case ImmTyDMask: OS << "DMask"; break; 1041 case ImmTyDim: OS << "Dim"; break; 1042 case ImmTyUNorm: OS << "UNorm"; break; 1043 case ImmTyDA: OS << "DA"; break; 1044 case ImmTyR128A16: OS << "R128A16"; break; 1045 case ImmTyA16: OS << "A16"; break; 1046 case ImmTyLWE: OS << "LWE"; break; 1047 case ImmTyOff: OS << "Off"; break; 1048 case ImmTyExpTgt: OS << "ExpTgt"; break; 1049 case ImmTyExpCompr: OS << "ExpCompr"; break; 1050 case ImmTyExpVM: OS << "ExpVM"; break; 1051 case ImmTyHwreg: OS << "Hwreg"; break; 1052 case ImmTySendMsg: OS << "SendMsg"; break; 1053 case ImmTyInterpSlot: OS << "InterpSlot"; break; 1054 case ImmTyInterpAttr: OS << "InterpAttr"; break; 1055 case ImmTyAttrChan: OS << "AttrChan"; break; 1056 case ImmTyOpSel: OS << "OpSel"; break; 1057 case ImmTyOpSelHi: OS << "OpSelHi"; break; 1058 case ImmTyNegLo: OS << "NegLo"; break; 1059 case ImmTyNegHi: OS << "NegHi"; break; 1060 case ImmTySwizzle: OS << "Swizzle"; break; 1061 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 1062 case ImmTyHigh: OS << "High"; break; 1063 case ImmTyBLGP: OS << "BLGP"; break; 1064 case ImmTyCBSZ: OS << "CBSZ"; break; 1065 case ImmTyABID: OS << "ABID"; break; 1066 case ImmTyEndpgm: OS << "Endpgm"; break; 1067 case ImmTyWaitVDST: OS << "WaitVDST"; break; 1068 case ImmTyWaitEXP: OS << "WaitEXP"; break; 1069 } 1070 } 1071 1072 void print(raw_ostream &OS) const override { 1073 switch (Kind) { 1074 case Register: 1075 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 1076 break; 1077 case Immediate: 1078 OS << '<' << getImm(); 1079 if (getImmTy() != ImmTyNone) { 1080 
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//
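// Example (for exposition): KernelScopeInfo below tracks the highest register
// index referenced in the current kernel and publishes it through the
// .kernel.sgpr_count / .kernel.vgpr_count / .kernel.agpr_count symbols.
// If the highest VGPR reference is v[4:5], usesRegister() calls usesVgprAt(5)
// and .kernel.vgpr_count becomes 6 (one past the last dword index used); on
// gfx90a the AGPR count is folded into the same total.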
// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;
  MCSubtargetInfo const *MSTI = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

  void usesAgprAt(int i) {
    // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
    if (!hasMAIInsts(*MSTI))
      return;

    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));

        // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
        MCSymbol* const vSym =
          Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    MSTI = Ctx->getSubtargetInfo();

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    if (hasMAIInsts(*MSTI)) {
      usesAgprAt(AgprIndexUnusedMin = -1);
    }
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR:
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_AGPR:
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_VGPR:
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    default:
      break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;
  unsigned CPolSeen;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1267 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1268 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1269 /// descriptor field, if valid. 1270 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1271 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1272 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1273 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1274 /// \param VGPRBlocks [out] Result VGPR block count. 1275 /// \param SGPRBlocks [out] Result SGPR block count. 1276 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1277 bool FlatScrUsed, bool XNACKUsed, 1278 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1279 SMRange VGPRRange, unsigned NextFreeSGPR, 1280 SMRange SGPRRange, unsigned &VGPRBlocks, 1281 unsigned &SGPRBlocks); 1282 bool ParseDirectiveAMDGCNTarget(); 1283 bool ParseDirectiveAMDHSAKernel(); 1284 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1285 bool ParseDirectiveHSACodeObjectVersion(); 1286 bool ParseDirectiveHSACodeObjectISA(); 1287 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1288 bool ParseDirectiveAMDKernelCodeT(); 1289 // TODO: Possibly make subtargetHasRegister const. 1290 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo); 1291 bool ParseDirectiveAMDGPUHsaKernel(); 1292 1293 bool ParseDirectiveISAVersion(); 1294 bool ParseDirectiveHSAMetadata(); 1295 bool ParseDirectivePALMetadataBegin(); 1296 bool ParseDirectivePALMetadata(); 1297 bool ParseDirectiveAMDGPULDS(); 1298 1299 /// Common code to parse out a block of text (typically YAML) between start and 1300 /// end directives. 1301 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1302 const char *AssemblerDirectiveEnd, 1303 std::string &CollectString); 1304 1305 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1306 RegisterKind RegKind, unsigned Reg1, SMLoc Loc); 1307 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1308 unsigned &RegNum, unsigned &RegWidth, 1309 bool RestoreOnFailure = false); 1310 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1311 unsigned &RegNum, unsigned &RegWidth, 1312 SmallVectorImpl<AsmToken> &Tokens); 1313 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, 1314 unsigned &RegWidth, 1315 SmallVectorImpl<AsmToken> &Tokens); 1316 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, 1317 unsigned &RegWidth, 1318 SmallVectorImpl<AsmToken> &Tokens); 1319 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 1320 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); 1321 bool ParseRegRange(unsigned& Num, unsigned& Width); 1322 unsigned getRegularReg(RegisterKind RegKind, 1323 unsigned RegNum, 1324 unsigned RegWidth, 1325 SMLoc Loc); 1326 1327 bool isRegister(); 1328 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1329 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1330 void initializeGprCountSymbol(RegisterKind RegKind); 1331 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1332 unsigned RegWidth); 1333 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1334 bool IsAtomic, bool IsLds = false); 1335 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 1336 bool IsGdsHardcoded); 1337 1338 public: 1339 enum AMDGPUMatchResultTy { 1340 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 1341 }; 1342 enum 
OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  // TODO: isGFX90A is also true for GFX940. We need to clean this up.
1425 bool isGFX90A() const { 1426 return AMDGPU::isGFX90A(getSTI()); 1427 } 1428 1429 bool isGFX940() const { 1430 return AMDGPU::isGFX940(getSTI()); 1431 } 1432 1433 bool isGFX9Plus() const { 1434 return AMDGPU::isGFX9Plus(getSTI()); 1435 } 1436 1437 bool isGFX10() const { 1438 return AMDGPU::isGFX10(getSTI()); 1439 } 1440 1441 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1442 1443 bool isGFX11() const { 1444 return AMDGPU::isGFX11(getSTI()); 1445 } 1446 1447 bool isGFX11Plus() const { 1448 return AMDGPU::isGFX11Plus(getSTI()); 1449 } 1450 1451 bool isGFX10_BEncoding() const { 1452 return AMDGPU::isGFX10_BEncoding(getSTI()); 1453 } 1454 1455 bool hasInv2PiInlineImm() const { 1456 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1457 } 1458 1459 bool hasFlatOffsets() const { 1460 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1461 } 1462 1463 bool hasArchitectedFlatScratch() const { 1464 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; 1465 } 1466 1467 bool hasSGPR102_SGPR103() const { 1468 return !isVI() && !isGFX9(); 1469 } 1470 1471 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1472 1473 bool hasIntClamp() const { 1474 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1475 } 1476 1477 AMDGPUTargetStreamer &getTargetStreamer() { 1478 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1479 return static_cast<AMDGPUTargetStreamer &>(TS); 1480 } 1481 1482 const MCRegisterInfo *getMRI() const { 1483 // We need this const_cast because for some reason getContext() is not const 1484 // in MCAsmParser. 1485 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1486 } 1487 1488 const MCInstrInfo *getMII() const { 1489 return &MII; 1490 } 1491 1492 const FeatureBitset &getFeatureBits() const { 1493 return getSTI().getFeatureBits(); 1494 } 1495 1496 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1497 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1498 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1499 1500 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1501 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1502 bool isForcedDPP() const { return ForcedDPP; } 1503 bool isForcedSDWA() const { return ForcedSDWA; } 1504 ArrayRef<unsigned> getMatchedVariants() const; 1505 StringRef getMatchedVariantName() const; 1506 1507 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1508 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1509 bool RestoreOnFailure); 1510 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1511 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1512 SMLoc &EndLoc) override; 1513 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1514 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1515 unsigned Kind) override; 1516 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1517 OperandVector &Operands, MCStreamer &Out, 1518 uint64_t &ErrorInfo, 1519 bool MatchingInlineAsm) override; 1520 bool ParseDirective(AsmToken DirectiveID) override; 1521 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1522 OperandMode Mode = OperandMode_Default); 1523 StringRef parseMnemonicSuffix(StringRef Name); 1524 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1525 SMLoc NameLoc, OperandVector &Operands) override; 1526 //bool 
ProcessInstruction(MCInst &Inst); 1527 1528 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1529 1530 OperandMatchResultTy 1531 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1532 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1533 bool (*ConvertResult)(int64_t &) = nullptr); 1534 1535 OperandMatchResultTy 1536 parseOperandArrayWithPrefix(const char *Prefix, 1537 OperandVector &Operands, 1538 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1539 bool (*ConvertResult)(int64_t&) = nullptr); 1540 1541 OperandMatchResultTy 1542 parseNamedBit(StringRef Name, OperandVector &Operands, 1543 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1544 OperandMatchResultTy parseCPol(OperandVector &Operands); 1545 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1546 StringRef &Value, 1547 SMLoc &StringLoc); 1548 1549 bool isModifier(); 1550 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1551 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1552 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1553 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1554 bool parseSP3NegModifier(); 1555 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1556 OperandMatchResultTy parseReg(OperandVector &Operands); 1557 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1558 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1559 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1560 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1561 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1562 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1563 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1564 OperandMatchResultTy parseUfmt(int64_t &Format); 1565 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1566 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1567 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1568 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1569 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1570 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1571 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1572 1573 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1574 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1575 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1576 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1577 1578 bool parseCnt(int64_t &IntVal); 1579 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1580 1581 bool parseDepCtr(int64_t &IntVal, unsigned &Mask); 1582 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName); 1583 OperandMatchResultTy parseDepCtrOps(OperandVector &Operands); 1584 1585 bool parseDelay(int64_t &Delay); 1586 OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands); 1587 1588 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1589 1590 private: 1591 struct OperandInfoTy { 1592 SMLoc Loc; 1593 int64_t Id; 
1594 bool IsSymbolic = false; 1595 bool IsDefined = false; 1596 1597 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1598 }; 1599 1600 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1601 bool validateSendMsg(const OperandInfoTy &Msg, 1602 const OperandInfoTy &Op, 1603 const OperandInfoTy &Stream); 1604 1605 bool parseHwregBody(OperandInfoTy &HwReg, 1606 OperandInfoTy &Offset, 1607 OperandInfoTy &Width); 1608 bool validateHwreg(const OperandInfoTy &HwReg, 1609 const OperandInfoTy &Offset, 1610 const OperandInfoTy &Width); 1611 1612 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1613 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1614 SMLoc getBLGPLoc(const OperandVector &Operands) const; 1615 1616 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1617 const OperandVector &Operands) const; 1618 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1619 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1620 SMLoc getLitLoc(const OperandVector &Operands) const; 1621 SMLoc getConstLoc(const OperandVector &Operands) const; 1622 1623 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1624 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1625 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1626 bool validateSOPLiteral(const MCInst &Inst) const; 1627 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1628 bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands); 1629 bool validateIntClampSupported(const MCInst &Inst); 1630 bool validateMIMGAtomicDMask(const MCInst &Inst); 1631 bool validateMIMGGatherDMask(const MCInst &Inst); 1632 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1633 Optional<StringRef> validateMIMGDataSize(const MCInst &Inst); 1634 bool validateMIMGAddrSize(const MCInst &Inst); 1635 bool validateMIMGD16(const MCInst &Inst); 1636 bool validateMIMGDim(const MCInst &Inst); 1637 bool validateMIMGMSAA(const MCInst &Inst); 1638 bool validateOpSel(const MCInst &Inst); 1639 bool validateDPP(const MCInst &Inst, const OperandVector &Operands); 1640 bool validateVccOperand(unsigned Reg) const; 1641 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands); 1642 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1643 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands); 1644 bool validateAGPRLdSt(const MCInst &Inst) const; 1645 bool validateVGPRAlign(const MCInst &Inst) const; 1646 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands); 1647 bool validateGWS(const MCInst &Inst, const OperandVector &Operands); 1648 bool validateDivScale(const MCInst &Inst); 1649 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1650 const SMLoc &IDLoc); 1651 bool validateFlatLdsDMA(const MCInst &Inst, const OperandVector &Operands, 1652 const SMLoc &IDLoc); 1653 bool validateExeczVcczOperands(const OperandVector &Operands); 1654 Optional<StringRef> validateLdsDirect(const MCInst &Inst); 1655 unsigned getConstantBusLimit(unsigned Opcode) const; 1656 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1657 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1658 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1659 1660 bool isSupportedMnemo(StringRef Mnemo, 1661 const 
FeatureBitset &FBS); 1662 bool isSupportedMnemo(StringRef Mnemo, 1663 const FeatureBitset &FBS, 1664 ArrayRef<unsigned> Variants); 1665 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1666 1667 bool isId(const StringRef Id) const; 1668 bool isId(const AsmToken &Token, const StringRef Id) const; 1669 bool isToken(const AsmToken::TokenKind Kind) const; 1670 bool trySkipId(const StringRef Id); 1671 bool trySkipId(const StringRef Pref, const StringRef Id); 1672 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1673 bool trySkipToken(const AsmToken::TokenKind Kind); 1674 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1675 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1676 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1677 1678 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1679 AsmToken::TokenKind getTokenKind() const; 1680 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1681 bool parseExpr(OperandVector &Operands); 1682 StringRef getTokenStr() const; 1683 AsmToken peekToken(); 1684 AsmToken getToken() const; 1685 SMLoc getLoc() const; 1686 void lex(); 1687 1688 public: 1689 void onBeginOfFile() override; 1690 1691 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1692 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1693 1694 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1695 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1696 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1697 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1698 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1699 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1700 1701 bool parseSwizzleOperand(int64_t &Op, 1702 const unsigned MinVal, 1703 const unsigned MaxVal, 1704 const StringRef ErrMsg, 1705 SMLoc &Loc); 1706 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1707 const unsigned MinVal, 1708 const unsigned MaxVal, 1709 const StringRef ErrMsg); 1710 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1711 bool parseSwizzleOffset(int64_t &Imm); 1712 bool parseSwizzleMacro(int64_t &Imm); 1713 bool parseSwizzleQuadPerm(int64_t &Imm); 1714 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1715 bool parseSwizzleBroadcast(int64_t &Imm); 1716 bool parseSwizzleSwap(int64_t &Imm); 1717 bool parseSwizzleReverse(int64_t &Imm); 1718 1719 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1720 int64_t parseGPRIdxMacro(); 1721 1722 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 1723 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1724 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); } 1725 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1726 1727 AMDGPUOperand::Ptr defaultCPol() const; 1728 1729 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1730 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1731 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1732 AMDGPUOperand::Ptr defaultFlatOffset() const; 1733 1734 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1735 1736 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1737 OptionalImmIndexMap &OptionalIdx); 1738 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1739 void cvtVOP3(MCInst &Inst, const 
OperandVector &Operands); 1740 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1741 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 1742 OptionalImmIndexMap &OptionalIdx); 1743 1744 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1745 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands); 1746 1747 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1748 bool IsAtomic = false); 1749 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1750 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1751 1752 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands); 1753 1754 bool parseDimId(unsigned &Encoding); 1755 OperandMatchResultTy parseDim(OperandVector &Operands); 1756 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1757 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1758 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1759 int64_t parseDPPCtrlSel(StringRef Ctrl); 1760 int64_t parseDPPCtrlPerm(); 1761 AMDGPUOperand::Ptr defaultRowMask() const; 1762 AMDGPUOperand::Ptr defaultBankMask() const; 1763 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1764 AMDGPUOperand::Ptr defaultFI() const; 1765 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1766 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { 1767 cvtDPP(Inst, Operands, true); 1768 } 1769 void cvtVOPCNoDstDPP(MCInst &Inst, const OperandVector &Operands, 1770 bool IsDPP8 = false); 1771 void cvtVOPCNoDstDPP8(MCInst &Inst, const OperandVector &Operands) { 1772 cvtVOPCNoDstDPP(Inst, Operands, true); 1773 } 1774 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, 1775 bool IsDPP8 = false); 1776 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) { 1777 cvtVOP3DPP(Inst, Operands, true); 1778 } 1779 void cvtVOPC64NoDstDPP(MCInst &Inst, const OperandVector &Operands, 1780 bool IsDPP8 = false); 1781 void cvtVOPC64NoDstDPP8(MCInst &Inst, const OperandVector &Operands) { 1782 cvtVOPC64NoDstDPP(Inst, Operands, true); 1783 } 1784 1785 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1786 AMDGPUOperand::ImmTy Type); 1787 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1788 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1789 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1790 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1791 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1792 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1793 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1794 uint64_t BasicInstType, 1795 bool SkipDstVcc = false, 1796 bool SkipSrcVcc = false); 1797 1798 AMDGPUOperand::Ptr defaultBLGP() const; 1799 AMDGPUOperand::Ptr defaultCBSZ() const; 1800 AMDGPUOperand::Ptr defaultABID() const; 1801 1802 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1803 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1804 1805 AMDGPUOperand::Ptr defaultWaitVDST() const; 1806 AMDGPUOperand::Ptr defaultWaitEXP() const; 1807 }; 1808 1809 struct OptionalOperand { 1810 const char *Name; 1811 AMDGPUOperand::ImmTy Type; 1812 bool IsBit; 1813 bool (*ConvertResult)(int64_t&); 1814 }; 1815 1816 } // end anonymous namespace 1817 1818 // May be called with integer type with equivalent bitwidth. 
1819 static const fltSemantics *getFltSemantics(unsigned Size) { 1820 switch (Size) { 1821 case 4: 1822 return &APFloat::IEEEsingle(); 1823 case 8: 1824 return &APFloat::IEEEdouble(); 1825 case 2: 1826 return &APFloat::IEEEhalf(); 1827 default: 1828 llvm_unreachable("unsupported fp type"); 1829 } 1830 } 1831 1832 static const fltSemantics *getFltSemantics(MVT VT) { 1833 return getFltSemantics(VT.getSizeInBits() / 8); 1834 } 1835 1836 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1837 switch (OperandType) { 1838 case AMDGPU::OPERAND_REG_IMM_INT32: 1839 case AMDGPU::OPERAND_REG_IMM_FP32: 1840 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1841 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1842 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1843 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1844 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1845 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1846 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1847 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1848 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1849 case AMDGPU::OPERAND_KIMM32: 1850 return &APFloat::IEEEsingle(); 1851 case AMDGPU::OPERAND_REG_IMM_INT64: 1852 case AMDGPU::OPERAND_REG_IMM_FP64: 1853 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1854 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1855 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1856 return &APFloat::IEEEdouble(); 1857 case AMDGPU::OPERAND_REG_IMM_INT16: 1858 case AMDGPU::OPERAND_REG_IMM_FP16: 1859 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1860 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1861 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1862 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1863 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1864 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1865 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1866 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1867 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1868 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1869 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1870 case AMDGPU::OPERAND_KIMM16: 1871 return &APFloat::IEEEhalf(); 1872 default: 1873 llvm_unreachable("unsupported fp type"); 1874 } 1875 } 1876 1877 //===----------------------------------------------------------------------===// 1878 // Operand 1879 //===----------------------------------------------------------------------===// 1880 1881 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1882 bool Lost; 1883 1884 // Convert literal to single precision 1885 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1886 APFloat::rmNearestTiesToEven, 1887 &Lost); 1888 // We allow precision lost but not overflow or underflow 1889 if (Status != APFloat::opOK && 1890 Lost && 1891 ((Status & APFloat::opOverflow) != 0 || 1892 (Status & APFloat::opUnderflow) != 0)) { 1893 return false; 1894 } 1895 1896 return true; 1897 } 1898 1899 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1900 return isUIntN(Size, Val) || isIntN(Size, Val); 1901 } 1902 1903 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1904 if (VT.getScalarType() == MVT::i16) { 1905 // FP immediate values are broken. 1906 return isInlinableIntLiteral(Val); 1907 } 1908 1909 // f16/v2f16 operands work correctly for all values. 1910 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1911 } 1912 1913 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1914 1915 // This is a hack to enable named inline values like 1916 // shared_base with both 32-bit and 64-bit operands. 1917 // Note that these values are defined as 1918 // 32-bit operands only. 
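  // E.g. 'src_shared_base' is accepted for 64-bit operands as well, even though
  // it is defined as a 32-bit operand.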
1919 if (isInlineValue()) { 1920 return true; 1921 } 1922 1923 if (!isImmTy(ImmTyNone)) { 1924 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1925 return false; 1926 } 1927 // TODO: We should avoid using host float here. It would be better to 1928 // check the float bit values which is what a few other places do. 1929 // We've had bot failures before due to weird NaN support on mips hosts. 1930 1931 APInt Literal(64, Imm.Val); 1932 1933 if (Imm.IsFPImm) { // We got fp literal token 1934 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1935 return AMDGPU::isInlinableLiteral64(Imm.Val, 1936 AsmParser->hasInv2PiInlineImm()); 1937 } 1938 1939 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1940 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1941 return false; 1942 1943 if (type.getScalarSizeInBits() == 16) { 1944 return isInlineableLiteralOp16( 1945 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1946 type, AsmParser->hasInv2PiInlineImm()); 1947 } 1948 1949 // Check if single precision literal is inlinable 1950 return AMDGPU::isInlinableLiteral32( 1951 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1952 AsmParser->hasInv2PiInlineImm()); 1953 } 1954 1955 // We got int literal token. 1956 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1957 return AMDGPU::isInlinableLiteral64(Imm.Val, 1958 AsmParser->hasInv2PiInlineImm()); 1959 } 1960 1961 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1962 return false; 1963 } 1964 1965 if (type.getScalarSizeInBits() == 16) { 1966 return isInlineableLiteralOp16( 1967 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1968 type, AsmParser->hasInv2PiInlineImm()); 1969 } 1970 1971 return AMDGPU::isInlinableLiteral32( 1972 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1973 AsmParser->hasInv2PiInlineImm()); 1974 } 1975 1976 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1977 // Check that this immediate can be added as literal 1978 if (!isImmTy(ImmTyNone)) { 1979 return false; 1980 } 1981 1982 if (!Imm.IsFPImm) { 1983 // We got int literal token. 1984 1985 if (type == MVT::f64 && hasFPModifiers()) { 1986 // Cannot apply fp modifiers to int literals preserving the same semantics 1987 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1988 // disable these cases. 1989 return false; 1990 } 1991 1992 unsigned Size = type.getSizeInBits(); 1993 if (Size == 64) 1994 Size = 32; 1995 1996 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1997 // types. 1998 return isSafeTruncation(Imm.Val, Size); 1999 } 2000 2001 // We got fp literal token 2002 if (type == MVT::f64) { // Expected 64-bit fp operand 2003 // We would set low 64-bits of literal to zeroes but we accept this literals 2004 return true; 2005 } 2006 2007 if (type == MVT::i64) { // Expected 64-bit int operand 2008 // We don't allow fp literals in 64-bit integer instructions. It is 2009 // unclear how we should encode them. 2010 return false; 2011 } 2012 2013 // We allow fp literals with f16x2 operands assuming that the specified 2014 // literal goes into the lower half and the upper half is zero. We also 2015 // require that the literal may be losslessly converted to f16. 2016 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 2017 (type == MVT::v2i16)? MVT::i16 : 2018 (type == MVT::v2f32)? 
MVT::f32 : type; 2019 2020 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 2021 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 2022 } 2023 2024 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 2025 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 2026 } 2027 2028 bool AMDGPUOperand::isVRegWithInputMods() const { 2029 return isRegClass(AMDGPU::VGPR_32RegClassID) || 2030 // GFX90A allows DPP on 64-bit operands. 2031 (isRegClass(AMDGPU::VReg_64RegClassID) && 2032 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 2033 } 2034 2035 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 2036 if (AsmParser->isVI()) 2037 return isVReg32(); 2038 else if (AsmParser->isGFX9Plus()) 2039 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 2040 else 2041 return false; 2042 } 2043 2044 bool AMDGPUOperand::isSDWAFP16Operand() const { 2045 return isSDWAOperand(MVT::f16); 2046 } 2047 2048 bool AMDGPUOperand::isSDWAFP32Operand() const { 2049 return isSDWAOperand(MVT::f32); 2050 } 2051 2052 bool AMDGPUOperand::isSDWAInt16Operand() const { 2053 return isSDWAOperand(MVT::i16); 2054 } 2055 2056 bool AMDGPUOperand::isSDWAInt32Operand() const { 2057 return isSDWAOperand(MVT::i32); 2058 } 2059 2060 bool AMDGPUOperand::isBoolReg() const { 2061 auto FB = AsmParser->getFeatureBits(); 2062 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 2063 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 2064 } 2065 2066 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 2067 { 2068 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2069 assert(Size == 2 || Size == 4 || Size == 8); 2070 2071 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 2072 2073 if (Imm.Mods.Abs) { 2074 Val &= ~FpSignMask; 2075 } 2076 if (Imm.Mods.Neg) { 2077 Val ^= FpSignMask; 2078 } 2079 2080 return Val; 2081 } 2082 2083 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 2084 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 2085 Inst.getNumOperands())) { 2086 addLiteralImmOperand(Inst, Imm.Val, 2087 ApplyModifiers & 2088 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2089 } else { 2090 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 2091 Inst.addOperand(MCOperand::createImm(Imm.Val)); 2092 setImmKindNone(); 2093 } 2094 } 2095 2096 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 2097 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 2098 auto OpNum = Inst.getNumOperands(); 2099 // Check that this operand accepts literals 2100 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 2101 2102 if (ApplyModifiers) { 2103 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 2104 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 2105 Val = applyInputFPModifiers(Val, Size); 2106 } 2107 2108 APInt Literal(64, Val); 2109 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 2110 2111 if (Imm.IsFPImm) { // We got fp literal token 2112 switch (OpTy) { 2113 case AMDGPU::OPERAND_REG_IMM_INT64: 2114 case AMDGPU::OPERAND_REG_IMM_FP64: 2115 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2116 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2117 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2118 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2119 AsmParser->hasInv2PiInlineImm())) { 2120 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2121 setImmKindConst(); 2122 return; 2123 } 2124 2125 // Non-inlineable 2126 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2127 // For fp operands we check if low 32 bits are zeros 2128 if (Literal.getLoBits(32) != 0) { 2129 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2130 "Can't encode literal as exact 64-bit floating-point operand. " 2131 "Low 32-bits will be set to zero"); 2132 } 2133 2134 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2135 setImmKindLiteral(); 2136 return; 2137 } 2138 2139 // We don't allow fp literals in 64-bit integer instructions. It is 2140 // unclear how we should encode them. This case should be checked earlier 2141 // in predicate methods (isLiteralImm()) 2142 llvm_unreachable("fp literal in 64-bit integer instruction."); 2143 2144 case AMDGPU::OPERAND_REG_IMM_INT32: 2145 case AMDGPU::OPERAND_REG_IMM_FP32: 2146 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2147 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2148 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2149 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2150 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2151 case AMDGPU::OPERAND_REG_IMM_INT16: 2152 case AMDGPU::OPERAND_REG_IMM_FP16: 2153 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2154 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2155 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2156 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2157 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2158 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2159 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2160 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2161 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2162 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2163 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2164 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2165 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2166 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2167 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2168 case AMDGPU::OPERAND_KIMM32: 2169 case AMDGPU::OPERAND_KIMM16: { 2170 bool lost; 2171 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2172 // Convert literal to single precision 2173 FPLiteral.convert(*getOpFltSemantics(OpTy), 2174 APFloat::rmNearestTiesToEven, &lost); 2175 // We allow precision lost but not overflow or underflow. This should be 2176 // checked earlier in isLiteralImm() 2177 2178 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2179 Inst.addOperand(MCOperand::createImm(ImmVal)); 2180 setImmKindLiteral(); 2181 return; 2182 } 2183 default: 2184 llvm_unreachable("invalid operand size"); 2185 } 2186 2187 return; 2188 } 2189 2190 // We got int literal token. 2191 // Only sign extend inline immediates. 
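  // E.g. for a 16-bit operand, -1 is kept as an inline constant, while 0x1234
  // is not inlinable and is emitted as a 16-bit literal.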
2192 switch (OpTy) { 2193 case AMDGPU::OPERAND_REG_IMM_INT32: 2194 case AMDGPU::OPERAND_REG_IMM_FP32: 2195 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2196 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2197 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2198 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2199 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2200 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2201 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2202 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2203 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2204 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2205 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2206 if (isSafeTruncation(Val, 32) && 2207 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2208 AsmParser->hasInv2PiInlineImm())) { 2209 Inst.addOperand(MCOperand::createImm(Val)); 2210 setImmKindConst(); 2211 return; 2212 } 2213 2214 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2215 setImmKindLiteral(); 2216 return; 2217 2218 case AMDGPU::OPERAND_REG_IMM_INT64: 2219 case AMDGPU::OPERAND_REG_IMM_FP64: 2220 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2221 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2222 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2223 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2224 Inst.addOperand(MCOperand::createImm(Val)); 2225 setImmKindConst(); 2226 return; 2227 } 2228 2229 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2230 setImmKindLiteral(); 2231 return; 2232 2233 case AMDGPU::OPERAND_REG_IMM_INT16: 2234 case AMDGPU::OPERAND_REG_IMM_FP16: 2235 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2236 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2237 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2238 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2239 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2240 if (isSafeTruncation(Val, 16) && 2241 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2242 AsmParser->hasInv2PiInlineImm())) { 2243 Inst.addOperand(MCOperand::createImm(Val)); 2244 setImmKindConst(); 2245 return; 2246 } 2247 2248 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2249 setImmKindLiteral(); 2250 return; 2251 2252 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2253 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2254 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2255 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2256 assert(isSafeTruncation(Val, 16)); 2257 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2258 AsmParser->hasInv2PiInlineImm())); 2259 2260 Inst.addOperand(MCOperand::createImm(Val)); 2261 return; 2262 } 2263 case AMDGPU::OPERAND_KIMM32: 2264 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2265 setImmKindNone(); 2266 return; 2267 case AMDGPU::OPERAND_KIMM16: 2268 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2269 setImmKindNone(); 2270 return; 2271 default: 2272 llvm_unreachable("invalid operand size"); 2273 } 2274 } 2275 2276 template <unsigned Bitwidth> 2277 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2278 APInt Literal(64, Imm.Val); 2279 setImmKindNone(); 2280 2281 if (!Imm.IsFPImm) { 2282 // We got int literal token. 
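    // Use its low Bitwidth bits as-is, without fp conversion.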
2283 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2284 return; 2285 } 2286 2287 bool Lost; 2288 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2289 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2290 APFloat::rmNearestTiesToEven, &Lost); 2291 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2292 } 2293 2294 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2295 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2296 } 2297 2298 static bool isInlineValue(unsigned Reg) { 2299 switch (Reg) { 2300 case AMDGPU::SRC_SHARED_BASE: 2301 case AMDGPU::SRC_SHARED_LIMIT: 2302 case AMDGPU::SRC_PRIVATE_BASE: 2303 case AMDGPU::SRC_PRIVATE_LIMIT: 2304 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2305 return true; 2306 case AMDGPU::SRC_VCCZ: 2307 case AMDGPU::SRC_EXECZ: 2308 case AMDGPU::SRC_SCC: 2309 return true; 2310 case AMDGPU::SGPR_NULL: 2311 return true; 2312 default: 2313 return false; 2314 } 2315 } 2316 2317 bool AMDGPUOperand::isInlineValue() const { 2318 return isRegKind() && ::isInlineValue(getReg()); 2319 } 2320 2321 //===----------------------------------------------------------------------===// 2322 // AsmParser 2323 //===----------------------------------------------------------------------===// 2324 2325 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2326 if (Is == IS_VGPR) { 2327 switch (RegWidth) { 2328 default: return -1; 2329 case 32: 2330 return AMDGPU::VGPR_32RegClassID; 2331 case 64: 2332 return AMDGPU::VReg_64RegClassID; 2333 case 96: 2334 return AMDGPU::VReg_96RegClassID; 2335 case 128: 2336 return AMDGPU::VReg_128RegClassID; 2337 case 160: 2338 return AMDGPU::VReg_160RegClassID; 2339 case 192: 2340 return AMDGPU::VReg_192RegClassID; 2341 case 224: 2342 return AMDGPU::VReg_224RegClassID; 2343 case 256: 2344 return AMDGPU::VReg_256RegClassID; 2345 case 512: 2346 return AMDGPU::VReg_512RegClassID; 2347 case 1024: 2348 return AMDGPU::VReg_1024RegClassID; 2349 } 2350 } else if (Is == IS_TTMP) { 2351 switch (RegWidth) { 2352 default: return -1; 2353 case 32: 2354 return AMDGPU::TTMP_32RegClassID; 2355 case 64: 2356 return AMDGPU::TTMP_64RegClassID; 2357 case 128: 2358 return AMDGPU::TTMP_128RegClassID; 2359 case 256: 2360 return AMDGPU::TTMP_256RegClassID; 2361 case 512: 2362 return AMDGPU::TTMP_512RegClassID; 2363 } 2364 } else if (Is == IS_SGPR) { 2365 switch (RegWidth) { 2366 default: return -1; 2367 case 32: 2368 return AMDGPU::SGPR_32RegClassID; 2369 case 64: 2370 return AMDGPU::SGPR_64RegClassID; 2371 case 96: 2372 return AMDGPU::SGPR_96RegClassID; 2373 case 128: 2374 return AMDGPU::SGPR_128RegClassID; 2375 case 160: 2376 return AMDGPU::SGPR_160RegClassID; 2377 case 192: 2378 return AMDGPU::SGPR_192RegClassID; 2379 case 224: 2380 return AMDGPU::SGPR_224RegClassID; 2381 case 256: 2382 return AMDGPU::SGPR_256RegClassID; 2383 case 512: 2384 return AMDGPU::SGPR_512RegClassID; 2385 } 2386 } else if (Is == IS_AGPR) { 2387 switch (RegWidth) { 2388 default: return -1; 2389 case 32: 2390 return AMDGPU::AGPR_32RegClassID; 2391 case 64: 2392 return AMDGPU::AReg_64RegClassID; 2393 case 96: 2394 return AMDGPU::AReg_96RegClassID; 2395 case 128: 2396 return AMDGPU::AReg_128RegClassID; 2397 case 160: 2398 return AMDGPU::AReg_160RegClassID; 2399 case 192: 2400 return AMDGPU::AReg_192RegClassID; 2401 case 224: 2402 return AMDGPU::AReg_224RegClassID; 2403 case 256: 2404 return AMDGPU::AReg_256RegClassID; 2405 case 512: 2406 return AMDGPU::AReg_512RegClassID; 
2407 case 1024: 2408 return AMDGPU::AReg_1024RegClassID; 2409 } 2410 } 2411 return -1; 2412 } 2413 2414 static unsigned getSpecialRegForName(StringRef RegName) { 2415 return StringSwitch<unsigned>(RegName) 2416 .Case("exec", AMDGPU::EXEC) 2417 .Case("vcc", AMDGPU::VCC) 2418 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2419 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2420 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2421 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2422 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2423 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2424 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2425 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2426 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2427 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2428 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2429 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2430 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2431 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2432 .Case("m0", AMDGPU::M0) 2433 .Case("vccz", AMDGPU::SRC_VCCZ) 2434 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2435 .Case("execz", AMDGPU::SRC_EXECZ) 2436 .Case("src_execz", AMDGPU::SRC_EXECZ) 2437 .Case("scc", AMDGPU::SRC_SCC) 2438 .Case("src_scc", AMDGPU::SRC_SCC) 2439 .Case("tba", AMDGPU::TBA) 2440 .Case("tma", AMDGPU::TMA) 2441 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2442 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2443 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2444 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2445 .Case("vcc_lo", AMDGPU::VCC_LO) 2446 .Case("vcc_hi", AMDGPU::VCC_HI) 2447 .Case("exec_lo", AMDGPU::EXEC_LO) 2448 .Case("exec_hi", AMDGPU::EXEC_HI) 2449 .Case("tma_lo", AMDGPU::TMA_LO) 2450 .Case("tma_hi", AMDGPU::TMA_HI) 2451 .Case("tba_lo", AMDGPU::TBA_LO) 2452 .Case("tba_hi", AMDGPU::TBA_HI) 2453 .Case("pc", AMDGPU::PC_REG) 2454 .Case("null", AMDGPU::SGPR_NULL) 2455 .Default(AMDGPU::NoRegister); 2456 } 2457 2458 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2459 SMLoc &EndLoc, bool RestoreOnFailure) { 2460 auto R = parseRegister(); 2461 if (!R) return true; 2462 assert(R->isReg()); 2463 RegNo = R->getReg(); 2464 StartLoc = R->getStartLoc(); 2465 EndLoc = R->getEndLoc(); 2466 return false; 2467 } 2468 2469 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2470 SMLoc &EndLoc) { 2471 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2472 } 2473 2474 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2475 SMLoc &StartLoc, 2476 SMLoc &EndLoc) { 2477 bool Result = 2478 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2479 bool PendingErrors = getParser().hasPendingError(); 2480 getParser().clearPendingErrors(); 2481 if (PendingErrors) 2482 return MatchOperand_ParseFail; 2483 if (Result) 2484 return MatchOperand_NoMatch; 2485 return MatchOperand_Success; 2486 } 2487 2488 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2489 RegisterKind RegKind, unsigned Reg1, 2490 SMLoc Loc) { 2491 switch (RegKind) { 2492 case IS_SPECIAL: 2493 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2494 Reg = AMDGPU::EXEC; 2495 RegWidth = 64; 2496 return true; 2497 } 2498 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2499 Reg = AMDGPU::FLAT_SCR; 2500 RegWidth = 64; 2501 return true; 2502 } 2503 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2504 Reg = AMDGPU::XNACK_MASK; 2505 RegWidth = 64; 
2506 return true; 2507 } 2508 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2509 Reg = AMDGPU::VCC; 2510 RegWidth = 64; 2511 return true; 2512 } 2513 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2514 Reg = AMDGPU::TBA; 2515 RegWidth = 64; 2516 return true; 2517 } 2518 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2519 Reg = AMDGPU::TMA; 2520 RegWidth = 64; 2521 return true; 2522 } 2523 Error(Loc, "register does not fit in the list"); 2524 return false; 2525 case IS_VGPR: 2526 case IS_SGPR: 2527 case IS_AGPR: 2528 case IS_TTMP: 2529 if (Reg1 != Reg + RegWidth / 32) { 2530 Error(Loc, "registers in a list must have consecutive indices"); 2531 return false; 2532 } 2533 RegWidth += 32; 2534 return true; 2535 default: 2536 llvm_unreachable("unexpected register kind"); 2537 } 2538 } 2539 2540 struct RegInfo { 2541 StringLiteral Name; 2542 RegisterKind Kind; 2543 }; 2544 2545 static constexpr RegInfo RegularRegisters[] = { 2546 {{"v"}, IS_VGPR}, 2547 {{"s"}, IS_SGPR}, 2548 {{"ttmp"}, IS_TTMP}, 2549 {{"acc"}, IS_AGPR}, 2550 {{"a"}, IS_AGPR}, 2551 }; 2552 2553 static bool isRegularReg(RegisterKind Kind) { 2554 return Kind == IS_VGPR || 2555 Kind == IS_SGPR || 2556 Kind == IS_TTMP || 2557 Kind == IS_AGPR; 2558 } 2559 2560 static const RegInfo* getRegularRegInfo(StringRef Str) { 2561 for (const RegInfo &Reg : RegularRegisters) 2562 if (Str.startswith(Reg.Name)) 2563 return &Reg; 2564 return nullptr; 2565 } 2566 2567 static bool getRegNum(StringRef Str, unsigned& Num) { 2568 return !Str.getAsInteger(10, Num); 2569 } 2570 2571 bool 2572 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2573 const AsmToken &NextToken) const { 2574 2575 // A list of consecutive registers: [s0,s1,s2,s3] 2576 if (Token.is(AsmToken::LBrac)) 2577 return true; 2578 2579 if (!Token.is(AsmToken::Identifier)) 2580 return false; 2581 2582 // A single register like s0 or a range of registers like s[0:1] 2583 2584 StringRef Str = Token.getString(); 2585 const RegInfo *Reg = getRegularRegInfo(Str); 2586 if (Reg) { 2587 StringRef RegName = Reg->Name; 2588 StringRef RegSuffix = Str.substr(RegName.size()); 2589 if (!RegSuffix.empty()) { 2590 unsigned Num; 2591 // A single register with an index: rXX 2592 if (getRegNum(RegSuffix, Num)) 2593 return true; 2594 } else { 2595 // A range of registers: r[XX:YY]. 2596 if (NextToken.is(AsmToken::LBrac)) 2597 return true; 2598 } 2599 } 2600 2601 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2602 } 2603 2604 bool 2605 AMDGPUAsmParser::isRegister() 2606 { 2607 return isRegister(getToken(), peekToken()); 2608 } 2609 2610 unsigned 2611 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2612 unsigned RegNum, 2613 unsigned RegWidth, 2614 SMLoc Loc) { 2615 2616 assert(isRegularReg(RegKind)); 2617 2618 unsigned AlignSize = 1; 2619 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2620 // SGPR and TTMP registers must be aligned. 2621 // Max required alignment is 4 dwords. 
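    // E.g. s[0:1] is valid, but s[1:2] is rejected because a 64-bit SGPR pair
    // must start at an even register index.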
2622 AlignSize = std::min(RegWidth / 32, 4u); 2623 } 2624 2625 if (RegNum % AlignSize != 0) { 2626 Error(Loc, "invalid register alignment"); 2627 return AMDGPU::NoRegister; 2628 } 2629 2630 unsigned RegIdx = RegNum / AlignSize; 2631 int RCID = getRegClass(RegKind, RegWidth); 2632 if (RCID == -1) { 2633 Error(Loc, "invalid or unsupported register size"); 2634 return AMDGPU::NoRegister; 2635 } 2636 2637 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2638 const MCRegisterClass RC = TRI->getRegClass(RCID); 2639 if (RegIdx >= RC.getNumRegs()) { 2640 Error(Loc, "register index is out of range"); 2641 return AMDGPU::NoRegister; 2642 } 2643 2644 return RC.getRegister(RegIdx); 2645 } 2646 2647 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) { 2648 int64_t RegLo, RegHi; 2649 if (!skipToken(AsmToken::LBrac, "missing register index")) 2650 return false; 2651 2652 SMLoc FirstIdxLoc = getLoc(); 2653 SMLoc SecondIdxLoc; 2654 2655 if (!parseExpr(RegLo)) 2656 return false; 2657 2658 if (trySkipToken(AsmToken::Colon)) { 2659 SecondIdxLoc = getLoc(); 2660 if (!parseExpr(RegHi)) 2661 return false; 2662 } else { 2663 RegHi = RegLo; 2664 } 2665 2666 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2667 return false; 2668 2669 if (!isUInt<32>(RegLo)) { 2670 Error(FirstIdxLoc, "invalid register index"); 2671 return false; 2672 } 2673 2674 if (!isUInt<32>(RegHi)) { 2675 Error(SecondIdxLoc, "invalid register index"); 2676 return false; 2677 } 2678 2679 if (RegLo > RegHi) { 2680 Error(FirstIdxLoc, "first register index should not exceed second index"); 2681 return false; 2682 } 2683 2684 Num = static_cast<unsigned>(RegLo); 2685 RegWidth = 32 * ((RegHi - RegLo) + 1); 2686 return true; 2687 } 2688 2689 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2690 unsigned &RegNum, unsigned &RegWidth, 2691 SmallVectorImpl<AsmToken> &Tokens) { 2692 assert(isToken(AsmToken::Identifier)); 2693 unsigned Reg = getSpecialRegForName(getTokenStr()); 2694 if (Reg) { 2695 RegNum = 0; 2696 RegWidth = 32; 2697 RegKind = IS_SPECIAL; 2698 Tokens.push_back(getToken()); 2699 lex(); // skip register name 2700 } 2701 return Reg; 2702 } 2703 2704 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2705 unsigned &RegNum, unsigned &RegWidth, 2706 SmallVectorImpl<AsmToken> &Tokens) { 2707 assert(isToken(AsmToken::Identifier)); 2708 StringRef RegName = getTokenStr(); 2709 auto Loc = getLoc(); 2710 2711 const RegInfo *RI = getRegularRegInfo(RegName); 2712 if (!RI) { 2713 Error(Loc, "invalid register name"); 2714 return AMDGPU::NoRegister; 2715 } 2716 2717 Tokens.push_back(getToken()); 2718 lex(); // skip register name 2719 2720 RegKind = RI->Kind; 2721 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2722 if (!RegSuffix.empty()) { 2723 // Single 32-bit register: vXX. 2724 if (!getRegNum(RegSuffix, RegNum)) { 2725 Error(Loc, "invalid register index"); 2726 return AMDGPU::NoRegister; 2727 } 2728 RegWidth = 32; 2729 } else { 2730 // Range of registers: v[XX:YY]. ":YY" is optional. 
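    // E.g. v[8:11] yields RegNum = 8 and RegWidth = 128; v[8] is equivalent to v8.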
2731 if (!ParseRegRange(RegNum, RegWidth)) 2732 return AMDGPU::NoRegister; 2733 } 2734 2735 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2736 } 2737 2738 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2739 unsigned &RegWidth, 2740 SmallVectorImpl<AsmToken> &Tokens) { 2741 unsigned Reg = AMDGPU::NoRegister; 2742 auto ListLoc = getLoc(); 2743 2744 if (!skipToken(AsmToken::LBrac, 2745 "expected a register or a list of registers")) { 2746 return AMDGPU::NoRegister; 2747 } 2748 2749 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2750 2751 auto Loc = getLoc(); 2752 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2753 return AMDGPU::NoRegister; 2754 if (RegWidth != 32) { 2755 Error(Loc, "expected a single 32-bit register"); 2756 return AMDGPU::NoRegister; 2757 } 2758 2759 for (; trySkipToken(AsmToken::Comma); ) { 2760 RegisterKind NextRegKind; 2761 unsigned NextReg, NextRegNum, NextRegWidth; 2762 Loc = getLoc(); 2763 2764 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2765 NextRegNum, NextRegWidth, 2766 Tokens)) { 2767 return AMDGPU::NoRegister; 2768 } 2769 if (NextRegWidth != 32) { 2770 Error(Loc, "expected a single 32-bit register"); 2771 return AMDGPU::NoRegister; 2772 } 2773 if (NextRegKind != RegKind) { 2774 Error(Loc, "registers in a list must be of the same kind"); 2775 return AMDGPU::NoRegister; 2776 } 2777 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2778 return AMDGPU::NoRegister; 2779 } 2780 2781 if (!skipToken(AsmToken::RBrac, 2782 "expected a comma or a closing square bracket")) { 2783 return AMDGPU::NoRegister; 2784 } 2785 2786 if (isRegularReg(RegKind)) 2787 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2788 2789 return Reg; 2790 } 2791 2792 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2793 unsigned &RegNum, unsigned &RegWidth, 2794 SmallVectorImpl<AsmToken> &Tokens) { 2795 auto Loc = getLoc(); 2796 Reg = AMDGPU::NoRegister; 2797 2798 if (isToken(AsmToken::Identifier)) { 2799 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2800 if (Reg == AMDGPU::NoRegister) 2801 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2802 } else { 2803 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2804 } 2805 2806 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2807 if (Reg == AMDGPU::NoRegister) { 2808 assert(Parser.hasPendingError()); 2809 return false; 2810 } 2811 2812 if (!subtargetHasRegister(*TRI, Reg)) { 2813 if (Reg == AMDGPU::SGPR_NULL) { 2814 Error(Loc, "'null' operand is not supported on this GPU"); 2815 } else { 2816 Error(Loc, "register not available on this GPU"); 2817 } 2818 return false; 2819 } 2820 2821 return true; 2822 } 2823 2824 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2825 unsigned &RegNum, unsigned &RegWidth, 2826 bool RestoreOnFailure /*=false*/) { 2827 Reg = AMDGPU::NoRegister; 2828 2829 SmallVector<AsmToken, 1> Tokens; 2830 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2831 if (RestoreOnFailure) { 2832 while (!Tokens.empty()) { 2833 getLexer().UnLex(Tokens.pop_back_val()); 2834 } 2835 } 2836 return true; 2837 } 2838 return false; 2839 } 2840 2841 Optional<StringRef> 2842 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2843 switch (RegKind) { 2844 case IS_VGPR: 2845 return StringRef(".amdgcn.next_free_vgpr"); 2846 case IS_SGPR: 2847 return StringRef(".amdgcn.next_free_sgpr"); 2848 default: 2849 return None; 2850 } 2851 } 2852 2853 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2854 auto SymbolName = getGprCountSymbolName(RegKind); 2855 assert(SymbolName && "initializing invalid register kind"); 2856 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2857 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2858 } 2859 2860 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2861 unsigned DwordRegIndex, 2862 unsigned RegWidth) { 2863 // Symbols are only defined for GCN targets 2864 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2865 return true; 2866 2867 auto SymbolName = getGprCountSymbolName(RegKind); 2868 if (!SymbolName) 2869 return true; 2870 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2871 2872 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1; 2873 int64_t OldCount; 2874 2875 if (!Sym->isVariable()) 2876 return !Error(getLoc(), 2877 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2878 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2879 return !Error( 2880 getLoc(), 2881 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2882 2883 if (OldCount <= NewMax) 2884 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2885 2886 return true; 2887 } 2888 2889 std::unique_ptr<AMDGPUOperand> 2890 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2891 const auto &Tok = getToken(); 2892 SMLoc StartLoc = Tok.getLoc(); 2893 SMLoc EndLoc = Tok.getEndLoc(); 2894 RegisterKind RegKind; 2895 unsigned Reg, RegNum, RegWidth; 2896 2897 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2898 return nullptr; 2899 } 2900 if (isHsaAbiVersion3AndAbove(&getSTI())) { 2901 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2902 return nullptr; 2903 } else 2904 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2905 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2906 } 2907 2908 OperandMatchResultTy 2909 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2910 // TODO: add syntactic sugar for 1/(2*PI) 2911 2912 assert(!isRegister()); 2913 assert(!isModifier()); 2914 2915 const auto& Tok = getToken(); 2916 const auto& NextTok = peekToken(); 2917 bool IsReal = Tok.is(AsmToken::Real); 2918 SMLoc S = getLoc(); 2919 bool Negate = false; 2920 2921 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2922 lex(); 2923 IsReal = true; 2924 Negate = true; 2925 } 2926 2927 if (IsReal) { 2928 // Floating-point expressions are not supported. 2929 // Can only allow floating-point literals with an 2930 // optional sign. 2931 2932 StringRef Num = getTokenStr(); 2933 lex(); 2934 2935 APFloat RealVal(APFloat::IEEEdouble()); 2936 auto roundMode = APFloat::rmNearestTiesToEven; 2937 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2938 return MatchOperand_ParseFail; 2939 } 2940 if (Negate) 2941 RealVal.changeSign(); 2942 2943 Operands.push_back( 2944 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2945 AMDGPUOperand::ImmTyNone, true)); 2946 2947 return MatchOperand_Success; 2948 2949 } else { 2950 int64_t IntVal; 2951 const MCExpr *Expr; 2952 SMLoc S = getLoc(); 2953 2954 if (HasSP3AbsModifier) { 2955 // This is a workaround for handling expressions 2956 // as arguments of SP3 'abs' modifier, for example: 2957 // |1.0| 2958 // |-1| 2959 // |1+x| 2960 // This syntax is not compatible with syntax of standard 2961 // MC expressions (due to the trailing '|'). 
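      // Parse only a primary expression here so that parsing stops before the
      // closing '|'.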
2962 SMLoc EndLoc; 2963 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2964 return MatchOperand_ParseFail; 2965 } else { 2966 if (Parser.parseExpression(Expr)) 2967 return MatchOperand_ParseFail; 2968 } 2969 2970 if (Expr->evaluateAsAbsolute(IntVal)) { 2971 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2972 } else { 2973 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2974 } 2975 2976 return MatchOperand_Success; 2977 } 2978 2979 return MatchOperand_NoMatch; 2980 } 2981 2982 OperandMatchResultTy 2983 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2984 if (!isRegister()) 2985 return MatchOperand_NoMatch; 2986 2987 if (auto R = parseRegister()) { 2988 assert(R->isReg()); 2989 Operands.push_back(std::move(R)); 2990 return MatchOperand_Success; 2991 } 2992 return MatchOperand_ParseFail; 2993 } 2994 2995 OperandMatchResultTy 2996 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2997 auto res = parseReg(Operands); 2998 if (res != MatchOperand_NoMatch) { 2999 return res; 3000 } else if (isModifier()) { 3001 return MatchOperand_NoMatch; 3002 } else { 3003 return parseImm(Operands, HasSP3AbsMod); 3004 } 3005 } 3006 3007 bool 3008 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3009 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 3010 const auto &str = Token.getString(); 3011 return str == "abs" || str == "neg" || str == "sext"; 3012 } 3013 return false; 3014 } 3015 3016 bool 3017 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 3018 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 3019 } 3020 3021 bool 3022 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3023 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 3024 } 3025 3026 bool 3027 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3028 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 3029 } 3030 3031 // Check if this is an operand modifier or an opcode modifier 3032 // which may look like an expression but it is not. We should 3033 // avoid parsing these modifiers as expressions. Currently 3034 // recognized sequences are: 3035 // |...| 3036 // abs(...) 3037 // neg(...) 3038 // sext(...) 3039 // -reg 3040 // -|...| 3041 // -abs(...) 3042 // name:... 3043 // Note that simple opcode modifiers like 'gds' may be parsed as 3044 // expressions; this is a special case. See getExpressionAsToken. 3045 // 3046 bool 3047 AMDGPUAsmParser::isModifier() { 3048 3049 AsmToken Tok = getToken(); 3050 AsmToken NextToken[2]; 3051 peekTokens(NextToken); 3052 3053 return isOperandModifier(Tok, NextToken[0]) || 3054 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 3055 isOpcodeModifierWithVal(Tok, NextToken[0]); 3056 } 3057 3058 // Check if the current token is an SP3 'neg' modifier. 3059 // Currently this modifier is allowed in the following context: 3060 // 3061 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 3062 // 2. Before an 'abs' modifier: -abs(...) 3063 // 3. Before an SP3 'abs' modifier: -|...| 3064 // 3065 // In all other cases "-" is handled as a part 3066 // of an expression that follows the sign. 
//
// Note: When "-" is followed by an integer literal,
// it is interpreted as integer negation rather
// than a floating-point NEG modifier applied to the literal.
// Besides being counter-intuitive, such use of a floating-point
// NEG modifier would have resulted in different meanings
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//     v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//     v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise for uniformity.
//
bool
AMDGPUAsmParser::parseSP3NegModifier() {

  AsmToken NextToken[2];
  peekTokens(NextToken);

  if (isToken(AsmToken::Minus) &&
      (isRegister(NextToken[0], NextToken[1]) ||
       NextToken[0].is(AsmToken::Pipe) ||
       isId(NextToken[0], "abs"))) {
    lex();
    return true;
  }

  return false;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
    return MatchOperand_ParseFail;
  }

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId("neg");
  if (Neg && SP3Neg) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return MatchOperand_ParseFail;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return MatchOperand_ParseFail;

  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  if (Abs && SP3Abs) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, SP3Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 3140 } 3141 3142 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 3143 return MatchOperand_ParseFail; 3144 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3145 return MatchOperand_ParseFail; 3146 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3147 return MatchOperand_ParseFail; 3148 3149 AMDGPUOperand::Modifiers Mods; 3150 Mods.Abs = Abs || SP3Abs; 3151 Mods.Neg = Neg || SP3Neg; 3152 3153 if (Mods.hasFPModifiers()) { 3154 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3155 if (Op.isExpr()) { 3156 Error(Op.getStartLoc(), "expected an absolute expression"); 3157 return MatchOperand_ParseFail; 3158 } 3159 Op.setModifiers(Mods); 3160 } 3161 return MatchOperand_Success; 3162 } 3163 3164 OperandMatchResultTy 3165 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3166 bool AllowImm) { 3167 bool Sext = trySkipId("sext"); 3168 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3169 return MatchOperand_ParseFail; 3170 3171 OperandMatchResultTy Res; 3172 if (AllowImm) { 3173 Res = parseRegOrImm(Operands); 3174 } else { 3175 Res = parseReg(Operands); 3176 } 3177 if (Res != MatchOperand_Success) { 3178 return Sext? MatchOperand_ParseFail : Res; 3179 } 3180 3181 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3182 return MatchOperand_ParseFail; 3183 3184 AMDGPUOperand::Modifiers Mods; 3185 Mods.Sext = Sext; 3186 3187 if (Mods.hasIntModifiers()) { 3188 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3189 if (Op.isExpr()) { 3190 Error(Op.getStartLoc(), "expected an absolute expression"); 3191 return MatchOperand_ParseFail; 3192 } 3193 Op.setModifiers(Mods); 3194 } 3195 3196 return MatchOperand_Success; 3197 } 3198 3199 OperandMatchResultTy 3200 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3201 return parseRegOrImmWithFPInputMods(Operands, false); 3202 } 3203 3204 OperandMatchResultTy 3205 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3206 return parseRegOrImmWithIntInputMods(Operands, false); 3207 } 3208 3209 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3210 auto Loc = getLoc(); 3211 if (trySkipId("off")) { 3212 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3213 AMDGPUOperand::ImmTyOff, false)); 3214 return MatchOperand_Success; 3215 } 3216 3217 if (!isRegister()) 3218 return MatchOperand_NoMatch; 3219 3220 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3221 if (Reg) { 3222 Operands.push_back(std::move(Reg)); 3223 return MatchOperand_Success; 3224 } 3225 3226 return MatchOperand_ParseFail; 3227 3228 } 3229 3230 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3231 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3232 3233 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3234 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3235 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3236 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3237 return Match_InvalidOperand; 3238 3239 if ((TSFlags & SIInstrFlags::VOP3) && 3240 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3241 getForcedEncodingSize() != 64) 3242 return Match_PreferE32; 3243 3244 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3245 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3246 // v_mac_f32/16 allow only dst_sel == DWORD; 3247 auto OpNum = 3248 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
    const auto &Op = Inst.getOperand(OpNum);
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;
    }
  }

  return Match_Success;
}

static ArrayRef<unsigned> getAllVariants() {
  static const unsigned Variants[] = {
    AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
    AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
    AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
  };

  return makeArrayRef(Variants);
}

// Which asm variants we should check.
ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
  if (isForcedDPP() && isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
    return makeArrayRef(Variants);
  }
  if (getForcedEncodingSize() == 32) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
    return makeArrayRef(Variants);
  }

  if (isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
    return makeArrayRef(Variants);
  }

  if (isForcedSDWA()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
                                        AMDGPUAsmVariants::SDWA9};
    return makeArrayRef(Variants);
  }

  if (isForcedDPP()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
    return makeArrayRef(Variants);
  }

  return getAllVariants();
}

StringRef AMDGPUAsmParser::getMatchedVariantName() const {
  if (isForcedDPP() && isForcedVOP3())
    return "e64_dpp";

  if (getForcedEncodingSize() == 32)
    return "e32";

  if (isForcedVOP3())
    return "e64";

  if (isForcedSDWA())
    return "sdwa";

  if (isForcedDPP())
    return "dpp";

  return "";
}

unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  const unsigned Num = Desc.getNumImplicitUses();
  for (unsigned i = 0; i < Num; ++i) {
    unsigned Reg = Desc.ImplicitUses[i];
    switch (Reg) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::VCC:
    case AMDGPU::VCC_LO:
    case AMDGPU::VCC_HI:
    case AMDGPU::M0:
      return Reg;
    default:
      break;
    }
  }
  return AMDGPU::NoRegister;
}

// NB: This code is correct only when used to check constant
// bus limitations because GFX7 does not support f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
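// For example, a 32-bit operand equal to 64 is an inline constant (integers in
// [-16, 64] are inlinable) and does not use the constant bus, whereas 65 must
// be encoded as a literal.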
3340 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3341 unsigned OpIdx) const { 3342 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3343 3344 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3345 return false; 3346 } 3347 3348 const MCOperand &MO = Inst.getOperand(OpIdx); 3349 3350 int64_t Val = MO.getImm(); 3351 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3352 3353 switch (OpSize) { // expected operand size 3354 case 8: 3355 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3356 case 4: 3357 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3358 case 2: { 3359 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3360 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3361 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3362 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3363 return AMDGPU::isInlinableIntLiteral(Val); 3364 3365 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3366 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3367 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3368 return AMDGPU::isInlinableIntLiteralV216(Val); 3369 3370 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3371 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3372 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3373 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3374 3375 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3376 } 3377 default: 3378 llvm_unreachable("invalid operand size"); 3379 } 3380 } 3381 3382 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3383 if (!isGFX10Plus()) 3384 return 1; 3385 3386 switch (Opcode) { 3387 // 64-bit shift instructions can use only one scalar value input 3388 case AMDGPU::V_LSHLREV_B64_e64: 3389 case AMDGPU::V_LSHLREV_B64_gfx10: 3390 case AMDGPU::V_LSHLREV_B64_e64_gfx11: 3391 case AMDGPU::V_LSHRREV_B64_e64: 3392 case AMDGPU::V_LSHRREV_B64_gfx10: 3393 case AMDGPU::V_LSHRREV_B64_e64_gfx11: 3394 case AMDGPU::V_ASHRREV_I64_e64: 3395 case AMDGPU::V_ASHRREV_I64_gfx10: 3396 case AMDGPU::V_ASHRREV_I64_e64_gfx11: 3397 case AMDGPU::V_LSHL_B64_e64: 3398 case AMDGPU::V_LSHR_B64_e64: 3399 case AMDGPU::V_ASHR_I64_e64: 3400 return 1; 3401 default: 3402 return 2; 3403 } 3404 } 3405 3406 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3407 const MCOperand &MO = Inst.getOperand(OpIdx); 3408 if (MO.isImm()) { 3409 return !isInlineConstant(Inst, OpIdx); 3410 } else if (MO.isReg()) { 3411 auto Reg = MO.getReg(); 3412 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3413 auto PReg = mc2PseudoReg(Reg); 3414 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3415 } else { 3416 return true; 3417 } 3418 } 3419 3420 bool 3421 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3422 const OperandVector &Operands) { 3423 const unsigned Opcode = Inst.getOpcode(); 3424 const MCInstrDesc &Desc = MII.get(Opcode); 3425 unsigned LastSGPR = AMDGPU::NoRegister; 3426 unsigned ConstantBusUseCount = 0; 3427 unsigned NumLiterals = 0; 3428 unsigned LiteralSize; 3429 3430 if (Desc.TSFlags & 3431 (SIInstrFlags::VOPC | 3432 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3433 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3434 SIInstrFlags::SDWA)) { 3435 // Check special imm operands (used by madmk, etc) 3436 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3437 ++NumLiterals; 3438 LiteralSize = 4; 3439 } 3440 3441 SmallDenseSet<unsigned> SGPRsUsed; 3442 unsigned SGPRUsed = 
findImplicitSGPRReadInVOP(Inst);
    if (SGPRUsed != AMDGPU::NoRegister) {
      SGPRsUsed.insert(SGPRUsed);
      ++ConstantBusUseCount;
    }

    const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
    const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    for (int OpIdx : OpIndices) {
      if (OpIdx == -1) break;

      const MCOperand &MO = Inst.getOperand(OpIdx);
      if (usesConstantBus(Inst, OpIdx)) {
        if (MO.isReg()) {
          LastSGPR = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersection, such as
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction.
          if (!SGPRsUsed.count(LastSGPR)) {
            SGPRsUsed.insert(LastSGPR);
            ++ConstantBusUseCount;
          }
        } else { // Expression or a literal

          if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
            continue; // special operand like VINTERP attr_chan

          // An instruction may use only one literal.
          // This has been validated in a previous step.
          // See validateVOPLiteral.
          // This literal may be used as more than one operand.
          // If all these operands are of the same size,
          // this literal counts as one scalar value.
          // Otherwise it counts as 2 scalar values.
          // See "GFX10 Shader Programming", section 3.6.2.3.

          unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
          if (Size < 4) Size = 4;

          if (NumLiterals == 0) {
            NumLiterals = 1;
            LiteralSize = Size;
          } else if (LiteralSize != Size) {
            NumLiterals = 2;
          }
        }
      }
    }
  }
  ConstantBusUseCount += NumLiterals;

  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
    return true;

  SMLoc LitLoc = getLitLoc(Operands);
  SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
  SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ?
RegLoc : LitLoc; 3506 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3507 return false; 3508 } 3509 3510 bool 3511 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3512 const OperandVector &Operands) { 3513 const unsigned Opcode = Inst.getOpcode(); 3514 const MCInstrDesc &Desc = MII.get(Opcode); 3515 3516 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3517 if (DstIdx == -1 || 3518 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3519 return true; 3520 } 3521 3522 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3523 3524 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3525 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3526 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3527 3528 assert(DstIdx != -1); 3529 const MCOperand &Dst = Inst.getOperand(DstIdx); 3530 assert(Dst.isReg()); 3531 3532 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3533 3534 for (int SrcIdx : SrcIndices) { 3535 if (SrcIdx == -1) break; 3536 const MCOperand &Src = Inst.getOperand(SrcIdx); 3537 if (Src.isReg()) { 3538 if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) { 3539 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3540 Error(getRegLoc(SrcReg, Operands), 3541 "destination must be different than all sources"); 3542 return false; 3543 } 3544 } 3545 } 3546 3547 return true; 3548 } 3549 3550 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3551 3552 const unsigned Opc = Inst.getOpcode(); 3553 const MCInstrDesc &Desc = MII.get(Opc); 3554 3555 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3556 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3557 assert(ClampIdx != -1); 3558 return Inst.getOperand(ClampIdx).getImm() == 0; 3559 } 3560 3561 return true; 3562 } 3563 3564 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3565 3566 const unsigned Opc = Inst.getOpcode(); 3567 const MCInstrDesc &Desc = MII.get(Opc); 3568 3569 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3570 return None; 3571 3572 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3573 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3574 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3575 3576 assert(VDataIdx != -1); 3577 3578 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3579 return None; 3580 3581 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3582 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3583 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3584 if (DMask == 0) 3585 DMask = 1; 3586 3587 bool isPackedD16 = false; 3588 unsigned DataSize = 3589 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 3590 if (hasPackedD16()) { 3591 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3592 isPackedD16 = D16Idx >= 0; 3593 if (isPackedD16 && Inst.getOperand(D16Idx).getImm()) 3594 DataSize = (DataSize + 1) / 2; 3595 } 3596 3597 if ((VDataSize / 4) == DataSize + TFESize) 3598 return None; 3599 3600 return StringRef(isPackedD16 3601 ? 
"image data size does not match dmask, d16 and tfe" 3602 : "image data size does not match dmask and tfe"); 3603 } 3604 3605 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3606 const unsigned Opc = Inst.getOpcode(); 3607 const MCInstrDesc &Desc = MII.get(Opc); 3608 3609 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3610 return true; 3611 3612 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3613 3614 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3615 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3616 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3617 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3618 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3619 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3620 3621 assert(VAddr0Idx != -1); 3622 assert(SrsrcIdx != -1); 3623 assert(SrsrcIdx > VAddr0Idx); 3624 3625 if (DimIdx == -1) 3626 return true; // intersect_ray 3627 3628 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3629 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3630 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3631 unsigned ActualAddrSize = 3632 IsNSA ? SrsrcIdx - VAddr0Idx 3633 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3634 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3635 3636 unsigned ExpectedAddrSize = 3637 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3638 3639 if (!IsNSA) { 3640 if (ExpectedAddrSize > 8) 3641 ExpectedAddrSize = 16; 3642 3643 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3644 // This provides backward compatibility for assembly created 3645 // before 160b/192b/224b types were directly supported. 3646 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3647 return true; 3648 } 3649 3650 return ActualAddrSize == ExpectedAddrSize; 3651 } 3652 3653 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3654 3655 const unsigned Opc = Inst.getOpcode(); 3656 const MCInstrDesc &Desc = MII.get(Opc); 3657 3658 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3659 return true; 3660 if (!Desc.mayLoad() || !Desc.mayStore()) 3661 return true; // Not atomic 3662 3663 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3664 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3665 3666 // This is an incomplete check because image_atomic_cmpswap 3667 // may only use 0x3 and 0xf while other atomic operations 3668 // may use 0x1 and 0x3. However these limitations are 3669 // verified when we check that dmask matches dst size. 3670 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3671 } 3672 3673 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3674 3675 const unsigned Opc = Inst.getOpcode(); 3676 const MCInstrDesc &Desc = MII.get(Opc); 3677 3678 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3679 return true; 3680 3681 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3682 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3683 3684 // GATHER4 instructions use dmask in a different fashion compared to 3685 // other MIMG instructions. The only useful DMASK values are 3686 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3687 // (red,red,red,red) etc.) The ISA document doesn't mention 3688 // this. 
3689 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3690 } 3691 3692 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3693 const unsigned Opc = Inst.getOpcode(); 3694 const MCInstrDesc &Desc = MII.get(Opc); 3695 3696 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3697 return true; 3698 3699 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3700 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3701 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3702 3703 if (!BaseOpcode->MSAA) 3704 return true; 3705 3706 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3707 assert(DimIdx != -1); 3708 3709 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3710 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3711 3712 return DimInfo->MSAA; 3713 } 3714 3715 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3716 { 3717 switch (Opcode) { 3718 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3719 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3720 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3721 return true; 3722 default: 3723 return false; 3724 } 3725 } 3726 3727 // movrels* opcodes should only allow VGPRS as src0. 3728 // This is specified in .td description for vop1/vop3, 3729 // but sdwa is handled differently. See isSDWAOperand. 3730 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3731 const OperandVector &Operands) { 3732 3733 const unsigned Opc = Inst.getOpcode(); 3734 const MCInstrDesc &Desc = MII.get(Opc); 3735 3736 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3737 return true; 3738 3739 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3740 assert(Src0Idx != -1); 3741 3742 SMLoc ErrLoc; 3743 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3744 if (Src0.isReg()) { 3745 auto Reg = mc2PseudoReg(Src0.getReg()); 3746 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3747 if (!isSGPR(Reg, TRI)) 3748 return true; 3749 ErrLoc = getRegLoc(Reg, Operands); 3750 } else { 3751 ErrLoc = getConstLoc(Operands); 3752 } 3753 3754 Error(ErrLoc, "source operand must be a VGPR"); 3755 return false; 3756 } 3757 3758 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3759 const OperandVector &Operands) { 3760 3761 const unsigned Opc = Inst.getOpcode(); 3762 3763 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3764 return true; 3765 3766 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3767 assert(Src0Idx != -1); 3768 3769 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3770 if (!Src0.isReg()) 3771 return true; 3772 3773 auto Reg = mc2PseudoReg(Src0.getReg()); 3774 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3775 if (!isGFX90A() && isSGPR(Reg, TRI)) { 3776 Error(getRegLoc(Reg, Operands), 3777 "source operand must be either a VGPR or an inline constant"); 3778 return false; 3779 } 3780 3781 return true; 3782 } 3783 3784 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 3785 const OperandVector &Operands) { 3786 const unsigned Opc = Inst.getOpcode(); 3787 const MCInstrDesc &Desc = MII.get(Opc); 3788 3789 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 3790 return true; 3791 3792 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 3793 if (Src2Idx == -1) 3794 return true; 3795 3796 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 3797 if (!Src2.isReg()) 3798 return true; 3799 3800 MCRegister Src2Reg = Src2.getReg(); 3801 MCRegister DstReg = Inst.getOperand(0).getReg(); 3802 if (Src2Reg == DstReg) 3803 return 
true; 3804 3805 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3806 if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128) 3807 return true; 3808 3809 if (TRI->regsOverlap(Src2Reg, DstReg)) { 3810 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands), 3811 "source 2 operand must not partially overlap with dst"); 3812 return false; 3813 } 3814 3815 return true; 3816 } 3817 3818 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3819 switch (Inst.getOpcode()) { 3820 default: 3821 return true; 3822 case V_DIV_SCALE_F32_gfx6_gfx7: 3823 case V_DIV_SCALE_F32_vi: 3824 case V_DIV_SCALE_F32_gfx10: 3825 case V_DIV_SCALE_F64_gfx6_gfx7: 3826 case V_DIV_SCALE_F64_vi: 3827 case V_DIV_SCALE_F64_gfx10: 3828 break; 3829 } 3830 3831 // TODO: Check that src0 = src1 or src2. 3832 3833 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3834 AMDGPU::OpName::src1_modifiers, 3835 AMDGPU::OpName::src2_modifiers}) { 3836 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3837 .getImm() & 3838 SISrcMods::ABS) { 3839 return false; 3840 } 3841 } 3842 3843 return true; 3844 } 3845 3846 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3847 3848 const unsigned Opc = Inst.getOpcode(); 3849 const MCInstrDesc &Desc = MII.get(Opc); 3850 3851 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3852 return true; 3853 3854 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3855 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3856 if (isCI() || isSI()) 3857 return false; 3858 } 3859 3860 return true; 3861 } 3862 3863 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3864 const unsigned Opc = Inst.getOpcode(); 3865 const MCInstrDesc &Desc = MII.get(Opc); 3866 3867 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3868 return true; 3869 3870 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3871 if (DimIdx < 0) 3872 return true; 3873 3874 long Imm = Inst.getOperand(DimIdx).getImm(); 3875 if (Imm < 0 || Imm >= 8) 3876 return false; 3877 3878 return true; 3879 } 3880 3881 static bool IsRevOpcode(const unsigned Opcode) 3882 { 3883 switch (Opcode) { 3884 case AMDGPU::V_SUBREV_F32_e32: 3885 case AMDGPU::V_SUBREV_F32_e64: 3886 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3887 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3888 case AMDGPU::V_SUBREV_F32_e32_vi: 3889 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3890 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3891 case AMDGPU::V_SUBREV_F32_e64_vi: 3892 3893 case AMDGPU::V_SUBREV_CO_U32_e32: 3894 case AMDGPU::V_SUBREV_CO_U32_e64: 3895 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3896 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3897 3898 case AMDGPU::V_SUBBREV_U32_e32: 3899 case AMDGPU::V_SUBBREV_U32_e64: 3900 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3901 case AMDGPU::V_SUBBREV_U32_e32_vi: 3902 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3903 case AMDGPU::V_SUBBREV_U32_e64_vi: 3904 3905 case AMDGPU::V_SUBREV_U32_e32: 3906 case AMDGPU::V_SUBREV_U32_e64: 3907 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3908 case AMDGPU::V_SUBREV_U32_e32_vi: 3909 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3910 case AMDGPU::V_SUBREV_U32_e64_vi: 3911 3912 case AMDGPU::V_SUBREV_F16_e32: 3913 case AMDGPU::V_SUBREV_F16_e64: 3914 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3915 case AMDGPU::V_SUBREV_F16_e32_vi: 3916 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3917 case AMDGPU::V_SUBREV_F16_e64_vi: 3918 3919 case AMDGPU::V_SUBREV_U16_e32: 3920 case AMDGPU::V_SUBREV_U16_e64: 3921 case AMDGPU::V_SUBREV_U16_e32_vi: 3922 case AMDGPU::V_SUBREV_U16_e64_vi: 3923 3924 case 
AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3925 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3926 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3927 3928 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3929 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3930 3931 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3932 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3933 3934 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3935 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3936 3937 case AMDGPU::V_LSHRREV_B32_e32: 3938 case AMDGPU::V_LSHRREV_B32_e64: 3939 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3940 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3941 case AMDGPU::V_LSHRREV_B32_e32_vi: 3942 case AMDGPU::V_LSHRREV_B32_e64_vi: 3943 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3944 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3945 3946 case AMDGPU::V_ASHRREV_I32_e32: 3947 case AMDGPU::V_ASHRREV_I32_e64: 3948 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3949 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3950 case AMDGPU::V_ASHRREV_I32_e32_vi: 3951 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3952 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3953 case AMDGPU::V_ASHRREV_I32_e64_vi: 3954 3955 case AMDGPU::V_LSHLREV_B32_e32: 3956 case AMDGPU::V_LSHLREV_B32_e64: 3957 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3958 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3959 case AMDGPU::V_LSHLREV_B32_e32_vi: 3960 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3961 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3962 case AMDGPU::V_LSHLREV_B32_e64_vi: 3963 3964 case AMDGPU::V_LSHLREV_B16_e32: 3965 case AMDGPU::V_LSHLREV_B16_e64: 3966 case AMDGPU::V_LSHLREV_B16_e32_vi: 3967 case AMDGPU::V_LSHLREV_B16_e64_vi: 3968 case AMDGPU::V_LSHLREV_B16_gfx10: 3969 3970 case AMDGPU::V_LSHRREV_B16_e32: 3971 case AMDGPU::V_LSHRREV_B16_e64: 3972 case AMDGPU::V_LSHRREV_B16_e32_vi: 3973 case AMDGPU::V_LSHRREV_B16_e64_vi: 3974 case AMDGPU::V_LSHRREV_B16_gfx10: 3975 3976 case AMDGPU::V_ASHRREV_I16_e32: 3977 case AMDGPU::V_ASHRREV_I16_e64: 3978 case AMDGPU::V_ASHRREV_I16_e32_vi: 3979 case AMDGPU::V_ASHRREV_I16_e64_vi: 3980 case AMDGPU::V_ASHRREV_I16_gfx10: 3981 3982 case AMDGPU::V_LSHLREV_B64_e64: 3983 case AMDGPU::V_LSHLREV_B64_gfx10: 3984 case AMDGPU::V_LSHLREV_B64_vi: 3985 3986 case AMDGPU::V_LSHRREV_B64_e64: 3987 case AMDGPU::V_LSHRREV_B64_gfx10: 3988 case AMDGPU::V_LSHRREV_B64_vi: 3989 3990 case AMDGPU::V_ASHRREV_I64_e64: 3991 case AMDGPU::V_ASHRREV_I64_gfx10: 3992 case AMDGPU::V_ASHRREV_I64_vi: 3993 3994 case AMDGPU::V_PK_LSHLREV_B16: 3995 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3996 case AMDGPU::V_PK_LSHLREV_B16_vi: 3997 3998 case AMDGPU::V_PK_LSHRREV_B16: 3999 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 4000 case AMDGPU::V_PK_LSHRREV_B16_vi: 4001 case AMDGPU::V_PK_ASHRREV_I16: 4002 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 4003 case AMDGPU::V_PK_ASHRREV_I16_vi: 4004 return true; 4005 default: 4006 return false; 4007 } 4008 } 4009 4010 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 4011 4012 using namespace SIInstrFlags; 4013 const unsigned Opcode = Inst.getOpcode(); 4014 const MCInstrDesc &Desc = MII.get(Opcode); 4015 4016 // lds_direct register is defined so that it can be used 4017 // with 9-bit operands only. Ignore encodings which do not accept these. 
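// Illustrative example (assumed syntax, not asserting full operand
// legality): "v_mov_b32 v0, lds_direct" places lds_direct in src0 of a VOP1
// and is not rejected by this function, while using it as src1/src2, in an
// SDWA form, or with a *rev opcode is diagnosed below.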
4018 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 4019 if ((Desc.TSFlags & Enc) == 0) 4020 return None; 4021 4022 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 4023 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 4024 if (SrcIdx == -1) 4025 break; 4026 const auto &Src = Inst.getOperand(SrcIdx); 4027 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 4028 4029 if (isGFX90A() || isGFX11Plus()) 4030 return StringRef("lds_direct is not supported on this GPU"); 4031 4032 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 4033 return StringRef("lds_direct cannot be used with this instruction"); 4034 4035 if (SrcName != OpName::src0) 4036 return StringRef("lds_direct may be used as src0 only"); 4037 } 4038 } 4039 4040 return None; 4041 } 4042 4043 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 4044 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4045 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4046 if (Op.isFlatOffset()) 4047 return Op.getStartLoc(); 4048 } 4049 return getLoc(); 4050 } 4051 4052 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 4053 const OperandVector &Operands) { 4054 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4055 if ((TSFlags & SIInstrFlags::FLAT) == 0) 4056 return true; 4057 4058 auto Opcode = Inst.getOpcode(); 4059 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4060 assert(OpNum != -1); 4061 4062 const auto &Op = Inst.getOperand(OpNum); 4063 if (!hasFlatOffsets() && Op.getImm() != 0) { 4064 Error(getFlatOffsetLoc(Operands), 4065 "flat offset modifier is not supported on this GPU"); 4066 return false; 4067 } 4068 4069 // For FLAT segment the offset must be positive; 4070 // MSB is ignored and forced to zero. 4071 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 4072 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 4073 if (!isIntN(OffsetSize, Op.getImm())) { 4074 Error(getFlatOffsetLoc(Operands), 4075 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 4076 return false; 4077 } 4078 } else { 4079 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 4080 if (!isUIntN(OffsetSize, Op.getImm())) { 4081 Error(getFlatOffsetLoc(Operands), 4082 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 4083 return false; 4084 } 4085 } 4086 4087 return true; 4088 } 4089 4090 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 4091 // Start with second operand because SMEM Offset cannot be dst or src0. 
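// For example (sketch), in "s_load_dword s5, s[2:3], 0x1" slots 0 and 1 of
// Operands hold the mnemonic token and the destination, so the scan for the
// offset can safely begin at index 2.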
4092 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 4093 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4094 if (Op.isSMEMOffset()) 4095 return Op.getStartLoc(); 4096 } 4097 return getLoc(); 4098 } 4099 4100 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 4101 const OperandVector &Operands) { 4102 if (isCI() || isSI()) 4103 return true; 4104 4105 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4106 if ((TSFlags & SIInstrFlags::SMRD) == 0) 4107 return true; 4108 4109 auto Opcode = Inst.getOpcode(); 4110 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4111 if (OpNum == -1) 4112 return true; 4113 4114 const auto &Op = Inst.getOperand(OpNum); 4115 if (!Op.isImm()) 4116 return true; 4117 4118 uint64_t Offset = Op.getImm(); 4119 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 4120 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 4121 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 4122 return true; 4123 4124 Error(getSMEMOffsetLoc(Operands), 4125 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 4126 "expected a 21-bit signed offset"); 4127 4128 return false; 4129 } 4130 4131 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 4132 unsigned Opcode = Inst.getOpcode(); 4133 const MCInstrDesc &Desc = MII.get(Opcode); 4134 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 4135 return true; 4136 4137 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4138 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4139 4140 const int OpIndices[] = { Src0Idx, Src1Idx }; 4141 4142 unsigned NumExprs = 0; 4143 unsigned NumLiterals = 0; 4144 uint32_t LiteralValue; 4145 4146 for (int OpIdx : OpIndices) { 4147 if (OpIdx == -1) break; 4148 4149 const MCOperand &MO = Inst.getOperand(OpIdx); 4150 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 4151 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 4152 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4153 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4154 if (NumLiterals == 0 || LiteralValue != Value) { 4155 LiteralValue = Value; 4156 ++NumLiterals; 4157 } 4158 } else if (MO.isExpr()) { 4159 ++NumExprs; 4160 } 4161 } 4162 } 4163 4164 return NumLiterals + NumExprs <= 1; 4165 } 4166 4167 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4168 const unsigned Opc = Inst.getOpcode(); 4169 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 4170 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 4171 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4172 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4173 4174 if (OpSel & ~3) 4175 return false; 4176 } 4177 4178 if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) { 4179 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4180 if (OpSelIdx != -1) { 4181 if (Inst.getOperand(OpSelIdx).getImm() != 0) 4182 return false; 4183 } 4184 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 4185 if (OpSelHiIdx != -1) { 4186 if (Inst.getOperand(OpSelHiIdx).getImm() != -1) 4187 return false; 4188 } 4189 } 4190 4191 return true; 4192 } 4193 4194 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4195 const OperandVector &Operands) { 4196 const unsigned Opc = Inst.getOpcode(); 4197 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4198 if (DppCtrlIdx < 0) 4199 return true; 4200 unsigned DppCtrl = 
Inst.getOperand(DppCtrlIdx).getImm(); 4201 4202 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 4203 // DPP64 is supported for row_newbcast only. 4204 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4205 if (Src0Idx >= 0 && 4206 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 4207 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4208 Error(S, "64 bit dpp only supports row_newbcast"); 4209 return false; 4210 } 4211 } 4212 4213 return true; 4214 } 4215 4216 // Check if VCC register matches wavefront size 4217 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4218 auto FB = getFeatureBits(); 4219 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4220 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4221 } 4222 4223 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4224 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4225 const OperandVector &Operands) { 4226 unsigned Opcode = Inst.getOpcode(); 4227 const MCInstrDesc &Desc = MII.get(Opcode); 4228 const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); 4229 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4230 ImmIdx == -1) 4231 return true; 4232 4233 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4234 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4235 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 4236 4237 const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx}; 4238 4239 unsigned NumExprs = 0; 4240 unsigned NumLiterals = 0; 4241 uint32_t LiteralValue; 4242 4243 for (int OpIdx : OpIndices) { 4244 if (OpIdx == -1) 4245 continue; 4246 4247 const MCOperand &MO = Inst.getOperand(OpIdx); 4248 if (!MO.isImm() && !MO.isExpr()) 4249 continue; 4250 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 4251 continue; 4252 4253 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 4254 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 4255 Error(getConstLoc(Operands), 4256 "inline constants are not allowed for this operand"); 4257 return false; 4258 } 4259 4260 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4261 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4262 if (NumLiterals == 0 || LiteralValue != Value) { 4263 LiteralValue = Value; 4264 ++NumLiterals; 4265 } 4266 } else if (MO.isExpr()) { 4267 ++NumExprs; 4268 } 4269 } 4270 NumLiterals += NumExprs; 4271 4272 if (!NumLiterals) 4273 return true; 4274 4275 if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4276 Error(getLitLoc(Operands), "literal operands are not supported"); 4277 return false; 4278 } 4279 4280 if (NumLiterals > 1) { 4281 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4282 return false; 4283 } 4284 4285 return true; 4286 } 4287 4288 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4289 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4290 const MCRegisterInfo *MRI) { 4291 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4292 if (OpIdx < 0) 4293 return -1; 4294 4295 const MCOperand &Op = Inst.getOperand(OpIdx); 4296 if (!Op.isReg()) 4297 return -1; 4298 4299 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4300 auto Reg = Sub ? Sub : Op.getReg(); 4301 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4302 return AGPR32.contains(Reg) ? 
1 : 0; 4303 } 4304 4305 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4306 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4307 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4308 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4309 SIInstrFlags::DS)) == 0) 4310 return true; 4311 4312 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4313 : AMDGPU::OpName::vdata; 4314 4315 const MCRegisterInfo *MRI = getMRI(); 4316 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4317 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4318 4319 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4320 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4321 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4322 return false; 4323 } 4324 4325 auto FB = getFeatureBits(); 4326 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4327 if (DataAreg < 0 || DstAreg < 0) 4328 return true; 4329 return DstAreg == DataAreg; 4330 } 4331 4332 return DstAreg < 1 && DataAreg < 1; 4333 } 4334 4335 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4336 auto FB = getFeatureBits(); 4337 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4338 return true; 4339 4340 const MCRegisterInfo *MRI = getMRI(); 4341 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4342 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4343 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4344 const MCOperand &Op = Inst.getOperand(I); 4345 if (!Op.isReg()) 4346 continue; 4347 4348 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4349 if (!Sub) 4350 continue; 4351 4352 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4353 return false; 4354 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4355 return false; 4356 } 4357 4358 return true; 4359 } 4360 4361 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const { 4362 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4363 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4364 if (Op.isBLGP()) 4365 return Op.getStartLoc(); 4366 } 4367 return SMLoc(); 4368 } 4369 4370 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst, 4371 const OperandVector &Operands) { 4372 unsigned Opc = Inst.getOpcode(); 4373 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp); 4374 if (BlgpIdx == -1) 4375 return true; 4376 SMLoc BLGPLoc = getBLGPLoc(Operands); 4377 if (!BLGPLoc.isValid()) 4378 return true; 4379 bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:"); 4380 auto FB = getFeatureBits(); 4381 bool UsesNeg = false; 4382 if (FB[AMDGPU::FeatureGFX940Insts]) { 4383 switch (Opc) { 4384 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: 4385 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: 4386 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: 4387 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: 4388 UsesNeg = true; 4389 } 4390 } 4391 4392 if (IsNeg == UsesNeg) 4393 return true; 4394 4395 Error(BLGPLoc, 4396 UsesNeg ? "invalid modifier: blgp is not supported" 4397 : "invalid modifier: neg is not supported"); 4398 4399 return false; 4400 } 4401 4402 // gfx90a has an undocumented limitation: 4403 // DS_GWS opcodes must use even aligned registers. 
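// Illustrative example (assumed syntax): "ds_gws_init v2 gds" passes the
// alignment check below on gfx90a, while the same instruction with v3 as
// data0 is rejected.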
4404 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4405 const OperandVector &Operands) { 4406 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4407 return true; 4408 4409 int Opc = Inst.getOpcode(); 4410 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4411 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4412 return true; 4413 4414 const MCRegisterInfo *MRI = getMRI(); 4415 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4416 int Data0Pos = 4417 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4418 assert(Data0Pos != -1); 4419 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4420 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0); 4421 if (RegIdx & 1) { 4422 SMLoc RegLoc = getRegLoc(Reg, Operands); 4423 Error(RegLoc, "vgpr must be even aligned"); 4424 return false; 4425 } 4426 4427 return true; 4428 } 4429 4430 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4431 const OperandVector &Operands, 4432 const SMLoc &IDLoc) { 4433 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4434 AMDGPU::OpName::cpol); 4435 if (CPolPos == -1) 4436 return true; 4437 4438 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4439 4440 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4441 if (TSFlags & SIInstrFlags::SMRD) { 4442 if (CPol && (isSI() || isCI())) { 4443 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4444 Error(S, "cache policy is not supported for SMRD instructions"); 4445 return false; 4446 } 4447 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) { 4448 Error(IDLoc, "invalid cache policy for SMEM instruction"); 4449 return false; 4450 } 4451 } 4452 4453 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { 4454 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4455 StringRef CStr(S.getPointer()); 4456 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4457 Error(S, "scc is not supported on this GPU"); 4458 return false; 4459 } 4460 4461 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4462 return true; 4463 4464 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4465 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4466 Error(IDLoc, isGFX940() ? "instruction must use sc0" 4467 : "instruction must use glc"); 4468 return false; 4469 } 4470 } else { 4471 if (CPol & CPol::GLC) { 4472 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4473 StringRef CStr(S.getPointer()); 4474 S = SMLoc::getFromPointer( 4475 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]); 4476 Error(S, isGFX940() ? "instruction must not use sc0" 4477 : "instruction must not use glc"); 4478 return false; 4479 } 4480 } 4481 4482 return true; 4483 } 4484 4485 bool AMDGPUAsmParser::validateFlatLdsDMA(const MCInst &Inst, 4486 const OperandVector &Operands, 4487 const SMLoc &IDLoc) { 4488 if (isGFX940()) 4489 return true; 4490 4491 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4492 if ((TSFlags & (SIInstrFlags::VALU | SIInstrFlags::FLAT)) != 4493 (SIInstrFlags::VALU | SIInstrFlags::FLAT)) 4494 return true; 4495 // This is FLAT LDS DMA. 4496 4497 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyLDS, Operands); 4498 StringRef CStr(S.getPointer()); 4499 if (!CStr.startswith("lds")) { 4500 // This is an incorrectly selected LDS DMA version of a FLAT load opcode. 4501 // The LDS version should have the 'lds' modifier, but it follows the 4502 // optional operands, so its absence is ignored by the matcher. 
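// Since the matcher cannot tell the two forms apart from the operands alone,
// the raw source text at the expected 'lds' position was inspected above;
// reaching this point means the modifier is missing and the match is bogus.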
4503 Error(IDLoc, "invalid operands for instruction"); 4504 return false; 4505 } 4506 4507 return true; 4508 } 4509 4510 bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) { 4511 if (!isGFX11Plus()) 4512 return true; 4513 for (auto &Operand : Operands) { 4514 if (!Operand->isReg()) 4515 continue; 4516 unsigned Reg = Operand->getReg(); 4517 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) { 4518 Error(getRegLoc(Reg, Operands), 4519 "execz and vccz are not supported on this GPU"); 4520 return false; 4521 } 4522 } 4523 return true; 4524 } 4525 4526 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4527 const SMLoc &IDLoc, 4528 const OperandVector &Operands) { 4529 if (auto ErrMsg = validateLdsDirect(Inst)) { 4530 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4531 return false; 4532 } 4533 if (!validateSOPLiteral(Inst)) { 4534 Error(getLitLoc(Operands), 4535 "only one literal operand is allowed"); 4536 return false; 4537 } 4538 if (!validateVOPLiteral(Inst, Operands)) { 4539 return false; 4540 } 4541 if (!validateConstantBusLimitations(Inst, Operands)) { 4542 return false; 4543 } 4544 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4545 return false; 4546 } 4547 if (!validateIntClampSupported(Inst)) { 4548 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4549 "integer clamping is not supported on this GPU"); 4550 return false; 4551 } 4552 if (!validateOpSel(Inst)) { 4553 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4554 "invalid op_sel operand"); 4555 return false; 4556 } 4557 if (!validateDPP(Inst, Operands)) { 4558 return false; 4559 } 4560 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 4561 if (!validateMIMGD16(Inst)) { 4562 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4563 "d16 modifier is not supported on this GPU"); 4564 return false; 4565 } 4566 if (!validateMIMGDim(Inst)) { 4567 Error(IDLoc, "dim modifier is required on this GPU"); 4568 return false; 4569 } 4570 if (!validateMIMGMSAA(Inst)) { 4571 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4572 "invalid dim; must be MSAA type"); 4573 return false; 4574 } 4575 if (auto ErrMsg = validateMIMGDataSize(Inst)) { 4576 Error(IDLoc, *ErrMsg); 4577 return false; 4578 } 4579 if (!validateMIMGAddrSize(Inst)) { 4580 Error(IDLoc, 4581 "image address size does not match dim and a16"); 4582 return false; 4583 } 4584 if (!validateMIMGAtomicDMask(Inst)) { 4585 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4586 "invalid atomic image dmask"); 4587 return false; 4588 } 4589 if (!validateMIMGGatherDMask(Inst)) { 4590 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4591 "invalid image_gather dmask: only one bit must be set"); 4592 return false; 4593 } 4594 if (!validateMovrels(Inst, Operands)) { 4595 return false; 4596 } 4597 if (!validateFlatOffset(Inst, Operands)) { 4598 return false; 4599 } 4600 if (!validateSMEMOffset(Inst, Operands)) { 4601 return false; 4602 } 4603 if (!validateMAIAccWrite(Inst, Operands)) { 4604 return false; 4605 } 4606 if (!validateMFMA(Inst, Operands)) { 4607 return false; 4608 } 4609 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4610 return false; 4611 } 4612 4613 if (!validateAGPRLdSt(Inst)) { 4614 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4615 ? 
"invalid register class: data and dst should be all VGPR or AGPR" 4616 : "invalid register class: agpr loads and stores not supported on this GPU" 4617 ); 4618 return false; 4619 } 4620 if (!validateVGPRAlign(Inst)) { 4621 Error(IDLoc, 4622 "invalid register class: vgpr tuples must be 64 bit aligned"); 4623 return false; 4624 } 4625 if (!validateGWS(Inst, Operands)) { 4626 return false; 4627 } 4628 4629 if (!validateBLGP(Inst, Operands)) { 4630 return false; 4631 } 4632 4633 if (!validateDivScale(Inst)) { 4634 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4635 return false; 4636 } 4637 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4638 return false; 4639 } 4640 if (!validateExeczVcczOperands(Operands)) { 4641 return false; 4642 } 4643 4644 if (!validateFlatLdsDMA(Inst, Operands, IDLoc)) { 4645 return false; 4646 } 4647 4648 return true; 4649 } 4650 4651 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4652 const FeatureBitset &FBS, 4653 unsigned VariantID = 0); 4654 4655 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4656 const FeatureBitset &AvailableFeatures, 4657 unsigned VariantID); 4658 4659 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4660 const FeatureBitset &FBS) { 4661 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4662 } 4663 4664 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4665 const FeatureBitset &FBS, 4666 ArrayRef<unsigned> Variants) { 4667 for (auto Variant : Variants) { 4668 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4669 return true; 4670 } 4671 4672 return false; 4673 } 4674 4675 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4676 const SMLoc &IDLoc) { 4677 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4678 4679 // Check if requested instruction variant is supported. 4680 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4681 return false; 4682 4683 // This instruction is not supported. 4684 // Clear any other pending errors because they are no longer relevant. 4685 getParser().clearPendingErrors(); 4686 4687 // Requested instruction variant is not supported. 4688 // Check if any other variants are supported. 4689 StringRef VariantName = getMatchedVariantName(); 4690 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4691 return Error(IDLoc, 4692 Twine(VariantName, 4693 " variant of this instruction is not supported")); 4694 } 4695 4696 // Finally check if this instruction is supported on any other GPU. 4697 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4698 return Error(IDLoc, "instruction not supported on this GPU"); 4699 } 4700 4701 // Instruction not supported on any GPU. Probably a typo. 4702 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4703 return Error(IDLoc, "invalid instruction" + Suggestion); 4704 } 4705 4706 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4707 OperandVector &Operands, 4708 MCStreamer &Out, 4709 uint64_t &ErrorInfo, 4710 bool MatchingInlineAsm) { 4711 MCInst Inst; 4712 unsigned Result = Match_Success; 4713 for (auto Variant : getMatchedVariants()) { 4714 uint64_t EI; 4715 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4716 Variant); 4717 // We order match statuses from least to most specific. 
We use most specific 4718 // status as resulting 4719 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4720 if ((R == Match_Success) || 4721 (R == Match_PreferE32) || 4722 (R == Match_MissingFeature && Result != Match_PreferE32) || 4723 (R == Match_InvalidOperand && Result != Match_MissingFeature 4724 && Result != Match_PreferE32) || 4725 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4726 && Result != Match_MissingFeature 4727 && Result != Match_PreferE32)) { 4728 Result = R; 4729 ErrorInfo = EI; 4730 } 4731 if (R == Match_Success) 4732 break; 4733 } 4734 4735 if (Result == Match_Success) { 4736 if (!validateInstruction(Inst, IDLoc, Operands)) { 4737 return true; 4738 } 4739 Inst.setLoc(IDLoc); 4740 Out.emitInstruction(Inst, getSTI()); 4741 return false; 4742 } 4743 4744 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4745 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4746 return true; 4747 } 4748 4749 switch (Result) { 4750 default: break; 4751 case Match_MissingFeature: 4752 // It has been verified that the specified instruction 4753 // mnemonic is valid. A match was found but it requires 4754 // features which are not supported on this GPU. 4755 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4756 4757 case Match_InvalidOperand: { 4758 SMLoc ErrorLoc = IDLoc; 4759 if (ErrorInfo != ~0ULL) { 4760 if (ErrorInfo >= Operands.size()) { 4761 return Error(IDLoc, "too few operands for instruction"); 4762 } 4763 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4764 if (ErrorLoc == SMLoc()) 4765 ErrorLoc = IDLoc; 4766 } 4767 return Error(ErrorLoc, "invalid operand for instruction"); 4768 } 4769 4770 case Match_PreferE32: 4771 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4772 "should be encoded as e32"); 4773 case Match_MnemonicFail: 4774 llvm_unreachable("Invalid instructions should have been handled already"); 4775 } 4776 llvm_unreachable("Implement any new match types added!"); 4777 } 4778 4779 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4780 int64_t Tmp = -1; 4781 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4782 return true; 4783 } 4784 if (getParser().parseAbsoluteExpression(Tmp)) { 4785 return true; 4786 } 4787 Ret = static_cast<uint32_t>(Tmp); 4788 return false; 4789 } 4790 4791 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4792 uint32_t &Minor) { 4793 if (ParseAsAbsoluteExpression(Major)) 4794 return TokError("invalid major version"); 4795 4796 if (!trySkipToken(AsmToken::Comma)) 4797 return TokError("minor version number required, comma expected"); 4798 4799 if (ParseAsAbsoluteExpression(Minor)) 4800 return TokError("invalid minor version"); 4801 4802 return false; 4803 } 4804 4805 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4806 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4807 return TokError("directive only supported for amdgcn architecture"); 4808 4809 std::string TargetIDDirective; 4810 SMLoc TargetStart = getTok().getLoc(); 4811 if (getParser().parseEscapedString(TargetIDDirective)) 4812 return true; 4813 4814 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4815 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4816 return getParser().Error(TargetRange.Start, 4817 (Twine(".amdgcn_target directive's target id ") + 4818 Twine(TargetIDDirective) + 4819 Twine(" does not match the specified target id ") + 4820 
Twine(getTargetStreamer().getTargetID()->toString())).str()); 4821 4822 return false; 4823 } 4824 4825 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4826 return Error(Range.Start, "value out of range", Range); 4827 } 4828 4829 bool AMDGPUAsmParser::calculateGPRBlocks( 4830 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4831 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4832 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4833 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4834 // TODO(scott.linder): These calculations are duplicated from 4835 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 4836 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4837 4838 unsigned NumVGPRs = NextFreeVGPR; 4839 unsigned NumSGPRs = NextFreeSGPR; 4840 4841 if (Version.Major >= 10) 4842 NumSGPRs = 0; 4843 else { 4844 unsigned MaxAddressableNumSGPRs = 4845 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4846 4847 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4848 NumSGPRs > MaxAddressableNumSGPRs) 4849 return OutOfRangeError(SGPRRange); 4850 4851 NumSGPRs += 4852 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4853 4854 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4855 NumSGPRs > MaxAddressableNumSGPRs) 4856 return OutOfRangeError(SGPRRange); 4857 4858 if (Features.test(FeatureSGPRInitBug)) 4859 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4860 } 4861 4862 VGPRBlocks = 4863 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4864 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4865 4866 return false; 4867 } 4868 4869 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4870 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4871 return TokError("directive only supported for amdgcn architecture"); 4872 4873 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4874 return TokError("directive only supported for amdhsa OS"); 4875 4876 StringRef KernelName; 4877 if (getParser().parseIdentifier(KernelName)) 4878 return true; 4879 4880 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4881 4882 StringSet<> Seen; 4883 4884 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4885 4886 SMRange VGPRRange; 4887 uint64_t NextFreeVGPR = 0; 4888 uint64_t AccumOffset = 0; 4889 uint64_t SharedVGPRCount = 0; 4890 SMRange SGPRRange; 4891 uint64_t NextFreeSGPR = 0; 4892 4893 // Count the number of user SGPRs implied from the enabled feature bits. 4894 unsigned ImpliedUserSGPRCount = 0; 4895 4896 // Track if the asm explicitly contains the directive for the user SGPR 4897 // count. 
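// (i.e. an explicit ".amdhsa_user_sgpr_count"; when present it is checked
// against ImpliedUserSGPRCount after all directives have been parsed.)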
4898 Optional<unsigned> ExplicitUserSGPRCount; 4899 bool ReserveVCC = true; 4900 bool ReserveFlatScr = true; 4901 Optional<bool> EnableWavefrontSize32; 4902 4903 while (true) { 4904 while (trySkipToken(AsmToken::EndOfStatement)); 4905 4906 StringRef ID; 4907 SMRange IDRange = getTok().getLocRange(); 4908 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4909 return true; 4910 4911 if (ID == ".end_amdhsa_kernel") 4912 break; 4913 4914 if (Seen.find(ID) != Seen.end()) 4915 return TokError(".amdhsa_ directives cannot be repeated"); 4916 Seen.insert(ID); 4917 4918 SMLoc ValStart = getLoc(); 4919 int64_t IVal; 4920 if (getParser().parseAbsoluteExpression(IVal)) 4921 return true; 4922 SMLoc ValEnd = getLoc(); 4923 SMRange ValRange = SMRange(ValStart, ValEnd); 4924 4925 if (IVal < 0) 4926 return OutOfRangeError(ValRange); 4927 4928 uint64_t Val = IVal; 4929 4930 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4931 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4932 return OutOfRangeError(RANGE); \ 4933 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4934 4935 if (ID == ".amdhsa_group_segment_fixed_size") { 4936 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4937 return OutOfRangeError(ValRange); 4938 KD.group_segment_fixed_size = Val; 4939 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4940 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4941 return OutOfRangeError(ValRange); 4942 KD.private_segment_fixed_size = Val; 4943 } else if (ID == ".amdhsa_kernarg_size") { 4944 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4945 return OutOfRangeError(ValRange); 4946 KD.kernarg_size = Val; 4947 } else if (ID == ".amdhsa_user_sgpr_count") { 4948 ExplicitUserSGPRCount = Val; 4949 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4950 if (hasArchitectedFlatScratch()) 4951 return Error(IDRange.Start, 4952 "directive is not supported with architected flat scratch", 4953 IDRange); 4954 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4955 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4956 Val, ValRange); 4957 if (Val) 4958 ImpliedUserSGPRCount += 4; 4959 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4960 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4961 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4962 ValRange); 4963 if (Val) 4964 ImpliedUserSGPRCount += 2; 4965 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4966 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4967 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4968 ValRange); 4969 if (Val) 4970 ImpliedUserSGPRCount += 2; 4971 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4972 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4973 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4974 Val, ValRange); 4975 if (Val) 4976 ImpliedUserSGPRCount += 2; 4977 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4978 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4979 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4980 ValRange); 4981 if (Val) 4982 ImpliedUserSGPRCount += 2; 4983 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4984 if (hasArchitectedFlatScratch()) 4985 return Error(IDRange.Start, 4986 "directive is not supported with architected flat scratch", 4987 IDRange); 4988 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4989 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4990 ValRange); 4991 if (Val) 4992 ImpliedUserSGPRCount += 2; 4993 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4994 
PARSE_BITS_ENTRY(KD.kernel_code_properties, 4995 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4996 Val, ValRange); 4997 if (Val) 4998 ImpliedUserSGPRCount += 1; 4999 } else if (ID == ".amdhsa_wavefront_size32") { 5000 if (IVersion.Major < 10) 5001 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5002 EnableWavefrontSize32 = Val; 5003 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5004 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 5005 Val, ValRange); 5006 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 5007 if (hasArchitectedFlatScratch()) 5008 return Error(IDRange.Start, 5009 "directive is not supported with architected flat scratch", 5010 IDRange); 5011 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5012 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 5013 } else if (ID == ".amdhsa_enable_private_segment") { 5014 if (!hasArchitectedFlatScratch()) 5015 return Error( 5016 IDRange.Start, 5017 "directive is not supported without architected flat scratch", 5018 IDRange); 5019 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5020 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 5021 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 5022 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5023 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 5024 ValRange); 5025 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 5026 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5027 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 5028 ValRange); 5029 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 5030 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5031 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 5032 ValRange); 5033 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 5034 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5035 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 5036 ValRange); 5037 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 5038 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5039 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 5040 ValRange); 5041 } else if (ID == ".amdhsa_next_free_vgpr") { 5042 VGPRRange = ValRange; 5043 NextFreeVGPR = Val; 5044 } else if (ID == ".amdhsa_next_free_sgpr") { 5045 SGPRRange = ValRange; 5046 NextFreeSGPR = Val; 5047 } else if (ID == ".amdhsa_accum_offset") { 5048 if (!isGFX90A()) 5049 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5050 AccumOffset = Val; 5051 } else if (ID == ".amdhsa_reserve_vcc") { 5052 if (!isUInt<1>(Val)) 5053 return OutOfRangeError(ValRange); 5054 ReserveVCC = Val; 5055 } else if (ID == ".amdhsa_reserve_flat_scratch") { 5056 if (IVersion.Major < 7) 5057 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 5058 if (hasArchitectedFlatScratch()) 5059 return Error(IDRange.Start, 5060 "directive is not supported with architected flat scratch", 5061 IDRange); 5062 if (!isUInt<1>(Val)) 5063 return OutOfRangeError(ValRange); 5064 ReserveFlatScr = Val; 5065 } else if (ID == ".amdhsa_reserve_xnack_mask") { 5066 if (IVersion.Major < 8) 5067 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 5068 if (!isUInt<1>(Val)) 5069 return OutOfRangeError(ValRange); 5070 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 5071 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 5072 IDRange); 5073 } else if (ID == ".amdhsa_float_round_mode_32") { 5074 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5075 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 5076 } else if (ID == ".amdhsa_float_round_mode_16_64") { 
5077 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5078 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 5079 } else if (ID == ".amdhsa_float_denorm_mode_32") { 5080 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5081 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 5082 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 5083 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5084 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 5085 ValRange); 5086 } else if (ID == ".amdhsa_dx10_clamp") { 5087 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5088 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 5089 } else if (ID == ".amdhsa_ieee_mode") { 5090 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 5091 Val, ValRange); 5092 } else if (ID == ".amdhsa_fp16_overflow") { 5093 if (IVersion.Major < 9) 5094 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 5095 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 5096 ValRange); 5097 } else if (ID == ".amdhsa_tg_split") { 5098 if (!isGFX90A()) 5099 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5100 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 5101 ValRange); 5102 } else if (ID == ".amdhsa_workgroup_processor_mode") { 5103 if (IVersion.Major < 10) 5104 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5105 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 5106 ValRange); 5107 } else if (ID == ".amdhsa_memory_ordered") { 5108 if (IVersion.Major < 10) 5109 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5110 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 5111 ValRange); 5112 } else if (ID == ".amdhsa_forward_progress") { 5113 if (IVersion.Major < 10) 5114 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5115 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 5116 ValRange); 5117 } else if (ID == ".amdhsa_shared_vgpr_count") { 5118 if (IVersion.Major < 10) 5119 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5120 SharedVGPRCount = Val; 5121 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, 5122 COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT, Val, 5123 ValRange); 5124 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 5125 PARSE_BITS_ENTRY( 5126 KD.compute_pgm_rsrc2, 5127 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 5128 ValRange); 5129 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 5130 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5131 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 5132 Val, ValRange); 5133 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 5134 PARSE_BITS_ENTRY( 5135 KD.compute_pgm_rsrc2, 5136 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 5137 ValRange); 5138 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 5139 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5140 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 5141 Val, ValRange); 5142 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 5143 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5144 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 5145 Val, ValRange); 5146 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 5147 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5148 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 5149 Val, ValRange); 5150 } else if (ID == ".amdhsa_exception_int_div_zero") { 5151 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5152 
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 5153 Val, ValRange); 5154 } else { 5155 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 5156 } 5157 5158 #undef PARSE_BITS_ENTRY 5159 } 5160 5161 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 5162 return TokError(".amdhsa_next_free_vgpr directive is required"); 5163 5164 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 5165 return TokError(".amdhsa_next_free_sgpr directive is required"); 5166 5167 unsigned VGPRBlocks; 5168 unsigned SGPRBlocks; 5169 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 5170 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 5171 EnableWavefrontSize32, NextFreeVGPR, 5172 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 5173 SGPRBlocks)) 5174 return true; 5175 5176 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 5177 VGPRBlocks)) 5178 return OutOfRangeError(VGPRRange); 5179 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5180 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 5181 5182 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 5183 SGPRBlocks)) 5184 return OutOfRangeError(SGPRRange); 5185 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5186 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 5187 SGPRBlocks); 5188 5189 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) 5190 return TokError(".amdhsa_user_sgpr_count smaller than implied by " 5191 "enabled user SGPRs"); 5192 5193 unsigned UserSGPRCount = 5194 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount; 5195 5196 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 5197 return TokError("too many user SGPRs enabled"); 5198 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 5199 UserSGPRCount); 5200 5201 if (isGFX90A()) { 5202 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 5203 return TokError(".amdhsa_accum_offset directive is required"); 5204 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 5205 return TokError("accum_offset should be in range [4..256] in " 5206 "increments of 4"); 5207 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 5208 return TokError("accum_offset exceeds total VGPR allocation"); 5209 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 5210 (AccumOffset / 4 - 1)); 5211 } 5212 5213 if (IVersion.Major == 10) { 5214 // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY 5215 if (SharedVGPRCount && EnableWavefrontSize32) { 5216 return TokError("shared_vgpr_count directive not valid on " 5217 "wavefront size 32"); 5218 } 5219 if (SharedVGPRCount * 2 + VGPRBlocks > 63) { 5220 return TokError("shared_vgpr_count*2 + " 5221 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot " 5222 "exceed 63"); 5223 } 5224 } 5225 5226 getTargetStreamer().EmitAmdhsaKernelDescriptor( 5227 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 5228 ReserveFlatScr); 5229 return false; 5230 } 5231 5232 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 5233 uint32_t Major; 5234 uint32_t Minor; 5235 5236 if (ParseDirectiveMajorMinor(Major, Minor)) 5237 return true; 5238 5239 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 5240 return false; 5241 } 5242 5243 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 5244 uint32_t Major; 5245 uint32_t Minor; 5246 uint32_t Stepping; 5247 StringRef VendorName; 5248 StringRef ArchName; 5249 5250 // If this directive has no 
arguments, then use the ISA version for the 5251 // targeted GPU. 5252 if (isToken(AsmToken::EndOfStatement)) { 5253 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5254 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 5255 ISA.Stepping, 5256 "AMD", "AMDGPU"); 5257 return false; 5258 } 5259 5260 if (ParseDirectiveMajorMinor(Major, Minor)) 5261 return true; 5262 5263 if (!trySkipToken(AsmToken::Comma)) 5264 return TokError("stepping version number required, comma expected"); 5265 5266 if (ParseAsAbsoluteExpression(Stepping)) 5267 return TokError("invalid stepping version"); 5268 5269 if (!trySkipToken(AsmToken::Comma)) 5270 return TokError("vendor name required, comma expected"); 5271 5272 if (!parseString(VendorName, "invalid vendor name")) 5273 return true; 5274 5275 if (!trySkipToken(AsmToken::Comma)) 5276 return TokError("arch name required, comma expected"); 5277 5278 if (!parseString(ArchName, "invalid arch name")) 5279 return true; 5280 5281 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 5282 VendorName, ArchName); 5283 return false; 5284 } 5285 5286 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 5287 amd_kernel_code_t &Header) { 5288 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 5289 // assembly for backwards compatibility. 5290 if (ID == "max_scratch_backing_memory_byte_size") { 5291 Parser.eatToEndOfStatement(); 5292 return false; 5293 } 5294 5295 SmallString<40> ErrStr; 5296 raw_svector_ostream Err(ErrStr); 5297 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 5298 return TokError(Err.str()); 5299 } 5300 Lex(); 5301 5302 if (ID == "enable_wavefront_size32") { 5303 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 5304 if (!isGFX10Plus()) 5305 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 5306 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5307 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 5308 } else { 5309 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5310 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 5311 } 5312 } 5313 5314 if (ID == "wavefront_size") { 5315 if (Header.wavefront_size == 5) { 5316 if (!isGFX10Plus()) 5317 return TokError("wavefront_size=5 is only allowed on GFX10+"); 5318 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5319 return TokError("wavefront_size=5 requires +WavefrontSize32"); 5320 } else if (Header.wavefront_size == 6) { 5321 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5322 return TokError("wavefront_size=6 requires +WavefrontSize64"); 5323 } 5324 } 5325 5326 if (ID == "enable_wgp_mode") { 5327 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 5328 !isGFX10Plus()) 5329 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 5330 } 5331 5332 if (ID == "enable_mem_ordered") { 5333 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 5334 !isGFX10Plus()) 5335 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 5336 } 5337 5338 if (ID == "enable_fwd_progress") { 5339 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 5340 !isGFX10Plus()) 5341 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 5342 } 5343 5344 return false; 5345 } 5346 5347 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5348 amd_kernel_code_t Header; 5349 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 5350 5351 while (true) { 
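// Each record in this block is a single "<field> = <value>" statement handled
// by ParseAMDKernelCodeTValue above, e.g. (illustrative) "wavefront_size = 6"
// or "enable_wavefront_size32 = 0"; parsing stops at the
// .end_amd_kernel_code_t terminator.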
5352 // Lex EndOfStatement. This is in a while loop, because lexing a comment 5353 // will set the current token to EndOfStatement. 5354 while(trySkipToken(AsmToken::EndOfStatement)); 5355 5356 StringRef ID; 5357 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5358 return true; 5359 5360 if (ID == ".end_amd_kernel_code_t") 5361 break; 5362 5363 if (ParseAMDKernelCodeTValue(ID, Header)) 5364 return true; 5365 } 5366 5367 getTargetStreamer().EmitAMDKernelCodeT(Header); 5368 5369 return false; 5370 } 5371 5372 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5373 StringRef KernelName; 5374 if (!parseId(KernelName, "expected symbol name")) 5375 return true; 5376 5377 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5378 ELF::STT_AMDGPU_HSA_KERNEL); 5379 5380 KernelScope.initialize(getContext()); 5381 return false; 5382 } 5383 5384 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5385 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5386 return Error(getLoc(), 5387 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5388 "architectures"); 5389 } 5390 5391 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5392 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5393 return Error(getParser().getTok().getLoc(), "target id must match options"); 5394 5395 getTargetStreamer().EmitISAVersion(); 5396 Lex(); 5397 5398 return false; 5399 } 5400 5401 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5402 const char *AssemblerDirectiveBegin; 5403 const char *AssemblerDirectiveEnd; 5404 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5405 isHsaAbiVersion3AndAbove(&getSTI()) 5406 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 5407 HSAMD::V3::AssemblerDirectiveEnd) 5408 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 5409 HSAMD::AssemblerDirectiveEnd); 5410 5411 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5412 return Error(getLoc(), 5413 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5414 "not available on non-amdhsa OSes")).str()); 5415 } 5416 5417 std::string HSAMetadataString; 5418 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5419 HSAMetadataString)) 5420 return true; 5421 5422 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5423 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5424 return Error(getLoc(), "invalid HSA metadata"); 5425 } else { 5426 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5427 return Error(getLoc(), "invalid HSA metadata"); 5428 } 5429 5430 return false; 5431 } 5432 5433 /// Common code to parse out a block of text (typically YAML) between start and 5434 /// end directives. 
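/// The opening directive is assumed to have been consumed by the caller;
/// statements are accumulated (separated by the target's separator string)
/// into \p CollectString until \p AssemblerDirectiveEnd is reached.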
5435 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5436 const char *AssemblerDirectiveEnd, 5437 std::string &CollectString) { 5438 5439 raw_string_ostream CollectStream(CollectString); 5440 5441 getLexer().setSkipSpace(false); 5442 5443 bool FoundEnd = false; 5444 while (!isToken(AsmToken::Eof)) { 5445 while (isToken(AsmToken::Space)) { 5446 CollectStream << getTokenStr(); 5447 Lex(); 5448 } 5449 5450 if (trySkipId(AssemblerDirectiveEnd)) { 5451 FoundEnd = true; 5452 break; 5453 } 5454 5455 CollectStream << Parser.parseStringToEndOfStatement() 5456 << getContext().getAsmInfo()->getSeparatorString(); 5457 5458 Parser.eatToEndOfStatement(); 5459 } 5460 5461 getLexer().setSkipSpace(true); 5462 5463 if (isToken(AsmToken::Eof) && !FoundEnd) { 5464 return TokError(Twine("expected directive ") + 5465 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5466 } 5467 5468 CollectStream.flush(); 5469 return false; 5470 } 5471 5472 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5473 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5474 std::string String; 5475 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5476 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5477 return true; 5478 5479 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5480 if (!PALMetadata->setFromString(String)) 5481 return Error(getLoc(), "invalid PAL metadata"); 5482 return false; 5483 } 5484 5485 /// Parse the assembler directive for old linear-format PAL metadata. 5486 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5487 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5488 return Error(getLoc(), 5489 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5490 "not available on non-amdpal OSes")).str()); 5491 } 5492 5493 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5494 PALMetadata->setLegacy(); 5495 for (;;) { 5496 uint32_t Key, Value; 5497 if (ParseAsAbsoluteExpression(Key)) { 5498 return TokError(Twine("invalid value in ") + 5499 Twine(PALMD::AssemblerDirective)); 5500 } 5501 if (!trySkipToken(AsmToken::Comma)) { 5502 return TokError(Twine("expected an even number of values in ") + 5503 Twine(PALMD::AssemblerDirective)); 5504 } 5505 if (ParseAsAbsoluteExpression(Value)) { 5506 return TokError(Twine("invalid value in ") + 5507 Twine(PALMD::AssemblerDirective)); 5508 } 5509 PALMetadata->setRegister(Key, Value); 5510 if (!trySkipToken(AsmToken::Comma)) 5511 break; 5512 } 5513 return false; 5514 } 5515 5516 /// ParseDirectiveAMDGPULDS 5517 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5518 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5519 if (getParser().checkForValidSection()) 5520 return true; 5521 5522 StringRef Name; 5523 SMLoc NameLoc = getLoc(); 5524 if (getParser().parseIdentifier(Name)) 5525 return TokError("expected identifier in directive"); 5526 5527 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5528 if (parseToken(AsmToken::Comma, "expected ','")) 5529 return true; 5530 5531 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5532 5533 int64_t Size; 5534 SMLoc SizeLoc = getLoc(); 5535 if (getParser().parseAbsoluteExpression(Size)) 5536 return true; 5537 if (Size < 0) 5538 return Error(SizeLoc, "size must be non-negative"); 5539 if (Size > LocalMemorySize) 5540 return Error(SizeLoc, "size is too large"); 5541 5542 int64_t Alignment = 4; 5543 if (trySkipToken(AsmToken::Comma)) { 5544 SMLoc AlignLoc = getLoc(); 5545 if 
(getParser().parseAbsoluteExpression(Alignment)) 5546 return true; 5547 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5548 return Error(AlignLoc, "alignment must be a power of two"); 5549 5550 // Alignment larger than the size of LDS is possible in theory, as long 5551 // as the linker manages to place to symbol at address 0, but we do want 5552 // to make sure the alignment fits nicely into a 32-bit integer. 5553 if (Alignment >= 1u << 31) 5554 return Error(AlignLoc, "alignment is too large"); 5555 } 5556 5557 if (parseEOL()) 5558 return true; 5559 5560 Symbol->redefineIfPossible(); 5561 if (!Symbol->isUndefined()) 5562 return Error(NameLoc, "invalid symbol redefinition"); 5563 5564 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5565 return false; 5566 } 5567 5568 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5569 StringRef IDVal = DirectiveID.getString(); 5570 5571 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5572 if (IDVal == ".amdhsa_kernel") 5573 return ParseDirectiveAMDHSAKernel(); 5574 5575 // TODO: Restructure/combine with PAL metadata directive. 5576 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5577 return ParseDirectiveHSAMetadata(); 5578 } else { 5579 if (IDVal == ".hsa_code_object_version") 5580 return ParseDirectiveHSACodeObjectVersion(); 5581 5582 if (IDVal == ".hsa_code_object_isa") 5583 return ParseDirectiveHSACodeObjectISA(); 5584 5585 if (IDVal == ".amd_kernel_code_t") 5586 return ParseDirectiveAMDKernelCodeT(); 5587 5588 if (IDVal == ".amdgpu_hsa_kernel") 5589 return ParseDirectiveAMDGPUHsaKernel(); 5590 5591 if (IDVal == ".amd_amdgpu_isa") 5592 return ParseDirectiveISAVersion(); 5593 5594 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5595 return ParseDirectiveHSAMetadata(); 5596 } 5597 5598 if (IDVal == ".amdgcn_target") 5599 return ParseDirectiveAMDGCNTarget(); 5600 5601 if (IDVal == ".amdgpu_lds") 5602 return ParseDirectiveAMDGPULDS(); 5603 5604 if (IDVal == PALMD::AssemblerDirectiveBegin) 5605 return ParseDirectivePALMetadataBegin(); 5606 5607 if (IDVal == PALMD::AssemblerDirective) 5608 return ParseDirectivePALMetadata(); 5609 5610 return true; 5611 } 5612 5613 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5614 unsigned RegNo) { 5615 5616 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo)) 5617 return isGFX9Plus(); 5618 5619 // GFX10+ has 2 more SGPRs 104 and 105. 5620 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo)) 5621 return hasSGPR104_SGPR105(); 5622 5623 switch (RegNo) { 5624 case AMDGPU::SRC_SHARED_BASE: 5625 case AMDGPU::SRC_SHARED_LIMIT: 5626 case AMDGPU::SRC_PRIVATE_BASE: 5627 case AMDGPU::SRC_PRIVATE_LIMIT: 5628 return isGFX9Plus(); 5629 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5630 return isGFX9Plus() && !isGFX11Plus(); 5631 case AMDGPU::TBA: 5632 case AMDGPU::TBA_LO: 5633 case AMDGPU::TBA_HI: 5634 case AMDGPU::TMA: 5635 case AMDGPU::TMA_LO: 5636 case AMDGPU::TMA_HI: 5637 return !isGFX9Plus(); 5638 case AMDGPU::XNACK_MASK: 5639 case AMDGPU::XNACK_MASK_LO: 5640 case AMDGPU::XNACK_MASK_HI: 5641 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5642 case AMDGPU::SGPR_NULL: 5643 return isGFX10Plus(); 5644 default: 5645 break; 5646 } 5647 5648 if (isCI()) 5649 return true; 5650 5651 if (isSI() || isGFX10Plus()) { 5652 // No flat_scr on SI. 5653 // On GFX10Plus flat scratch is not a valid register operand and can only be 5654 // accessed with s_setreg/s_getreg. 
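// For example (illustrative), an operand such as flat_scratch is accepted on
// gfx9 but rejected by the switch below for SI and GFX10+ targets.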
5655 switch (RegNo) { 5656 case AMDGPU::FLAT_SCR: 5657 case AMDGPU::FLAT_SCR_LO: 5658 case AMDGPU::FLAT_SCR_HI: 5659 return false; 5660 default: 5661 return true; 5662 } 5663 } 5664 5665 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5666 // SI/CI have. 5667 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo)) 5668 return hasSGPR102_SGPR103(); 5669 5670 return true; 5671 } 5672 5673 OperandMatchResultTy 5674 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 5675 OperandMode Mode) { 5676 // Try to parse with a custom parser 5677 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 5678 5679 // If we successfully parsed the operand or if there as an error parsing, 5680 // we are done. 5681 // 5682 // If we are parsing after we reach EndOfStatement then this means we 5683 // are appending default values to the Operands list. This is only done 5684 // by custom parser, so we shouldn't continue on to the generic parsing. 5685 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5686 isToken(AsmToken::EndOfStatement)) 5687 return ResTy; 5688 5689 SMLoc RBraceLoc; 5690 SMLoc LBraceLoc = getLoc(); 5691 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 5692 unsigned Prefix = Operands.size(); 5693 5694 for (;;) { 5695 auto Loc = getLoc(); 5696 ResTy = parseReg(Operands); 5697 if (ResTy == MatchOperand_NoMatch) 5698 Error(Loc, "expected a register"); 5699 if (ResTy != MatchOperand_Success) 5700 return MatchOperand_ParseFail; 5701 5702 RBraceLoc = getLoc(); 5703 if (trySkipToken(AsmToken::RBrac)) 5704 break; 5705 5706 if (!skipToken(AsmToken::Comma, 5707 "expected a comma or a closing square bracket")) { 5708 return MatchOperand_ParseFail; 5709 } 5710 } 5711 5712 if (Operands.size() - Prefix > 1) { 5713 Operands.insert(Operands.begin() + Prefix, 5714 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 5715 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 5716 } 5717 5718 return MatchOperand_Success; 5719 } 5720 5721 return parseRegOrImm(Operands); 5722 } 5723 5724 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 5725 // Clear any forced encodings from the previous instruction. 5726 setForcedEncodingSize(0); 5727 setForcedDPP(false); 5728 setForcedSDWA(false); 5729 5730 if (Name.endswith("_e64_dpp")) { 5731 setForcedDPP(true); 5732 setForcedEncodingSize(64); 5733 return Name.substr(0, Name.size() - 8); 5734 } else if (Name.endswith("_e64")) { 5735 setForcedEncodingSize(64); 5736 return Name.substr(0, Name.size() - 4); 5737 } else if (Name.endswith("_e32")) { 5738 setForcedEncodingSize(32); 5739 return Name.substr(0, Name.size() - 4); 5740 } else if (Name.endswith("_dpp")) { 5741 setForcedDPP(true); 5742 return Name.substr(0, Name.size() - 4); 5743 } else if (Name.endswith("_sdwa")) { 5744 setForcedSDWA(true); 5745 return Name.substr(0, Name.size() - 5); 5746 } 5747 return Name; 5748 } 5749 5750 static void applyMnemonicAliases(StringRef &Mnemonic, 5751 const FeatureBitset &Features, 5752 unsigned VariantID); 5753 5754 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5755 StringRef Name, 5756 SMLoc NameLoc, OperandVector &Operands) { 5757 // Add the instruction mnemonic 5758 Name = parseMnemonicSuffix(Name); 5759 5760 // If the target architecture uses MnemonicAlias, call it here to parse 5761 // operands correctly. 
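// applyMnemonicAliases is generated by TableGen from the target's
// MnemonicAlias definitions; it rewrites Name in place when an alias matches
// the current feature set.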
5762 applyMnemonicAliases(Name, getAvailableFeatures(), 0); 5763 5764 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5765 5766 bool IsMIMG = Name.startswith("image_"); 5767 5768 while (!trySkipToken(AsmToken::EndOfStatement)) { 5769 OperandMode Mode = OperandMode_Default; 5770 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5771 Mode = OperandMode_NSA; 5772 CPolSeen = 0; 5773 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5774 5775 if (Res != MatchOperand_Success) { 5776 checkUnsupportedInstruction(Name, NameLoc); 5777 if (!Parser.hasPendingError()) { 5778 // FIXME: use real operand location rather than the current location. 5779 StringRef Msg = 5780 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5781 "not a valid operand."; 5782 Error(getLoc(), Msg); 5783 } 5784 while (!trySkipToken(AsmToken::EndOfStatement)) { 5785 lex(); 5786 } 5787 return true; 5788 } 5789 5790 // Eat the comma or space if there is one. 5791 trySkipToken(AsmToken::Comma); 5792 } 5793 5794 return false; 5795 } 5796 5797 //===----------------------------------------------------------------------===// 5798 // Utility functions 5799 //===----------------------------------------------------------------------===// 5800 5801 OperandMatchResultTy 5802 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5803 5804 if (!trySkipId(Prefix, AsmToken::Colon)) 5805 return MatchOperand_NoMatch; 5806 5807 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5808 } 5809 5810 OperandMatchResultTy 5811 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5812 AMDGPUOperand::ImmTy ImmTy, 5813 bool (*ConvertResult)(int64_t&)) { 5814 SMLoc S = getLoc(); 5815 int64_t Value = 0; 5816 5817 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5818 if (Res != MatchOperand_Success) 5819 return Res; 5820 5821 if (ConvertResult && !ConvertResult(Value)) { 5822 Error(S, "invalid " + StringRef(Prefix) + " value."); 5823 } 5824 5825 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5826 return MatchOperand_Success; 5827 } 5828 5829 OperandMatchResultTy 5830 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5831 OperandVector &Operands, 5832 AMDGPUOperand::ImmTy ImmTy, 5833 bool (*ConvertResult)(int64_t&)) { 5834 SMLoc S = getLoc(); 5835 if (!trySkipId(Prefix, AsmToken::Colon)) 5836 return MatchOperand_NoMatch; 5837 5838 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5839 return MatchOperand_ParseFail; 5840 5841 unsigned Val = 0; 5842 const unsigned MaxSize = 4; 5843 5844 // FIXME: How to verify the number of elements matches the number of src 5845 // operands? 
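// Each element must be 0 or 1 and is packed into bit I of Val below, so e.g.
// (illustrative) op_sel:[0,1,1,0] yields Val == 0b0110.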
5846 for (int I = 0; ; ++I) { 5847 int64_t Op; 5848 SMLoc Loc = getLoc(); 5849 if (!parseExpr(Op)) 5850 return MatchOperand_ParseFail; 5851 5852 if (Op != 0 && Op != 1) { 5853 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5854 return MatchOperand_ParseFail; 5855 } 5856 5857 Val |= (Op << I); 5858 5859 if (trySkipToken(AsmToken::RBrac)) 5860 break; 5861 5862 if (I + 1 == MaxSize) { 5863 Error(getLoc(), "expected a closing square bracket"); 5864 return MatchOperand_ParseFail; 5865 } 5866 5867 if (!skipToken(AsmToken::Comma, "expected a comma")) 5868 return MatchOperand_ParseFail; 5869 } 5870 5871 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5872 return MatchOperand_Success; 5873 } 5874 5875 OperandMatchResultTy 5876 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5877 AMDGPUOperand::ImmTy ImmTy) { 5878 int64_t Bit; 5879 SMLoc S = getLoc(); 5880 5881 if (trySkipId(Name)) { 5882 Bit = 1; 5883 } else if (trySkipId("no", Name)) { 5884 Bit = 0; 5885 } else { 5886 return MatchOperand_NoMatch; 5887 } 5888 5889 if (Name == "r128" && !hasMIMG_R128()) { 5890 Error(S, "r128 modifier is not supported on this GPU"); 5891 return MatchOperand_ParseFail; 5892 } 5893 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5894 Error(S, "a16 modifier is not supported on this GPU"); 5895 return MatchOperand_ParseFail; 5896 } 5897 5898 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5899 ImmTy = AMDGPUOperand::ImmTyR128A16; 5900 5901 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5902 return MatchOperand_Success; 5903 } 5904 5905 OperandMatchResultTy 5906 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5907 unsigned CPolOn = 0; 5908 unsigned CPolOff = 0; 5909 SMLoc S = getLoc(); 5910 5911 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 5912 if (isGFX940() && !Mnemo.startswith("s_")) { 5913 if (trySkipId("sc0")) 5914 CPolOn = AMDGPU::CPol::SC0; 5915 else if (trySkipId("nosc0")) 5916 CPolOff = AMDGPU::CPol::SC0; 5917 else if (trySkipId("nt")) 5918 CPolOn = AMDGPU::CPol::NT; 5919 else if (trySkipId("nont")) 5920 CPolOff = AMDGPU::CPol::NT; 5921 else if (trySkipId("sc1")) 5922 CPolOn = AMDGPU::CPol::SC1; 5923 else if (trySkipId("nosc1")) 5924 CPolOff = AMDGPU::CPol::SC1; 5925 else 5926 return MatchOperand_NoMatch; 5927 } 5928 else if (trySkipId("glc")) 5929 CPolOn = AMDGPU::CPol::GLC; 5930 else if (trySkipId("noglc")) 5931 CPolOff = AMDGPU::CPol::GLC; 5932 else if (trySkipId("slc")) 5933 CPolOn = AMDGPU::CPol::SLC; 5934 else if (trySkipId("noslc")) 5935 CPolOff = AMDGPU::CPol::SLC; 5936 else if (trySkipId("dlc")) 5937 CPolOn = AMDGPU::CPol::DLC; 5938 else if (trySkipId("nodlc")) 5939 CPolOff = AMDGPU::CPol::DLC; 5940 else if (trySkipId("scc")) 5941 CPolOn = AMDGPU::CPol::SCC; 5942 else if (trySkipId("noscc")) 5943 CPolOff = AMDGPU::CPol::SCC; 5944 else 5945 return MatchOperand_NoMatch; 5946 5947 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5948 Error(S, "dlc modifier is not supported on this GPU"); 5949 return MatchOperand_ParseFail; 5950 } 5951 5952 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5953 Error(S, "scc modifier is not supported on this GPU"); 5954 return MatchOperand_ParseFail; 5955 } 5956 5957 if (CPolSeen & (CPolOn | CPolOff)) { 5958 Error(S, "duplicate cache policy modifier"); 5959 return MatchOperand_ParseFail; 5960 } 5961 5962 CPolSeen |= (CPolOn | CPolOff); 5963 5964 for (unsigned I = 1; I != Operands.size(); ++I) { 5965 AMDGPUOperand &Op = ((AMDGPUOperand 
&)*Operands[I]); 5966 if (Op.isCPol()) { 5967 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5968 return MatchOperand_Success; 5969 } 5970 } 5971 5972 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5973 AMDGPUOperand::ImmTyCPol)); 5974 5975 return MatchOperand_Success; 5976 } 5977 5978 static void addOptionalImmOperand( 5979 MCInst& Inst, const OperandVector& Operands, 5980 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5981 AMDGPUOperand::ImmTy ImmT, 5982 int64_t Default = 0) { 5983 auto i = OptionalIdx.find(ImmT); 5984 if (i != OptionalIdx.end()) { 5985 unsigned Idx = i->second; 5986 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5987 } else { 5988 Inst.addOperand(MCOperand::createImm(Default)); 5989 } 5990 } 5991 5992 OperandMatchResultTy 5993 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5994 StringRef &Value, 5995 SMLoc &StringLoc) { 5996 if (!trySkipId(Prefix, AsmToken::Colon)) 5997 return MatchOperand_NoMatch; 5998 5999 StringLoc = getLoc(); 6000 return parseId(Value, "expected an identifier") ? MatchOperand_Success 6001 : MatchOperand_ParseFail; 6002 } 6003 6004 //===----------------------------------------------------------------------===// 6005 // MTBUF format 6006 //===----------------------------------------------------------------------===// 6007 6008 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 6009 int64_t MaxVal, 6010 int64_t &Fmt) { 6011 int64_t Val; 6012 SMLoc Loc = getLoc(); 6013 6014 auto Res = parseIntWithPrefix(Pref, Val); 6015 if (Res == MatchOperand_ParseFail) 6016 return false; 6017 if (Res == MatchOperand_NoMatch) 6018 return true; 6019 6020 if (Val < 0 || Val > MaxVal) { 6021 Error(Loc, Twine("out of range ", StringRef(Pref))); 6022 return false; 6023 } 6024 6025 Fmt = Val; 6026 return true; 6027 } 6028 6029 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 6030 // values to live in a joint format operand in the MCInst encoding. 6031 OperandMatchResultTy 6032 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 6033 using namespace llvm::AMDGPU::MTBUFFormat; 6034 6035 int64_t Dfmt = DFMT_UNDEF; 6036 int64_t Nfmt = NFMT_UNDEF; 6037 6038 // dfmt and nfmt can appear in either order, and each is optional. 6039 for (int I = 0; I < 2; ++I) { 6040 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 6041 return MatchOperand_ParseFail; 6042 6043 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 6044 return MatchOperand_ParseFail; 6045 } 6046 // Skip optional comma between dfmt/nfmt 6047 // but guard against 2 commas following each other. 6048 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 6049 !peekToken().is(AsmToken::Comma)) { 6050 trySkipToken(AsmToken::Comma); 6051 } 6052 } 6053 6054 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 6055 return MatchOperand_NoMatch; 6056 6057 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6058 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 6059 6060 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6061 return MatchOperand_Success; 6062 } 6063 6064 OperandMatchResultTy 6065 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 6066 using namespace llvm::AMDGPU::MTBUFFormat; 6067 6068 int64_t Fmt = UFMT_UNDEF; 6069 6070 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 6071 return MatchOperand_ParseFail; 6072 6073 if (Fmt == UFMT_UNDEF) 6074 return MatchOperand_NoMatch; 6075 6076 Format = Fmt; 6077 return MatchOperand_Success; 6078 } 6079 6080 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 6081 int64_t &Nfmt, 6082 StringRef FormatStr, 6083 SMLoc Loc) { 6084 using namespace llvm::AMDGPU::MTBUFFormat; 6085 int64_t Format; 6086 6087 Format = getDfmt(FormatStr); 6088 if (Format != DFMT_UNDEF) { 6089 Dfmt = Format; 6090 return true; 6091 } 6092 6093 Format = getNfmt(FormatStr, getSTI()); 6094 if (Format != NFMT_UNDEF) { 6095 Nfmt = Format; 6096 return true; 6097 } 6098 6099 Error(Loc, "unsupported format"); 6100 return false; 6101 } 6102 6103 OperandMatchResultTy 6104 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 6105 SMLoc FormatLoc, 6106 int64_t &Format) { 6107 using namespace llvm::AMDGPU::MTBUFFormat; 6108 6109 int64_t Dfmt = DFMT_UNDEF; 6110 int64_t Nfmt = NFMT_UNDEF; 6111 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 6112 return MatchOperand_ParseFail; 6113 6114 if (trySkipToken(AsmToken::Comma)) { 6115 StringRef Str; 6116 SMLoc Loc = getLoc(); 6117 if (!parseId(Str, "expected a format string") || 6118 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 6119 return MatchOperand_ParseFail; 6120 } 6121 if (Dfmt == DFMT_UNDEF) { 6122 Error(Loc, "duplicate numeric format"); 6123 return MatchOperand_ParseFail; 6124 } else if (Nfmt == NFMT_UNDEF) { 6125 Error(Loc, "duplicate data format"); 6126 return MatchOperand_ParseFail; 6127 } 6128 } 6129 6130 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6131 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 6132 6133 if (isGFX10Plus()) { 6134 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI()); 6135 if (Ufmt == UFMT_UNDEF) { 6136 Error(FormatLoc, "unsupported format"); 6137 return MatchOperand_ParseFail; 6138 } 6139 Format = Ufmt; 6140 } else { 6141 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6142 } 6143 6144 return MatchOperand_Success; 6145 } 6146 6147 OperandMatchResultTy 6148 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 6149 SMLoc Loc, 6150 int64_t &Format) { 6151 using namespace llvm::AMDGPU::MTBUFFormat; 6152 6153 auto Id = getUnifiedFormat(FormatStr, getSTI()); 6154 if (Id == UFMT_UNDEF) 6155 return MatchOperand_NoMatch; 6156 6157 if (!isGFX10Plus()) { 6158 Error(Loc, "unified format is not supported on this GPU"); 6159 return MatchOperand_ParseFail; 6160 } 6161 6162 Format = Id; 6163 return MatchOperand_Success; 6164 } 6165 6166 OperandMatchResultTy 6167 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 6168 using namespace llvm::AMDGPU::MTBUFFormat; 6169 SMLoc Loc = getLoc(); 6170 6171 if (!parseExpr(Format)) 6172 return MatchOperand_ParseFail; 6173 if (!isValidFormatEncoding(Format, getSTI())) { 6174 Error(Loc, "out of range format"); 6175 return MatchOperand_ParseFail; 6176 } 6177 6178 return MatchOperand_Success; 6179 } 6180 6181 OperandMatchResultTy 6182 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 6183 using namespace llvm::AMDGPU::MTBUFFormat; 6184 6185 if (!trySkipId("format", AsmToken::Colon)) 6186 return MatchOperand_NoMatch; 6187 6188 if (trySkipToken(AsmToken::LBrac)) { 6189 StringRef FormatStr; 6190 SMLoc Loc = getLoc(); 6191 if (!parseId(FormatStr, "expected a format string")) 6192 return MatchOperand_ParseFail; 6193 6194 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 6195 if (Res == MatchOperand_NoMatch) 6196 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 6197 if (Res != MatchOperand_Success) 6198 return Res; 6199 6200 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 6201 return MatchOperand_ParseFail; 6202 6203 return MatchOperand_Success; 6204 } 6205 6206 return parseNumericFormat(Format); 6207 } 6208 6209 OperandMatchResultTy 6210 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 6211 using namespace llvm::AMDGPU::MTBUFFormat; 6212 6213 int64_t Format = getDefaultFormatEncoding(getSTI()); 6214 OperandMatchResultTy Res; 6215 SMLoc Loc = getLoc(); 6216 6217 // Parse legacy format syntax. 6218 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 6219 if (Res == MatchOperand_ParseFail) 6220 return Res; 6221 6222 bool FormatFound = (Res == MatchOperand_Success); 6223 6224 Operands.push_back( 6225 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 6226 6227 if (FormatFound) 6228 trySkipToken(AsmToken::Comma); 6229 6230 if (isToken(AsmToken::EndOfStatement)) { 6231 // We are expecting an soffset operand, 6232 // but let matcher handle the error. 6233 return MatchOperand_Success; 6234 } 6235 6236 // Parse soffset. 
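// The format specifier is optional and may also follow soffset, e.g.
// (illustrative) "... s[0:3], 0 format:[BUF_FMT_32_FLOAT]", so parse soffset
// first and patch a trailing format into the placeholder operand below.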
6237 Res = parseRegOrImm(Operands); 6238 if (Res != MatchOperand_Success) 6239 return Res; 6240 6241 trySkipToken(AsmToken::Comma); 6242 6243 if (!FormatFound) { 6244 Res = parseSymbolicOrNumericFormat(Format); 6245 if (Res == MatchOperand_ParseFail) 6246 return Res; 6247 if (Res == MatchOperand_Success) { 6248 auto Size = Operands.size(); 6249 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 6250 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 6251 Op.setImm(Format); 6252 } 6253 return MatchOperand_Success; 6254 } 6255 6256 if (isId("format") && peekToken().is(AsmToken::Colon)) { 6257 Error(getLoc(), "duplicate format"); 6258 return MatchOperand_ParseFail; 6259 } 6260 return MatchOperand_Success; 6261 } 6262 6263 //===----------------------------------------------------------------------===// 6264 // ds 6265 //===----------------------------------------------------------------------===// 6266 6267 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 6268 const OperandVector &Operands) { 6269 OptionalImmIndexMap OptionalIdx; 6270 6271 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6272 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6273 6274 // Add the register arguments 6275 if (Op.isReg()) { 6276 Op.addRegOperands(Inst, 1); 6277 continue; 6278 } 6279 6280 // Handle optional arguments 6281 OptionalIdx[Op.getImmTy()] = i; 6282 } 6283 6284 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 6285 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 6286 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6287 6288 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6289 } 6290 6291 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 6292 bool IsGdsHardcoded) { 6293 OptionalImmIndexMap OptionalIdx; 6294 6295 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6296 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6297 6298 // Add the register arguments 6299 if (Op.isReg()) { 6300 Op.addRegOperands(Inst, 1); 6301 continue; 6302 } 6303 6304 if (Op.isToken() && Op.getToken() == "gds") { 6305 IsGdsHardcoded = true; 6306 continue; 6307 } 6308 6309 // Handle optional arguments 6310 OptionalIdx[Op.getImmTy()] = i; 6311 } 6312 6313 AMDGPUOperand::ImmTy OffsetType = 6314 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 6315 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 6316 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 6317 AMDGPUOperand::ImmTyOffset; 6318 6319 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 6320 6321 if (!IsGdsHardcoded) { 6322 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6323 } 6324 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6325 } 6326 6327 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 6328 OptionalImmIndexMap OptionalIdx; 6329 6330 unsigned OperandIdx[4]; 6331 unsigned EnMask = 0; 6332 int SrcIdx = 0; 6333 6334 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6335 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6336 6337 // Add the register arguments 6338 if (Op.isReg()) { 6339 assert(SrcIdx < 4); 6340 OperandIdx[SrcIdx] = Inst.size(); 6341 Op.addRegOperands(Inst, 1); 6342 ++SrcIdx; 6343 continue; 6344 } 6345 6346 if (Op.isOff()) { 6347 assert(SrcIdx < 4); 6348 OperandIdx[SrcIdx] = Inst.size(); 6349 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 6350 ++SrcIdx; 6351 continue; 6352 } 6353 6354 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 6355 Op.addImmOperands(Inst, 1); 6356 continue; 6357 } 6358 6359 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en")) 6360 continue; 6361 6362 // Handle optional arguments 6363 OptionalIdx[Op.getImmTy()] = i; 6364 } 6365 6366 assert(SrcIdx == 4); 6367 6368 bool Compr = false; 6369 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 6370 Compr = true; 6371 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 6372 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 6373 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 6374 } 6375 6376 for (auto i = 0; i < SrcIdx; ++i) { 6377 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 6378 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 6379 } 6380 } 6381 6382 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6383 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6384 6385 Inst.addOperand(MCOperand::createImm(EnMask)); 6386 } 6387 6388 //===----------------------------------------------------------------------===// 6389 // s_waitcnt 6390 //===----------------------------------------------------------------------===// 6391 6392 static bool 6393 encodeCnt( 6394 const AMDGPU::IsaVersion ISA, 6395 int64_t &IntVal, 6396 int64_t CntVal, 6397 bool Saturate, 6398 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6399 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6400 { 6401 bool Failed = false; 6402 6403 IntVal = encode(ISA, IntVal, CntVal); 6404 if (CntVal != decode(ISA, IntVal)) { 6405 if (Saturate) { 6406 IntVal = encode(ISA, IntVal, -1); 6407 } else { 6408 Failed = true; 6409 } 6410 } 6411 return Failed; 6412 } 6413 6414 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6415 6416 SMLoc CntLoc = getLoc(); 6417 StringRef CntName = getTokenStr(); 6418 6419 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6420 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6421 return false; 6422 6423 int64_t CntVal; 6424 SMLoc ValLoc = getLoc(); 6425 if (!parseExpr(CntVal)) 6426 return false; 6427 6428 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6429 6430 bool Failed = true; 6431 bool Sat = CntName.endswith("_sat"); 6432 6433 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6434 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6435 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6436 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6437 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6438 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6439 } else { 6440 Error(CntLoc, "invalid counter name " + CntName); 6441 return false; 6442 } 6443 6444 if (Failed) { 6445 Error(ValLoc, "too large value for " + CntName); 6446 return false; 6447 } 6448 6449 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6450 return false; 6451 6452 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6453 if (isToken(AsmToken::EndOfStatement)) { 6454 Error(getLoc(), "expected a counter name"); 6455 return false; 6456 } 6457 } 6458 6459 return true; 6460 } 6461 6462 OperandMatchResultTy 6463 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 6464 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6465 int64_t Waitcnt = getWaitcntBitMask(ISA); 6466 SMLoc S = getLoc(); 6467 6468 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6469 while (!isToken(AsmToken::EndOfStatement)) { 6470 if (!parseCnt(Waitcnt)) 6471 return MatchOperand_ParseFail; 6472 } 6473 } else { 6474 if (!parseExpr(Waitcnt)) 6475 return MatchOperand_ParseFail; 6476 } 6477 6478 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6479 return MatchOperand_Success; 6480 } 6481 6482 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) { 6483 SMLoc FieldLoc = getLoc(); 6484 StringRef FieldName = getTokenStr(); 6485 if (!skipToken(AsmToken::Identifier, "expected a field name") || 6486 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6487 return false; 6488 6489 SMLoc ValueLoc = getLoc(); 6490 StringRef ValueName = getTokenStr(); 6491 if 
(!skipToken(AsmToken::Identifier, "expected a value name") || 6492 !skipToken(AsmToken::RParen, "expected a right parenthesis")) 6493 return false; 6494 6495 unsigned Shift; 6496 if (FieldName == "instid0") { 6497 Shift = 0; 6498 } else if (FieldName == "instskip") { 6499 Shift = 4; 6500 } else if (FieldName == "instid1") { 6501 Shift = 7; 6502 } else { 6503 Error(FieldLoc, "invalid field name " + FieldName); 6504 return false; 6505 } 6506 6507 int Value; 6508 if (Shift == 4) { 6509 // Parse values for instskip. 6510 Value = StringSwitch<int>(ValueName) 6511 .Case("SAME", 0) 6512 .Case("NEXT", 1) 6513 .Case("SKIP_1", 2) 6514 .Case("SKIP_2", 3) 6515 .Case("SKIP_3", 4) 6516 .Case("SKIP_4", 5) 6517 .Default(-1); 6518 } else { 6519 // Parse values for instid0 and instid1. 6520 Value = StringSwitch<int>(ValueName) 6521 .Case("NO_DEP", 0) 6522 .Case("VALU_DEP_1", 1) 6523 .Case("VALU_DEP_2", 2) 6524 .Case("VALU_DEP_3", 3) 6525 .Case("VALU_DEP_4", 4) 6526 .Case("TRANS32_DEP_1", 5) 6527 .Case("TRANS32_DEP_2", 6) 6528 .Case("TRANS32_DEP_3", 7) 6529 .Case("FMA_ACCUM_CYCLE_1", 8) 6530 .Case("SALU_CYCLE_1", 9) 6531 .Case("SALU_CYCLE_2", 10) 6532 .Case("SALU_CYCLE_3", 11) 6533 .Default(-1); 6534 } 6535 if (Value < 0) { 6536 Error(ValueLoc, "invalid value name " + ValueName); 6537 return false; 6538 } 6539 6540 Delay |= Value << Shift; 6541 return true; 6542 } 6543 6544 OperandMatchResultTy 6545 AMDGPUAsmParser::parseSDelayAluOps(OperandVector &Operands) { 6546 int64_t Delay = 0; 6547 SMLoc S = getLoc(); 6548 6549 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6550 do { 6551 if (!parseDelay(Delay)) 6552 return MatchOperand_ParseFail; 6553 } while (trySkipToken(AsmToken::Pipe)); 6554 } else { 6555 if (!parseExpr(Delay)) 6556 return MatchOperand_ParseFail; 6557 } 6558 6559 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S)); 6560 return MatchOperand_Success; 6561 } 6562 6563 bool 6564 AMDGPUOperand::isSWaitCnt() const { 6565 return isImm(); 6566 } 6567 6568 bool AMDGPUOperand::isSDelayAlu() const { return isImm(); } 6569 6570 //===----------------------------------------------------------------------===// 6571 // DepCtr 6572 //===----------------------------------------------------------------------===// 6573 6574 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId, 6575 StringRef DepCtrName) { 6576 switch (ErrorId) { 6577 case OPR_ID_UNKNOWN: 6578 Error(Loc, Twine("invalid counter name ", DepCtrName)); 6579 return; 6580 case OPR_ID_UNSUPPORTED: 6581 Error(Loc, Twine(DepCtrName, " is not supported on this GPU")); 6582 return; 6583 case OPR_ID_DUPLICATE: 6584 Error(Loc, Twine("duplicate counter name ", DepCtrName)); 6585 return; 6586 case OPR_VAL_INVALID: 6587 Error(Loc, Twine("invalid value for ", DepCtrName)); 6588 return; 6589 default: 6590 assert(false); 6591 } 6592 } 6593 6594 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) { 6595 6596 using namespace llvm::AMDGPU::DepCtr; 6597 6598 SMLoc DepCtrLoc = getLoc(); 6599 StringRef DepCtrName = getTokenStr(); 6600 6601 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6602 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6603 return false; 6604 6605 int64_t ExprVal; 6606 if (!parseExpr(ExprVal)) 6607 return false; 6608 6609 unsigned PrevOprMask = UsedOprMask; 6610 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI()); 6611 6612 if (CntVal < 0) { 6613 depCtrError(DepCtrLoc, CntVal, DepCtrName); 6614 return false; 6615 } 6616 6617 if 
(!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6618 return false; 6619 6620 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6621 if (isToken(AsmToken::EndOfStatement)) { 6622 Error(getLoc(), "expected a counter name"); 6623 return false; 6624 } 6625 } 6626 6627 unsigned CntValMask = PrevOprMask ^ UsedOprMask; 6628 DepCtr = (DepCtr & ~CntValMask) | CntVal; 6629 return true; 6630 } 6631 6632 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) { 6633 using namespace llvm::AMDGPU::DepCtr; 6634 6635 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI()); 6636 SMLoc Loc = getLoc(); 6637 6638 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6639 unsigned UsedOprMask = 0; 6640 while (!isToken(AsmToken::EndOfStatement)) { 6641 if (!parseDepCtr(DepCtr, UsedOprMask)) 6642 return MatchOperand_ParseFail; 6643 } 6644 } else { 6645 if (!parseExpr(DepCtr)) 6646 return MatchOperand_ParseFail; 6647 } 6648 6649 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc)); 6650 return MatchOperand_Success; 6651 } 6652 6653 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); } 6654 6655 //===----------------------------------------------------------------------===// 6656 // hwreg 6657 //===----------------------------------------------------------------------===// 6658 6659 bool 6660 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6661 OperandInfoTy &Offset, 6662 OperandInfoTy &Width) { 6663 using namespace llvm::AMDGPU::Hwreg; 6664 6665 // The register may be specified by name or using a numeric code 6666 HwReg.Loc = getLoc(); 6667 if (isToken(AsmToken::Identifier) && 6668 (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6669 HwReg.IsSymbolic = true; 6670 lex(); // skip register name 6671 } else if (!parseExpr(HwReg.Id, "a register name")) { 6672 return false; 6673 } 6674 6675 if (trySkipToken(AsmToken::RParen)) 6676 return true; 6677 6678 // parse optional params 6679 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6680 return false; 6681 6682 Offset.Loc = getLoc(); 6683 if (!parseExpr(Offset.Id)) 6684 return false; 6685 6686 if (!skipToken(AsmToken::Comma, "expected a comma")) 6687 return false; 6688 6689 Width.Loc = getLoc(); 6690 return parseExpr(Width.Id) && 6691 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6692 } 6693 6694 bool 6695 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6696 const OperandInfoTy &Offset, 6697 const OperandInfoTy &Width) { 6698 6699 using namespace llvm::AMDGPU::Hwreg; 6700 6701 if (HwReg.IsSymbolic) { 6702 if (HwReg.Id == OPR_ID_UNSUPPORTED) { 6703 Error(HwReg.Loc, 6704 "specified hardware register is not supported on this GPU"); 6705 return false; 6706 } 6707 } else { 6708 if (!isValidHwreg(HwReg.Id)) { 6709 Error(HwReg.Loc, 6710 "invalid code of hardware register: only 6-bit values are legal"); 6711 return false; 6712 } 6713 } 6714 if (!isValidHwregOffset(Offset.Id)) { 6715 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6716 return false; 6717 } 6718 if (!isValidHwregWidth(Width.Id)) { 6719 Error(Width.Loc, 6720 "invalid bitfield width: only values from 1 to 32 are legal"); 6721 return false; 6722 } 6723 return true; 6724 } 6725 6726 OperandMatchResultTy 6727 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6728 using namespace llvm::AMDGPU::Hwreg; 6729 6730 int64_t ImmVal = 0; 6731 SMLoc Loc = getLoc(); 6732 6733 if (trySkipId("hwreg", AsmToken::LParen)) { 6734 
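// Symbolic form: hwreg(<name or code>[, <bit offset>, <bit width>]), e.g.
// (illustrative) s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4).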
OperandInfoTy HwReg(OPR_ID_UNKNOWN); 6735 OperandInfoTy Offset(OFFSET_DEFAULT_); 6736 OperandInfoTy Width(WIDTH_DEFAULT_); 6737 if (parseHwregBody(HwReg, Offset, Width) && 6738 validateHwreg(HwReg, Offset, Width)) { 6739 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6740 } else { 6741 return MatchOperand_ParseFail; 6742 } 6743 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6744 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6745 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6746 return MatchOperand_ParseFail; 6747 } 6748 } else { 6749 return MatchOperand_ParseFail; 6750 } 6751 6752 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6753 return MatchOperand_Success; 6754 } 6755 6756 bool AMDGPUOperand::isHwreg() const { 6757 return isImmTy(ImmTyHwreg); 6758 } 6759 6760 //===----------------------------------------------------------------------===// 6761 // sendmsg 6762 //===----------------------------------------------------------------------===// 6763 6764 bool 6765 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6766 OperandInfoTy &Op, 6767 OperandInfoTy &Stream) { 6768 using namespace llvm::AMDGPU::SendMsg; 6769 6770 Msg.Loc = getLoc(); 6771 if (isToken(AsmToken::Identifier) && 6772 (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6773 Msg.IsSymbolic = true; 6774 lex(); // skip message name 6775 } else if (!parseExpr(Msg.Id, "a message name")) { 6776 return false; 6777 } 6778 6779 if (trySkipToken(AsmToken::Comma)) { 6780 Op.IsDefined = true; 6781 Op.Loc = getLoc(); 6782 if (isToken(AsmToken::Identifier) && 6783 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6784 lex(); // skip operation name 6785 } else if (!parseExpr(Op.Id, "an operation name")) { 6786 return false; 6787 } 6788 6789 if (trySkipToken(AsmToken::Comma)) { 6790 Stream.IsDefined = true; 6791 Stream.Loc = getLoc(); 6792 if (!parseExpr(Stream.Id)) 6793 return false; 6794 } 6795 } 6796 6797 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6798 } 6799 6800 bool 6801 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6802 const OperandInfoTy &Op, 6803 const OperandInfoTy &Stream) { 6804 using namespace llvm::AMDGPU::SendMsg; 6805 6806 // Validation strictness depends on whether message is specified 6807 // in a symbolic or in a numeric form. In the latter case 6808 // only encoding possibility is checked. 
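// For example (illustrative), sendmsg(MSG_GS_DONE, GS_OP_NOP) is validated
// against the message/operation tables, whereas a raw value such as
// sendmsg(3) only has to be encodable.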
6809 bool Strict = Msg.IsSymbolic; 6810 6811 if (Strict) { 6812 if (Msg.Id == OPR_ID_UNSUPPORTED) { 6813 Error(Msg.Loc, "specified message id is not supported on this GPU"); 6814 return false; 6815 } 6816 } else { 6817 if (!isValidMsgId(Msg.Id, getSTI())) { 6818 Error(Msg.Loc, "invalid message id"); 6819 return false; 6820 } 6821 } 6822 if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) { 6823 if (Op.IsDefined) { 6824 Error(Op.Loc, "message does not support operations"); 6825 } else { 6826 Error(Msg.Loc, "missing message operation"); 6827 } 6828 return false; 6829 } 6830 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6831 Error(Op.Loc, "invalid operation id"); 6832 return false; 6833 } 6834 if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) && 6835 Stream.IsDefined) { 6836 Error(Stream.Loc, "message operation does not support streams"); 6837 return false; 6838 } 6839 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6840 Error(Stream.Loc, "invalid message stream id"); 6841 return false; 6842 } 6843 return true; 6844 } 6845 6846 OperandMatchResultTy 6847 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6848 using namespace llvm::AMDGPU::SendMsg; 6849 6850 int64_t ImmVal = 0; 6851 SMLoc Loc = getLoc(); 6852 6853 if (trySkipId("sendmsg", AsmToken::LParen)) { 6854 OperandInfoTy Msg(OPR_ID_UNKNOWN); 6855 OperandInfoTy Op(OP_NONE_); 6856 OperandInfoTy Stream(STREAM_ID_NONE_); 6857 if (parseSendMsgBody(Msg, Op, Stream) && 6858 validateSendMsg(Msg, Op, Stream)) { 6859 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6860 } else { 6861 return MatchOperand_ParseFail; 6862 } 6863 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6864 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6865 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6866 return MatchOperand_ParseFail; 6867 } 6868 } else { 6869 return MatchOperand_ParseFail; 6870 } 6871 6872 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6873 return MatchOperand_Success; 6874 } 6875 6876 bool AMDGPUOperand::isSendMsg() const { 6877 return isImmTy(ImmTySendMsg); 6878 } 6879 6880 //===----------------------------------------------------------------------===// 6881 // v_interp 6882 //===----------------------------------------------------------------------===// 6883 6884 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6885 StringRef Str; 6886 SMLoc S = getLoc(); 6887 6888 if (!parseId(Str)) 6889 return MatchOperand_NoMatch; 6890 6891 int Slot = StringSwitch<int>(Str) 6892 .Case("p10", 0) 6893 .Case("p20", 1) 6894 .Case("p0", 2) 6895 .Default(-1); 6896 6897 if (Slot == -1) { 6898 Error(S, "invalid interpolation slot"); 6899 return MatchOperand_ParseFail; 6900 } 6901 6902 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6903 AMDGPUOperand::ImmTyInterpSlot)); 6904 return MatchOperand_Success; 6905 } 6906 6907 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6908 StringRef Str; 6909 SMLoc S = getLoc(); 6910 6911 if (!parseId(Str)) 6912 return MatchOperand_NoMatch; 6913 6914 if (!Str.startswith("attr")) { 6915 Error(S, "invalid interpolation attribute"); 6916 return MatchOperand_ParseFail; 6917 } 6918 6919 StringRef Chan = Str.take_back(2); 6920 int AttrChan = StringSwitch<int>(Chan) 6921 .Case(".x", 0) 6922 .Case(".y", 1) 6923 .Case(".z", 2) 6924 .Case(".w", 3) 6925 .Default(-1); 6926 if (AttrChan == -1) { 6927 Error(S, "invalid or missing interpolation attribute channel"); 
6928 return MatchOperand_ParseFail; 6929 } 6930 6931 Str = Str.drop_back(2).drop_front(4); 6932 6933 uint8_t Attr; 6934 if (Str.getAsInteger(10, Attr)) { 6935 Error(S, "invalid or missing interpolation attribute number"); 6936 return MatchOperand_ParseFail; 6937 } 6938 6939 if (Attr > 63) { 6940 Error(S, "out of bounds interpolation attribute number"); 6941 return MatchOperand_ParseFail; 6942 } 6943 6944 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6945 6946 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6947 AMDGPUOperand::ImmTyInterpAttr)); 6948 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6949 AMDGPUOperand::ImmTyAttrChan)); 6950 return MatchOperand_Success; 6951 } 6952 6953 //===----------------------------------------------------------------------===// 6954 // exp 6955 //===----------------------------------------------------------------------===// 6956 6957 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6958 using namespace llvm::AMDGPU::Exp; 6959 6960 StringRef Str; 6961 SMLoc S = getLoc(); 6962 6963 if (!parseId(Str)) 6964 return MatchOperand_NoMatch; 6965 6966 unsigned Id = getTgtId(Str); 6967 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6968 Error(S, (Id == ET_INVALID) ? 6969 "invalid exp target" : 6970 "exp target is not supported on this GPU"); 6971 return MatchOperand_ParseFail; 6972 } 6973 6974 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6975 AMDGPUOperand::ImmTyExpTgt)); 6976 return MatchOperand_Success; 6977 } 6978 6979 //===----------------------------------------------------------------------===// 6980 // parser helpers 6981 //===----------------------------------------------------------------------===// 6982 6983 bool 6984 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6985 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6986 } 6987 6988 bool 6989 AMDGPUAsmParser::isId(const StringRef Id) const { 6990 return isId(getToken(), Id); 6991 } 6992 6993 bool 6994 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6995 return getTokenKind() == Kind; 6996 } 6997 6998 bool 6999 AMDGPUAsmParser::trySkipId(const StringRef Id) { 7000 if (isId(Id)) { 7001 lex(); 7002 return true; 7003 } 7004 return false; 7005 } 7006 7007 bool 7008 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 7009 if (isToken(AsmToken::Identifier)) { 7010 StringRef Tok = getTokenStr(); 7011 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 7012 lex(); 7013 return true; 7014 } 7015 } 7016 return false; 7017 } 7018 7019 bool 7020 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 7021 if (isId(Id) && peekToken().is(Kind)) { 7022 lex(); 7023 lex(); 7024 return true; 7025 } 7026 return false; 7027 } 7028 7029 bool 7030 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 7031 if (isToken(Kind)) { 7032 lex(); 7033 return true; 7034 } 7035 return false; 7036 } 7037 7038 bool 7039 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 7040 const StringRef ErrMsg) { 7041 if (!trySkipToken(Kind)) { 7042 Error(getLoc(), ErrMsg); 7043 return false; 7044 } 7045 return true; 7046 } 7047 7048 bool 7049 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 7050 SMLoc S = getLoc(); 7051 7052 const MCExpr *Expr; 7053 if (Parser.parseExpression(Expr)) 7054 return false; 7055 7056 if (Expr->evaluateAsAbsolute(Imm)) 7057 return true; 7058 7059 if (Expected.empty()) { 7060 Error(S, "expected 
absolute expression"); 7061 } else { 7062 Error(S, Twine("expected ", Expected) + 7063 Twine(" or an absolute expression")); 7064 } 7065 return false; 7066 } 7067 7068 bool 7069 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 7070 SMLoc S = getLoc(); 7071 7072 const MCExpr *Expr; 7073 if (Parser.parseExpression(Expr)) 7074 return false; 7075 7076 int64_t IntVal; 7077 if (Expr->evaluateAsAbsolute(IntVal)) { 7078 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 7079 } else { 7080 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 7081 } 7082 return true; 7083 } 7084 7085 bool 7086 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 7087 if (isToken(AsmToken::String)) { 7088 Val = getToken().getStringContents(); 7089 lex(); 7090 return true; 7091 } else { 7092 Error(getLoc(), ErrMsg); 7093 return false; 7094 } 7095 } 7096 7097 bool 7098 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 7099 if (isToken(AsmToken::Identifier)) { 7100 Val = getTokenStr(); 7101 lex(); 7102 return true; 7103 } else { 7104 if (!ErrMsg.empty()) 7105 Error(getLoc(), ErrMsg); 7106 return false; 7107 } 7108 } 7109 7110 AsmToken 7111 AMDGPUAsmParser::getToken() const { 7112 return Parser.getTok(); 7113 } 7114 7115 AsmToken 7116 AMDGPUAsmParser::peekToken() { 7117 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 7118 } 7119 7120 void 7121 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 7122 auto TokCount = getLexer().peekTokens(Tokens); 7123 7124 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 7125 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 7126 } 7127 7128 AsmToken::TokenKind 7129 AMDGPUAsmParser::getTokenKind() const { 7130 return getLexer().getKind(); 7131 } 7132 7133 SMLoc 7134 AMDGPUAsmParser::getLoc() const { 7135 return getToken().getLoc(); 7136 } 7137 7138 StringRef 7139 AMDGPUAsmParser::getTokenStr() const { 7140 return getToken().getString(); 7141 } 7142 7143 void 7144 AMDGPUAsmParser::lex() { 7145 Parser.Lex(); 7146 } 7147 7148 SMLoc 7149 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 7150 const OperandVector &Operands) const { 7151 for (unsigned i = Operands.size() - 1; i > 0; --i) { 7152 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7153 if (Test(Op)) 7154 return Op.getStartLoc(); 7155 } 7156 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 7157 } 7158 7159 SMLoc 7160 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 7161 const OperandVector &Operands) const { 7162 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 7163 return getOperandLoc(Test, Operands); 7164 } 7165 7166 SMLoc 7167 AMDGPUAsmParser::getRegLoc(unsigned Reg, 7168 const OperandVector &Operands) const { 7169 auto Test = [=](const AMDGPUOperand& Op) { 7170 return Op.isRegKind() && Op.getReg() == Reg; 7171 }; 7172 return getOperandLoc(Test, Operands); 7173 } 7174 7175 SMLoc 7176 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 7177 auto Test = [](const AMDGPUOperand& Op) { 7178 return Op.IsImmKindLiteral() || Op.isExpr(); 7179 }; 7180 return getOperandLoc(Test, Operands); 7181 } 7182 7183 SMLoc 7184 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 7185 auto Test = [](const AMDGPUOperand& Op) { 7186 return Op.isImmKindConst(); 7187 }; 7188 return getOperandLoc(Test, Operands); 7189 } 7190 7191 //===----------------------------------------------------------------------===// 7192 // swizzle 7193 
//===----------------------------------------------------------------------===// 7194 7195 LLVM_READNONE 7196 static unsigned 7197 encodeBitmaskPerm(const unsigned AndMask, 7198 const unsigned OrMask, 7199 const unsigned XorMask) { 7200 using namespace llvm::AMDGPU::Swizzle; 7201 7202 return BITMASK_PERM_ENC | 7203 (AndMask << BITMASK_AND_SHIFT) | 7204 (OrMask << BITMASK_OR_SHIFT) | 7205 (XorMask << BITMASK_XOR_SHIFT); 7206 } 7207 7208 bool 7209 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 7210 const unsigned MinVal, 7211 const unsigned MaxVal, 7212 const StringRef ErrMsg, 7213 SMLoc &Loc) { 7214 if (!skipToken(AsmToken::Comma, "expected a comma")) { 7215 return false; 7216 } 7217 Loc = getLoc(); 7218 if (!parseExpr(Op)) { 7219 return false; 7220 } 7221 if (Op < MinVal || Op > MaxVal) { 7222 Error(Loc, ErrMsg); 7223 return false; 7224 } 7225 7226 return true; 7227 } 7228 7229 bool 7230 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 7231 const unsigned MinVal, 7232 const unsigned MaxVal, 7233 const StringRef ErrMsg) { 7234 SMLoc Loc; 7235 for (unsigned i = 0; i < OpNum; ++i) { 7236 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 7237 return false; 7238 } 7239 7240 return true; 7241 } 7242 7243 bool 7244 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 7245 using namespace llvm::AMDGPU::Swizzle; 7246 7247 int64_t Lane[LANE_NUM]; 7248 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 7249 "expected a 2-bit lane id")) { 7250 Imm = QUAD_PERM_ENC; 7251 for (unsigned I = 0; I < LANE_NUM; ++I) { 7252 Imm |= Lane[I] << (LANE_SHIFT * I); 7253 } 7254 return true; 7255 } 7256 return false; 7257 } 7258 7259 bool 7260 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 7261 using namespace llvm::AMDGPU::Swizzle; 7262 7263 SMLoc Loc; 7264 int64_t GroupSize; 7265 int64_t LaneIdx; 7266 7267 if (!parseSwizzleOperand(GroupSize, 7268 2, 32, 7269 "group size must be in the interval [2,32]", 7270 Loc)) { 7271 return false; 7272 } 7273 if (!isPowerOf2_64(GroupSize)) { 7274 Error(Loc, "group size must be a power of two"); 7275 return false; 7276 } 7277 if (parseSwizzleOperand(LaneIdx, 7278 0, GroupSize - 1, 7279 "lane id must be in the interval [0,group size - 1]", 7280 Loc)) { 7281 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 7282 return true; 7283 } 7284 return false; 7285 } 7286 7287 bool 7288 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 7289 using namespace llvm::AMDGPU::Swizzle; 7290 7291 SMLoc Loc; 7292 int64_t GroupSize; 7293 7294 if (!parseSwizzleOperand(GroupSize, 7295 2, 32, 7296 "group size must be in the interval [2,32]", 7297 Loc)) { 7298 return false; 7299 } 7300 if (!isPowerOf2_64(GroupSize)) { 7301 Error(Loc, "group size must be a power of two"); 7302 return false; 7303 } 7304 7305 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 7306 return true; 7307 } 7308 7309 bool 7310 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 7311 using namespace llvm::AMDGPU::Swizzle; 7312 7313 SMLoc Loc; 7314 int64_t GroupSize; 7315 7316 if (!parseSwizzleOperand(GroupSize, 7317 1, 16, 7318 "group size must be in the interval [1,16]", 7319 Loc)) { 7320 return false; 7321 } 7322 if (!isPowerOf2_64(GroupSize)) { 7323 Error(Loc, "group size must be a power of two"); 7324 return false; 7325 } 7326 7327 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 7328 return true; 7329 } 7330 7331 bool 7332 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 7333 using namespace llvm::AMDGPU::Swizzle; 7334 7335 if 
(!skipToken(AsmToken::Comma, "expected a comma")) { 7336 return false; 7337 } 7338 7339 StringRef Ctl; 7340 SMLoc StrLoc = getLoc(); 7341 if (!parseString(Ctl)) { 7342 return false; 7343 } 7344 if (Ctl.size() != BITMASK_WIDTH) { 7345 Error(StrLoc, "expected a 5-character mask"); 7346 return false; 7347 } 7348 7349 unsigned AndMask = 0; 7350 unsigned OrMask = 0; 7351 unsigned XorMask = 0; 7352 7353 for (size_t i = 0; i < Ctl.size(); ++i) { 7354 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 7355 switch(Ctl[i]) { 7356 default: 7357 Error(StrLoc, "invalid mask"); 7358 return false; 7359 case '0': 7360 break; 7361 case '1': 7362 OrMask |= Mask; 7363 break; 7364 case 'p': 7365 AndMask |= Mask; 7366 break; 7367 case 'i': 7368 AndMask |= Mask; 7369 XorMask |= Mask; 7370 break; 7371 } 7372 } 7373 7374 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 7375 return true; 7376 } 7377 7378 bool 7379 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 7380 7381 SMLoc OffsetLoc = getLoc(); 7382 7383 if (!parseExpr(Imm, "a swizzle macro")) { 7384 return false; 7385 } 7386 if (!isUInt<16>(Imm)) { 7387 Error(OffsetLoc, "expected a 16-bit offset"); 7388 return false; 7389 } 7390 return true; 7391 } 7392 7393 bool 7394 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 7395 using namespace llvm::AMDGPU::Swizzle; 7396 7397 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 7398 7399 SMLoc ModeLoc = getLoc(); 7400 bool Ok = false; 7401 7402 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 7403 Ok = parseSwizzleQuadPerm(Imm); 7404 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 7405 Ok = parseSwizzleBitmaskPerm(Imm); 7406 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 7407 Ok = parseSwizzleBroadcast(Imm); 7408 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 7409 Ok = parseSwizzleSwap(Imm); 7410 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 7411 Ok = parseSwizzleReverse(Imm); 7412 } else { 7413 Error(ModeLoc, "expected a swizzle mode"); 7414 } 7415 7416 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 7417 } 7418 7419 return false; 7420 } 7421 7422 OperandMatchResultTy 7423 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 7424 SMLoc S = getLoc(); 7425 int64_t Imm = 0; 7426 7427 if (trySkipId("offset")) { 7428 7429 bool Ok = false; 7430 if (skipToken(AsmToken::Colon, "expected a colon")) { 7431 if (trySkipId("swizzle")) { 7432 Ok = parseSwizzleMacro(Imm); 7433 } else { 7434 Ok = parseSwizzleOffset(Imm); 7435 } 7436 } 7437 7438 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 7439 7440 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 7441 } else { 7442 // Swizzle "offset" operand is optional. 7443 // If it is omitted, try parsing other optional operands. 
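// For reference, when the operand is present it takes one of two forms
// (illustrative, following the usual ds_swizzle_b32 syntax):
//   ds_swizzle_b32 v8, v2 offset:swizzle(SWAP, 16)   -- macro form, parseSwizzleMacro
//   ds_swizzle_b32 v8, v2 offset:0xFFFF              -- raw 16-bit offset, parseSwizzleOffset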
7444 return parseOptionalOpr(Operands); 7445 } 7446 } 7447 7448 bool 7449 AMDGPUOperand::isSwizzle() const { 7450 return isImmTy(ImmTySwizzle); 7451 } 7452 7453 //===----------------------------------------------------------------------===// 7454 // VGPR Index Mode 7455 //===----------------------------------------------------------------------===// 7456 7457 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 7458 7459 using namespace llvm::AMDGPU::VGPRIndexMode; 7460 7461 if (trySkipToken(AsmToken::RParen)) { 7462 return OFF; 7463 } 7464 7465 int64_t Imm = 0; 7466 7467 while (true) { 7468 unsigned Mode = 0; 7469 SMLoc S = getLoc(); 7470 7471 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 7472 if (trySkipId(IdSymbolic[ModeId])) { 7473 Mode = 1 << ModeId; 7474 break; 7475 } 7476 } 7477 7478 if (Mode == 0) { 7479 Error(S, (Imm == 0)? 7480 "expected a VGPR index mode or a closing parenthesis" : 7481 "expected a VGPR index mode"); 7482 return UNDEF; 7483 } 7484 7485 if (Imm & Mode) { 7486 Error(S, "duplicate VGPR index mode"); 7487 return UNDEF; 7488 } 7489 Imm |= Mode; 7490 7491 if (trySkipToken(AsmToken::RParen)) 7492 break; 7493 if (!skipToken(AsmToken::Comma, 7494 "expected a comma or a closing parenthesis")) 7495 return UNDEF; 7496 } 7497 7498 return Imm; 7499 } 7500 7501 OperandMatchResultTy 7502 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 7503 7504 using namespace llvm::AMDGPU::VGPRIndexMode; 7505 7506 int64_t Imm = 0; 7507 SMLoc S = getLoc(); 7508 7509 if (trySkipId("gpr_idx", AsmToken::LParen)) { 7510 Imm = parseGPRIdxMacro(); 7511 if (Imm == UNDEF) 7512 return MatchOperand_ParseFail; 7513 } else { 7514 if (getParser().parseAbsoluteExpression(Imm)) 7515 return MatchOperand_ParseFail; 7516 if (Imm < 0 || !isUInt<4>(Imm)) { 7517 Error(S, "invalid immediate: only 4-bit values are legal"); 7518 return MatchOperand_ParseFail; 7519 } 7520 } 7521 7522 Operands.push_back( 7523 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 7524 return MatchOperand_Success; 7525 } 7526 7527 bool AMDGPUOperand::isGPRIdxMode() const { 7528 return isImmTy(ImmTyGprIdxMode); 7529 } 7530 7531 //===----------------------------------------------------------------------===// 7532 // sopp branch targets 7533 //===----------------------------------------------------------------------===// 7534 7535 OperandMatchResultTy 7536 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 7537 7538 // Make sure we are not parsing something 7539 // that looks like a label or an expression but is not. 7540 // This will improve error messages. 7541 if (isRegister() || isModifier()) 7542 return MatchOperand_NoMatch; 7543 7544 if (!parseExpr(Operands)) 7545 return MatchOperand_ParseFail; 7546 7547 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 7548 assert(Opr.isImm() || Opr.isExpr()); 7549 SMLoc Loc = Opr.getStartLoc(); 7550 7551 // Currently we do not support arbitrary expressions as branch targets. 7552 // Only labels and absolute expressions are accepted. 
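// For example (illustrative): 's_branch target_label' and 's_branch 4' are
// accepted, while a relocatable expression such as 'target_label + 4' is
// rejected by the check below.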
7553 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 7554 Error(Loc, "expected an absolute expression or a label"); 7555 } else if (Opr.isImm() && !Opr.isS16Imm()) { 7556 Error(Loc, "expected a 16-bit signed jump offset"); 7557 } 7558 7559 return MatchOperand_Success; 7560 } 7561 7562 //===----------------------------------------------------------------------===// 7563 // Boolean holding registers 7564 //===----------------------------------------------------------------------===// 7565 7566 OperandMatchResultTy 7567 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 7568 return parseReg(Operands); 7569 } 7570 7571 //===----------------------------------------------------------------------===// 7572 // mubuf 7573 //===----------------------------------------------------------------------===// 7574 7575 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 7576 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 7577 } 7578 7579 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 7580 const OperandVector &Operands, 7581 bool IsAtomic, 7582 bool IsLds) { 7583 OptionalImmIndexMap OptionalIdx; 7584 unsigned FirstOperandIdx = 1; 7585 bool IsAtomicReturn = false; 7586 7587 if (IsAtomic) { 7588 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7589 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7590 if (!Op.isCPol()) 7591 continue; 7592 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7593 break; 7594 } 7595 7596 if (!IsAtomicReturn) { 7597 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7598 if (NewOpc != -1) 7599 Inst.setOpcode(NewOpc); 7600 } 7601 7602 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7603 SIInstrFlags::IsAtomicRet; 7604 } 7605 7606 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7607 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7608 7609 // Add the register arguments 7610 if (Op.isReg()) { 7611 Op.addRegOperands(Inst, 1); 7612 // Insert a tied src for atomic return dst. 7613 // This cannot be postponed as subsequent calls to 7614 // addImmOperands rely on correct number of MC operands. 7615 if (IsAtomicReturn && i == FirstOperandIdx) 7616 Op.addRegOperands(Inst, 1); 7617 continue; 7618 } 7619 7620 // Handle the case where soffset is an immediate 7621 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7622 Op.addImmOperands(Inst, 1); 7623 continue; 7624 } 7625 7626 // Handle tokens like 'offen' which are sometimes hard-coded into the 7627 // asm string. There are no MCInst operands for these. 
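// (Illustrative: for 'buffer_load_dword v1, off, s[4:7], 0', the literal
// soffset '0' was added above as a plain immediate; a word such as 'offen'
// hard-coded in an asm string shows up here as a Token and is skipped.)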
7628 if (Op.isToken()) { 7629 continue; 7630 } 7631 assert(Op.isImm()); 7632 7633 // Handle optional arguments 7634 OptionalIdx[Op.getImmTy()] = i; 7635 } 7636 7637 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7638 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7639 7640 if (!IsLds) { // tfe is not legal with lds opcodes 7641 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7642 } 7643 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7644 } 7645 7646 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7647 OptionalImmIndexMap OptionalIdx; 7648 7649 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7650 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7651 7652 // Add the register arguments 7653 if (Op.isReg()) { 7654 Op.addRegOperands(Inst, 1); 7655 continue; 7656 } 7657 7658 // Handle the case where soffset is an immediate 7659 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7660 Op.addImmOperands(Inst, 1); 7661 continue; 7662 } 7663 7664 // Handle tokens like 'offen' which are sometimes hard-coded into the 7665 // asm string. There are no MCInst operands for these. 7666 if (Op.isToken()) { 7667 continue; 7668 } 7669 assert(Op.isImm()); 7670 7671 // Handle optional arguments 7672 OptionalIdx[Op.getImmTy()] = i; 7673 } 7674 7675 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7676 AMDGPUOperand::ImmTyOffset); 7677 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7678 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7679 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7680 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7681 } 7682 7683 //===----------------------------------------------------------------------===// 7684 // mimg 7685 //===----------------------------------------------------------------------===// 7686 7687 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7688 bool IsAtomic) { 7689 unsigned I = 1; 7690 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7691 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7692 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7693 } 7694 7695 if (IsAtomic) { 7696 // Add src, same as dst 7697 assert(Desc.getNumDefs() == 1); 7698 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7699 } 7700 7701 OptionalImmIndexMap OptionalIdx; 7702 7703 for (unsigned E = Operands.size(); I != E; ++I) { 7704 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7705 7706 // Add the register arguments 7707 if (Op.isReg()) { 7708 Op.addRegOperands(Inst, 1); 7709 } else if (Op.isImmModifier()) { 7710 OptionalIdx[Op.getImmTy()] = I; 7711 } else if (!Op.isToken()) { 7712 llvm_unreachable("unexpected operand type"); 7713 } 7714 } 7715 7716 bool IsGFX10Plus = isGFX10Plus(); 7717 7718 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7719 if (IsGFX10Plus) 7720 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7721 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7722 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7723 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7724 if (IsGFX10Plus) 7725 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7726 
if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 7727 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7728 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7729 if (!IsGFX10Plus) 7730 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7731 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7732 } 7733 7734 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7735 cvtMIMG(Inst, Operands, true); 7736 } 7737 7738 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7739 OptionalImmIndexMap OptionalIdx; 7740 bool IsAtomicReturn = false; 7741 7742 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7743 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7744 if (!Op.isCPol()) 7745 continue; 7746 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7747 break; 7748 } 7749 7750 if (!IsAtomicReturn) { 7751 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7752 if (NewOpc != -1) 7753 Inst.setOpcode(NewOpc); 7754 } 7755 7756 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7757 SIInstrFlags::IsAtomicRet; 7758 7759 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7760 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7761 7762 // Add the register arguments 7763 if (Op.isReg()) { 7764 Op.addRegOperands(Inst, 1); 7765 if (IsAtomicReturn && i == 1) 7766 Op.addRegOperands(Inst, 1); 7767 continue; 7768 } 7769 7770 // Handle the case where soffset is an immediate 7771 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7772 Op.addImmOperands(Inst, 1); 7773 continue; 7774 } 7775 7776 // Handle tokens like 'offen' which are sometimes hard-coded into the 7777 // asm string. There are no MCInst operands for these. 7778 if (Op.isToken()) { 7779 continue; 7780 } 7781 assert(Op.isImm()); 7782 7783 // Handle optional arguments 7784 OptionalIdx[Op.getImmTy()] = i; 7785 } 7786 7787 if ((int)Inst.getNumOperands() <= 7788 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7789 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7790 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7791 } 7792 7793 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7794 const OperandVector &Operands) { 7795 for (unsigned I = 1; I < Operands.size(); ++I) { 7796 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7797 if (Operand.isReg()) 7798 Operand.addRegOperands(Inst, 1); 7799 } 7800 7801 Inst.addOperand(MCOperand::createImm(1)); // a16 7802 } 7803 7804 //===----------------------------------------------------------------------===// 7805 // smrd 7806 //===----------------------------------------------------------------------===// 7807 7808 bool AMDGPUOperand::isSMRDOffset8() const { 7809 return isImm() && isUInt<8>(getImm()); 7810 } 7811 7812 bool AMDGPUOperand::isSMEMOffset() const { 7813 return isImmTy(ImmTyNone) || 7814 isImmTy(ImmTyOffset); // Offset range is checked later by validator. 7815 } 7816 7817 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7818 // 32-bit literals are only supported on CI and we only want to use them 7819 // when the offset is > 8-bits. 
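// I.e. only immediates in the range [0x100, 0xFFFFFFFF] qualify.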
7820 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7821 } 7822 7823 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7824 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7825 } 7826 7827 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7828 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7829 } 7830 7831 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7832 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7833 } 7834 7835 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7836 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7837 } 7838 7839 //===----------------------------------------------------------------------===// 7840 // vop3 7841 //===----------------------------------------------------------------------===// 7842 7843 static bool ConvertOmodMul(int64_t &Mul) { 7844 if (Mul != 1 && Mul != 2 && Mul != 4) 7845 return false; 7846 7847 Mul >>= 1; 7848 return true; 7849 } 7850 7851 static bool ConvertOmodDiv(int64_t &Div) { 7852 if (Div == 1) { 7853 Div = 0; 7854 return true; 7855 } 7856 7857 if (Div == 2) { 7858 Div = 3; 7859 return true; 7860 } 7861 7862 return false; 7863 } 7864 7865 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7866 // This is intentional and ensures compatibility with sp3. 7867 // See bug 35397 for details. 7868 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7869 if (BoundCtrl == 0 || BoundCtrl == 1) { 7870 BoundCtrl = 1; 7871 return true; 7872 } 7873 return false; 7874 } 7875 7876 // Note: the order in this table matches the order of operands in AsmString. 7877 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7878 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7879 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7880 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7881 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7882 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7883 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7884 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7885 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7886 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7887 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7888 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7889 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7890 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7891 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7892 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7893 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7894 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7895 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7896 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7897 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7898 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7899 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7900 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7901 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7902 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7903 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 7904 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 7905 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 7906 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 7907 {"vm", AMDGPUOperand::ImmTyExpVM, 
true, nullptr}, 7908 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 7909 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 7910 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 7911 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 7912 {"dpp8", AMDGPUOperand::ImmTyDPP8, false, nullptr}, 7913 {"dpp_ctrl", AMDGPUOperand::ImmTyDppCtrl, false, nullptr}, 7914 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7915 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7916 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7917 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7918 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 7919 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 7920 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}, 7921 {"wait_vdst", AMDGPUOperand::ImmTyWaitVDST, false, nullptr}, 7922 {"wait_exp", AMDGPUOperand::ImmTyWaitEXP, false, nullptr} 7923 }; 7924 7925 void AMDGPUAsmParser::onBeginOfFile() { 7926 if (!getParser().getStreamer().getTargetStreamer() || 7927 getSTI().getTargetTriple().getArch() == Triple::r600) 7928 return; 7929 7930 if (!getTargetStreamer().getTargetID()) 7931 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString()); 7932 7933 if (isHsaAbiVersion3AndAbove(&getSTI())) 7934 getTargetStreamer().EmitDirectiveAMDGCNTarget(); 7935 } 7936 7937 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 7938 7939 OperandMatchResultTy res = parseOptionalOpr(Operands); 7940 7941 // This is a hack to enable hardcoded mandatory operands which follow 7942 // optional operands. 7943 // 7944 // Current design assumes that all operands after the first optional operand 7945 // are also optional. However implementation of some instructions violates 7946 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 7947 // 7948 // To alleviate this problem, we have to (implicitly) parse extra operands 7949 // to make sure autogenerated parser of custom operands never hit hardcoded 7950 // mandatory operands. 
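// Concretely, the loop below keeps consuming ", <operand>" groups (up to
// MAX_OPR_LOOKAHEAD of them) after a successful parse, stopping at the end of
// the statement or as soon as a parse does not succeed.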
7951 7952 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7953 if (res != MatchOperand_Success || 7954 isToken(AsmToken::EndOfStatement)) 7955 break; 7956 7957 trySkipToken(AsmToken::Comma); 7958 res = parseOptionalOpr(Operands); 7959 } 7960 7961 return res; 7962 } 7963 7964 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7965 OperandMatchResultTy res; 7966 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7967 // try to parse any optional operand here 7968 if (Op.IsBit) { 7969 res = parseNamedBit(Op.Name, Operands, Op.Type); 7970 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7971 res = parseOModOperand(Operands); 7972 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7973 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7974 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7975 res = parseSDWASel(Operands, Op.Name, Op.Type); 7976 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7977 res = parseSDWADstUnused(Operands); 7978 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7979 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7980 Op.Type == AMDGPUOperand::ImmTyNegLo || 7981 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7982 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7983 Op.ConvertResult); 7984 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7985 res = parseDim(Operands); 7986 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7987 res = parseCPol(Operands); 7988 } else if (Op.Type == AMDGPUOperand::ImmTyDPP8) { 7989 res = parseDPP8(Operands); 7990 } else if (Op.Type == AMDGPUOperand::ImmTyDppCtrl) { 7991 res = parseDPPCtrl(Operands); 7992 } else { 7993 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7994 if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) { 7995 res = parseOperandArrayWithPrefix("neg", Operands, 7996 AMDGPUOperand::ImmTyBLGP, 7997 nullptr); 7998 } 7999 } 8000 if (res != MatchOperand_NoMatch) { 8001 return res; 8002 } 8003 } 8004 return MatchOperand_NoMatch; 8005 } 8006 8007 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 8008 StringRef Name = getTokenStr(); 8009 if (Name == "mul") { 8010 return parseIntWithPrefix("mul", Operands, 8011 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 8012 } 8013 8014 if (Name == "div") { 8015 return parseIntWithPrefix("div", Operands, 8016 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 8017 } 8018 8019 return MatchOperand_NoMatch; 8020 } 8021 8022 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 8023 cvtVOP3P(Inst, Operands); 8024 8025 int Opc = Inst.getOpcode(); 8026 8027 int SrcNum; 8028 const int Ops[] = { AMDGPU::OpName::src0, 8029 AMDGPU::OpName::src1, 8030 AMDGPU::OpName::src2 }; 8031 for (SrcNum = 0; 8032 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 8033 ++SrcNum); 8034 assert(SrcNum > 0); 8035 8036 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8037 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 8038 8039 if ((OpSel & (1 << SrcNum)) != 0) { 8040 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 8041 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 8042 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 8043 } 8044 } 8045 8046 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 8047 // 1. This operand is input modifiers 8048 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 8049 // 2. 
This is not last operand 8050 && Desc.NumOperands > (OpNum + 1) 8051 // 3. Next operand is register class 8052 && Desc.OpInfo[OpNum + 1].RegClass != -1 8053 // 4. Next register is not tied to any other operand 8054 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 8055 } 8056 8057 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 8058 { 8059 OptionalImmIndexMap OptionalIdx; 8060 unsigned Opc = Inst.getOpcode(); 8061 8062 unsigned I = 1; 8063 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8064 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8065 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8066 } 8067 8068 for (unsigned E = Operands.size(); I != E; ++I) { 8069 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8070 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8071 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8072 } else if (Op.isInterpSlot() || 8073 Op.isInterpAttr() || 8074 Op.isAttrChan()) { 8075 Inst.addOperand(MCOperand::createImm(Op.getImm())); 8076 } else if (Op.isImmModifier()) { 8077 OptionalIdx[Op.getImmTy()] = I; 8078 } else { 8079 llvm_unreachable("unhandled operand type"); 8080 } 8081 } 8082 8083 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 8084 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 8085 } 8086 8087 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8088 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8089 } 8090 8091 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8092 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8093 } 8094 } 8095 8096 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands) 8097 { 8098 OptionalImmIndexMap OptionalIdx; 8099 unsigned Opc = Inst.getOpcode(); 8100 8101 unsigned I = 1; 8102 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8103 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8104 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8105 } 8106 8107 for (unsigned E = Operands.size(); I != E; ++I) { 8108 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8109 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8110 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8111 } else if (Op.isImmModifier()) { 8112 OptionalIdx[Op.getImmTy()] = I; 8113 } else { 8114 llvm_unreachable("unhandled operand type"); 8115 } 8116 } 8117 8118 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8119 8120 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8121 if (OpSelIdx != -1) 8122 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); 8123 8124 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP); 8125 8126 if (OpSelIdx == -1) 8127 return; 8128 8129 const int Ops[] = { AMDGPU::OpName::src0, 8130 AMDGPU::OpName::src1, 8131 AMDGPU::OpName::src2 }; 8132 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8133 AMDGPU::OpName::src1_modifiers, 8134 AMDGPU::OpName::src2_modifiers }; 8135 8136 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 8137 8138 for (int J = 0; J < 3; ++J) { 8139 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8140 if (OpIdx == -1) 8141 break; 8142 8143 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8144 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 8145 8146 if ((OpSel & (1 << J)) != 0) 8147 ModVal |= 
SISrcMods::OP_SEL_0; 8148 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && 8149 (OpSel & (1 << 3)) != 0) 8150 ModVal |= SISrcMods::DST_OP_SEL; 8151 8152 Inst.getOperand(ModIdx).setImm(ModVal); 8153 } 8154 } 8155 8156 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 8157 OptionalImmIndexMap &OptionalIdx) { 8158 unsigned Opc = Inst.getOpcode(); 8159 8160 unsigned I = 1; 8161 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8162 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8163 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8164 } 8165 8166 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 8167 // This instruction has src modifiers 8168 for (unsigned E = Operands.size(); I != E; ++I) { 8169 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8170 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8171 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8172 } else if (Op.isImmModifier()) { 8173 OptionalIdx[Op.getImmTy()] = I; 8174 } else if (Op.isRegOrImm()) { 8175 Op.addRegOrImmOperands(Inst, 1); 8176 } else { 8177 llvm_unreachable("unhandled operand type"); 8178 } 8179 } 8180 } else { 8181 // No src modifiers 8182 for (unsigned E = Operands.size(); I != E; ++I) { 8183 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8184 if (Op.isMod()) { 8185 OptionalIdx[Op.getImmTy()] = I; 8186 } else { 8187 Op.addRegOrImmOperands(Inst, 1); 8188 } 8189 } 8190 } 8191 8192 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8193 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8194 } 8195 8196 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8197 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8198 } 8199 8200 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 8201 // it has src2 register operand that is tied to dst operand 8202 // we don't allow modifiers for this operand in assembler so src2_modifiers 8203 // should be 0. 8204 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 8205 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 8206 Opc == AMDGPU::V_MAC_F32_e64_vi || 8207 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 8208 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 8209 Opc == AMDGPU::V_MAC_F16_e64_vi || 8210 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 8211 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 8212 Opc == AMDGPU::V_FMAC_F32_e64_gfx11 || 8213 Opc == AMDGPU::V_FMAC_F32_e64_vi || 8214 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 8215 Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 || 8216 Opc == AMDGPU::V_FMAC_F16_e64_gfx10 || 8217 Opc == AMDGPU::V_FMAC_F16_e64_gfx11) { 8218 auto it = Inst.begin(); 8219 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 8220 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 8221 ++it; 8222 // Copy the operand to ensure it's not invalidated when Inst grows. 
8223 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 8224 } 8225 } 8226 8227 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 8228 OptionalImmIndexMap OptionalIdx; 8229 cvtVOP3(Inst, Operands, OptionalIdx); 8230 } 8231 8232 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 8233 OptionalImmIndexMap &OptIdx) { 8234 const int Opc = Inst.getOpcode(); 8235 const MCInstrDesc &Desc = MII.get(Opc); 8236 8237 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 8238 8239 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 8240 assert(!IsPacked); 8241 Inst.addOperand(Inst.getOperand(0)); 8242 } 8243 8244 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 8245 // instruction, and then figure out where to actually put the modifiers 8246 8247 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8248 if (OpSelIdx != -1) { 8249 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 8250 } 8251 8252 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 8253 if (OpSelHiIdx != -1) { 8254 int DefaultVal = IsPacked ? -1 : 0; 8255 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 8256 DefaultVal); 8257 } 8258 8259 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 8260 if (NegLoIdx != -1) { 8261 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 8262 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 8263 } 8264 8265 const int Ops[] = { AMDGPU::OpName::src0, 8266 AMDGPU::OpName::src1, 8267 AMDGPU::OpName::src2 }; 8268 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8269 AMDGPU::OpName::src1_modifiers, 8270 AMDGPU::OpName::src2_modifiers }; 8271 8272 unsigned OpSel = 0; 8273 unsigned OpSelHi = 0; 8274 unsigned NegLo = 0; 8275 unsigned NegHi = 0; 8276 8277 if (OpSelIdx != -1) 8278 OpSel = Inst.getOperand(OpSelIdx).getImm(); 8279 8280 if (OpSelHiIdx != -1) 8281 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 8282 8283 if (NegLoIdx != -1) { 8284 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 8285 NegLo = Inst.getOperand(NegLoIdx).getImm(); 8286 NegHi = Inst.getOperand(NegHiIdx).getImm(); 8287 } 8288 8289 for (int J = 0; J < 3; ++J) { 8290 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8291 if (OpIdx == -1) 8292 break; 8293 8294 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8295 8296 if (ModIdx == -1) 8297 continue; 8298 8299 uint32_t ModVal = 0; 8300 8301 if ((OpSel & (1 << J)) != 0) 8302 ModVal |= SISrcMods::OP_SEL_0; 8303 8304 if ((OpSelHi & (1 << J)) != 0) 8305 ModVal |= SISrcMods::OP_SEL_1; 8306 8307 if ((NegLo & (1 << J)) != 0) 8308 ModVal |= SISrcMods::NEG; 8309 8310 if ((NegHi & (1 << J)) != 0) 8311 ModVal |= SISrcMods::NEG_HI; 8312 8313 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 8314 } 8315 } 8316 8317 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 8318 OptionalImmIndexMap OptIdx; 8319 cvtVOP3(Inst, Operands, OptIdx); 8320 cvtVOP3P(Inst, Operands, OptIdx); 8321 } 8322 8323 //===----------------------------------------------------------------------===// 8324 // dpp 8325 //===----------------------------------------------------------------------===// 8326 8327 bool AMDGPUOperand::isDPP8() const { 8328 return isImmTy(ImmTyDPP8); 8329 } 8330 8331 bool AMDGPUOperand::isDPPCtrl() const { 8332 using namespace AMDGPU::DPP; 8333 8334 
bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 8335 if (result) { 8336 int64_t Imm = getImm(); 8337 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 8338 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 8339 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 8340 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 8341 (Imm == DppCtrl::WAVE_SHL1) || 8342 (Imm == DppCtrl::WAVE_ROL1) || 8343 (Imm == DppCtrl::WAVE_SHR1) || 8344 (Imm == DppCtrl::WAVE_ROR1) || 8345 (Imm == DppCtrl::ROW_MIRROR) || 8346 (Imm == DppCtrl::ROW_HALF_MIRROR) || 8347 (Imm == DppCtrl::BCAST15) || 8348 (Imm == DppCtrl::BCAST31) || 8349 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 8350 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 8351 } 8352 return false; 8353 } 8354 8355 //===----------------------------------------------------------------------===// 8356 // mAI 8357 //===----------------------------------------------------------------------===// 8358 8359 bool AMDGPUOperand::isBLGP() const { 8360 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 8361 } 8362 8363 bool AMDGPUOperand::isCBSZ() const { 8364 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 8365 } 8366 8367 bool AMDGPUOperand::isABID() const { 8368 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 8369 } 8370 8371 bool AMDGPUOperand::isS16Imm() const { 8372 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 8373 } 8374 8375 bool AMDGPUOperand::isU16Imm() const { 8376 return isImm() && isUInt<16>(getImm()); 8377 } 8378 8379 //===----------------------------------------------------------------------===// 8380 // dim 8381 //===----------------------------------------------------------------------===// 8382 8383 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 8384 // We want to allow "dim:1D" etc., 8385 // but the initial 1 is tokenized as an integer. 
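// If the current token is such an integer, remember where it ends and require
// the identifier that follows to start at exactly that position (i.e. with no
// intervening whitespace) before gluing the two pieces back together.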
8386 std::string Token; 8387 if (isToken(AsmToken::Integer)) { 8388 SMLoc Loc = getToken().getEndLoc(); 8389 Token = std::string(getTokenStr()); 8390 lex(); 8391 if (getLoc() != Loc) 8392 return false; 8393 } 8394 8395 StringRef Suffix; 8396 if (!parseId(Suffix)) 8397 return false; 8398 Token += Suffix; 8399 8400 StringRef DimId = Token; 8401 if (DimId.startswith("SQ_RSRC_IMG_")) 8402 DimId = DimId.drop_front(12); 8403 8404 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 8405 if (!DimInfo) 8406 return false; 8407 8408 Encoding = DimInfo->Encoding; 8409 return true; 8410 } 8411 8412 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 8413 if (!isGFX10Plus()) 8414 return MatchOperand_NoMatch; 8415 8416 SMLoc S = getLoc(); 8417 8418 if (!trySkipId("dim", AsmToken::Colon)) 8419 return MatchOperand_NoMatch; 8420 8421 unsigned Encoding; 8422 SMLoc Loc = getLoc(); 8423 if (!parseDimId(Encoding)) { 8424 Error(Loc, "invalid dim value"); 8425 return MatchOperand_ParseFail; 8426 } 8427 8428 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 8429 AMDGPUOperand::ImmTyDim)); 8430 return MatchOperand_Success; 8431 } 8432 8433 //===----------------------------------------------------------------------===// 8434 // dpp 8435 //===----------------------------------------------------------------------===// 8436 8437 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 8438 SMLoc S = getLoc(); 8439 8440 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 8441 return MatchOperand_NoMatch; 8442 8443 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 8444 8445 int64_t Sels[8]; 8446 8447 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8448 return MatchOperand_ParseFail; 8449 8450 for (size_t i = 0; i < 8; ++i) { 8451 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8452 return MatchOperand_ParseFail; 8453 8454 SMLoc Loc = getLoc(); 8455 if (getParser().parseAbsoluteExpression(Sels[i])) 8456 return MatchOperand_ParseFail; 8457 if (0 > Sels[i] || 7 < Sels[i]) { 8458 Error(Loc, "expected a 3-bit value"); 8459 return MatchOperand_ParseFail; 8460 } 8461 } 8462 8463 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8464 return MatchOperand_ParseFail; 8465 8466 unsigned DPP8 = 0; 8467 for (size_t i = 0; i < 8; ++i) 8468 DPP8 |= (Sels[i] << (i * 3)); 8469 8470 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 8471 return MatchOperand_Success; 8472 } 8473 8474 bool 8475 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 8476 const OperandVector &Operands) { 8477 if (Ctrl == "row_newbcast") 8478 return isGFX90A(); 8479 8480 if (Ctrl == "row_share" || 8481 Ctrl == "row_xmask") 8482 return isGFX10Plus(); 8483 8484 if (Ctrl == "wave_shl" || 8485 Ctrl == "wave_shr" || 8486 Ctrl == "wave_rol" || 8487 Ctrl == "wave_ror" || 8488 Ctrl == "row_bcast") 8489 return isVI() || isGFX9(); 8490 8491 return Ctrl == "row_mirror" || 8492 Ctrl == "row_half_mirror" || 8493 Ctrl == "quad_perm" || 8494 Ctrl == "row_shl" || 8495 Ctrl == "row_shr" || 8496 Ctrl == "row_ror"; 8497 } 8498 8499 int64_t 8500 AMDGPUAsmParser::parseDPPCtrlPerm() { 8501 // quad_perm:[%d,%d,%d,%d] 8502 8503 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8504 return -1; 8505 8506 int64_t Val = 0; 8507 for (int i = 0; i < 4; ++i) { 8508 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8509 return -1; 8510 8511 int64_t Temp; 8512 SMLoc Loc = getLoc(); 8513 if 
(getParser().parseAbsoluteExpression(Temp)) 8514 return -1; 8515 if (Temp < 0 || Temp > 3) { 8516 Error(Loc, "expected a 2-bit value"); 8517 return -1; 8518 } 8519 8520 Val += (Temp << i * 2); 8521 } 8522 8523 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8524 return -1; 8525 8526 return Val; 8527 } 8528 8529 int64_t 8530 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 8531 using namespace AMDGPU::DPP; 8532 8533 // sel:%d 8534 8535 int64_t Val; 8536 SMLoc Loc = getLoc(); 8537 8538 if (getParser().parseAbsoluteExpression(Val)) 8539 return -1; 8540 8541 struct DppCtrlCheck { 8542 int64_t Ctrl; 8543 int Lo; 8544 int Hi; 8545 }; 8546 8547 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 8548 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 8549 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 8550 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 8551 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 8552 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 8553 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 8554 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 8555 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 8556 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 8557 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 8558 .Default({-1, 0, 0}); 8559 8560 bool Valid; 8561 if (Check.Ctrl == -1) { 8562 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 8563 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 8564 } else { 8565 Valid = Check.Lo <= Val && Val <= Check.Hi; 8566 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val); 8567 } 8568 8569 if (!Valid) { 8570 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 8571 return -1; 8572 } 8573 8574 return Val; 8575 } 8576 8577 OperandMatchResultTy 8578 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 8579 using namespace AMDGPU::DPP; 8580 8581 if (!isToken(AsmToken::Identifier) || 8582 !isSupportedDPPCtrl(getTokenStr(), Operands)) 8583 return MatchOperand_NoMatch; 8584 8585 SMLoc S = getLoc(); 8586 int64_t Val = -1; 8587 StringRef Ctrl; 8588 8589 parseId(Ctrl); 8590 8591 if (Ctrl == "row_mirror") { 8592 Val = DppCtrl::ROW_MIRROR; 8593 } else if (Ctrl == "row_half_mirror") { 8594 Val = DppCtrl::ROW_HALF_MIRROR; 8595 } else { 8596 if (skipToken(AsmToken::Colon, "expected a colon")) { 8597 if (Ctrl == "quad_perm") { 8598 Val = parseDPPCtrlPerm(); 8599 } else { 8600 Val = parseDPPCtrlSel(Ctrl); 8601 } 8602 } 8603 } 8604 8605 if (Val == -1) 8606 return MatchOperand_ParseFail; 8607 8608 Operands.push_back( 8609 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 8610 return MatchOperand_Success; 8611 } 8612 8613 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 8614 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 8615 } 8616 8617 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 8618 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 8619 } 8620 8621 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 8622 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 8623 } 8624 8625 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 8626 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 8627 } 8628 8629 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 8630 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 8631 } 8632 8633 // Add dummy $old operand 8634 void 
AMDGPUAsmParser::cvtVOPC64NoDstDPP(MCInst &Inst, 8635 const OperandVector &Operands, 8636 bool IsDPP8) { 8637 Inst.addOperand(MCOperand::createReg(0)); 8638 cvtVOP3DPP(Inst, Operands, IsDPP8); 8639 } 8640 8641 void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8642 OptionalImmIndexMap OptionalIdx; 8643 unsigned Opc = Inst.getOpcode(); 8644 bool HasModifiers = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8645 unsigned I = 1; 8646 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8647 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8648 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8649 } 8650 8651 int Fi = 0; 8652 for (unsigned E = Operands.size(); I != E; ++I) { 8653 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8654 MCOI::TIED_TO); 8655 if (TiedTo != -1) { 8656 assert((unsigned)TiedTo < Inst.getNumOperands()); 8657 // handle tied old or src2 for MAC instructions 8658 Inst.addOperand(Inst.getOperand(TiedTo)); 8659 } 8660 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8661 // Add the register arguments 8662 if (IsDPP8 && Op.isFI()) { 8663 Fi = Op.getImm(); 8664 } else if (HasModifiers && 8665 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8666 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8667 } else if (Op.isReg()) { 8668 Op.addRegOperands(Inst, 1); 8669 } else if (Op.isImm() && 8670 Desc.OpInfo[Inst.getNumOperands()].RegClass != -1) { 8671 assert(!HasModifiers && "Case should be unreachable with modifiers"); 8672 assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP"); 8673 Op.addImmOperands(Inst, 1); 8674 } else if (Op.isImm()) { 8675 OptionalIdx[Op.getImmTy()] = I; 8676 } else { 8677 llvm_unreachable("unhandled operand type"); 8678 } 8679 } 8680 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8681 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8682 } 8683 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8684 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8685 } 8686 if (Desc.TSFlags & SIInstrFlags::VOP3P) 8687 cvtVOP3P(Inst, Operands, OptionalIdx); 8688 else if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) { 8689 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); 8690 } 8691 8692 if (IsDPP8) { 8693 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8); 8694 using namespace llvm::AMDGPU::DPP; 8695 Inst.addOperand(MCOperand::createImm(Fi? 
DPP8_FI_1 : DPP8_FI_0)); 8696 } else { 8697 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4); 8698 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8699 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8700 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8701 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8702 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8703 } 8704 } 8705 } 8706 8707 // Add dummy $old operand 8708 void AMDGPUAsmParser::cvtVOPCNoDstDPP(MCInst &Inst, 8709 const OperandVector &Operands, 8710 bool IsDPP8) { 8711 Inst.addOperand(MCOperand::createReg(0)); 8712 cvtDPP(Inst, Operands, IsDPP8); 8713 } 8714 8715 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8716 OptionalImmIndexMap OptionalIdx; 8717 8718 unsigned Opc = Inst.getOpcode(); 8719 bool HasModifiers = 8720 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8721 unsigned I = 1; 8722 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8723 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8724 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8725 } 8726 8727 int Fi = 0; 8728 for (unsigned E = Operands.size(); I != E; ++I) { 8729 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8730 MCOI::TIED_TO); 8731 if (TiedTo != -1) { 8732 assert((unsigned)TiedTo < Inst.getNumOperands()); 8733 // handle tied old or src2 for MAC instructions 8734 Inst.addOperand(Inst.getOperand(TiedTo)); 8735 } 8736 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8737 // Add the register arguments 8738 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8739 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8740 // Skip it. 8741 continue; 8742 } 8743 8744 if (IsDPP8) { 8745 if (Op.isDPP8()) { 8746 Op.addImmOperands(Inst, 1); 8747 } else if (HasModifiers && 8748 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8749 Op.addRegWithFPInputModsOperands(Inst, 2); 8750 } else if (Op.isFI()) { 8751 Fi = Op.getImm(); 8752 } else if (Op.isReg()) { 8753 Op.addRegOperands(Inst, 1); 8754 } else { 8755 llvm_unreachable("Invalid operand type"); 8756 } 8757 } else { 8758 if (HasModifiers && 8759 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8760 Op.addRegWithFPInputModsOperands(Inst, 2); 8761 } else if (Op.isReg()) { 8762 Op.addRegOperands(Inst, 1); 8763 } else if (Op.isDPPCtrl()) { 8764 Op.addImmOperands(Inst, 1); 8765 } else if (Op.isImm()) { 8766 // Handle optional arguments 8767 OptionalIdx[Op.getImmTy()] = I; 8768 } else { 8769 llvm_unreachable("Invalid operand type"); 8770 } 8771 } 8772 } 8773 8774 if (IsDPP8) { 8775 using namespace llvm::AMDGPU::DPP; 8776 Inst.addOperand(MCOperand::createImm(Fi? 
DPP8_FI_1 : DPP8_FI_0)); 8777 } else { 8778 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8779 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8780 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8781 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8782 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8783 } 8784 } 8785 } 8786 8787 //===----------------------------------------------------------------------===// 8788 // sdwa 8789 //===----------------------------------------------------------------------===// 8790 8791 OperandMatchResultTy 8792 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 8793 AMDGPUOperand::ImmTy Type) { 8794 using namespace llvm::AMDGPU::SDWA; 8795 8796 SMLoc S = getLoc(); 8797 StringRef Value; 8798 OperandMatchResultTy res; 8799 8800 SMLoc StringLoc; 8801 res = parseStringWithPrefix(Prefix, Value, StringLoc); 8802 if (res != MatchOperand_Success) { 8803 return res; 8804 } 8805 8806 int64_t Int; 8807 Int = StringSwitch<int64_t>(Value) 8808 .Case("BYTE_0", SdwaSel::BYTE_0) 8809 .Case("BYTE_1", SdwaSel::BYTE_1) 8810 .Case("BYTE_2", SdwaSel::BYTE_2) 8811 .Case("BYTE_3", SdwaSel::BYTE_3) 8812 .Case("WORD_0", SdwaSel::WORD_0) 8813 .Case("WORD_1", SdwaSel::WORD_1) 8814 .Case("DWORD", SdwaSel::DWORD) 8815 .Default(0xffffffff); 8816 8817 if (Int == 0xffffffff) { 8818 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8819 return MatchOperand_ParseFail; 8820 } 8821 8822 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8823 return MatchOperand_Success; 8824 } 8825 8826 OperandMatchResultTy 8827 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8828 using namespace llvm::AMDGPU::SDWA; 8829 8830 SMLoc S = getLoc(); 8831 StringRef Value; 8832 OperandMatchResultTy res; 8833 8834 SMLoc StringLoc; 8835 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8836 if (res != MatchOperand_Success) { 8837 return res; 8838 } 8839 8840 int64_t Int; 8841 Int = StringSwitch<int64_t>(Value) 8842 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8843 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8844 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8845 .Default(0xffffffff); 8846 8847 if (Int == 0xffffffff) { 8848 Error(StringLoc, "invalid dst_unused value"); 8849 return MatchOperand_ParseFail; 8850 } 8851 8852 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 8853 return MatchOperand_Success; 8854 } 8855 8856 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8857 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8858 } 8859 8860 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8861 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8862 } 8863 8864 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8865 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8866 } 8867 8868 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8869 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8870 } 8871 8872 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8873 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8874 } 8875 8876 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8877 uint64_t BasicInstType, 8878 bool SkipDstVcc, 8879 bool 
SkipSrcVcc) { 8880 using namespace llvm::AMDGPU::SDWA; 8881 8882 OptionalImmIndexMap OptionalIdx; 8883 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 8884 bool SkippedVcc = false; 8885 8886 unsigned I = 1; 8887 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8888 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8889 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8890 } 8891 8892 for (unsigned E = Operands.size(); I != E; ++I) { 8893 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8894 if (SkipVcc && !SkippedVcc && Op.isReg() && 8895 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 8896 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 8897 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 8898 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 8899 // Skip VCC only if we didn't skip it on previous iteration. 8900 // Note that src0 and src1 occupy 2 slots each because of modifiers. 8901 if (BasicInstType == SIInstrFlags::VOP2 && 8902 ((SkipDstVcc && Inst.getNumOperands() == 1) || 8903 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 8904 SkippedVcc = true; 8905 continue; 8906 } else if (BasicInstType == SIInstrFlags::VOPC && 8907 Inst.getNumOperands() == 0) { 8908 SkippedVcc = true; 8909 continue; 8910 } 8911 } 8912 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8913 Op.addRegOrImmWithInputModsOperands(Inst, 2); 8914 } else if (Op.isImm()) { 8915 // Handle optional arguments 8916 OptionalIdx[Op.getImmTy()] = I; 8917 } else { 8918 llvm_unreachable("Invalid operand type"); 8919 } 8920 SkippedVcc = false; 8921 } 8922 8923 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 8924 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 8925 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 8926 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 8927 switch (BasicInstType) { 8928 case SIInstrFlags::VOP1: 8929 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8930 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8931 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8932 } 8933 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8934 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8935 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8936 break; 8937 8938 case SIInstrFlags::VOP2: 8939 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8940 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8941 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8942 } 8943 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8944 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8945 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8946 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8947 break; 8948 8949 case SIInstrFlags::VOPC: 8950 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 8951 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8952 addOptionalImmOperand(Inst, Operands, OptionalIdx, 

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that the
// MatchClassKind enum is defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks if we were given an immediate operand but
  // expected the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token when isToken returns true, and when the
    // name of the expression is not a valid token the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register, but it should also be enabled for
    // 64-bit operands. The following code enables it for SReg_64 operands used
    // as source and destination. The remaining source operands are handled in
    // isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//
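
// s_endpgm optionally carries a 16-bit immediate, so both of the following
// forms are intended to parse (illustrative example; the value 3 is arbitrary):
//   s_endpgm
//   s_endpgm 3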
OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }

//===----------------------------------------------------------------------===//
// LDSDIR
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitVDST() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitVDST);
}

bool AMDGPUOperand::isWaitVDST() const {
  return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
}

//===----------------------------------------------------------------------===//
// VINTERP
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitEXP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitEXP);
}

bool AMDGPUOperand::isWaitEXP() const {
  return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
}