//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetParser.h"

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ?
                 SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
    ImmTyWaitVDST,
    ImmTyWaitEXP,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
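    // For example, an operand spelled 'gds' that was lexed as an identifier may
    // reach us as an MCSymbolRefExpr; reporting it as a token here lets
    // getToken() below hand back the referenced symbol's name so it can still
    // be matched against keyword operands.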
210 return isSymbolRefExpr(); 211 } 212 213 bool isSymbolRefExpr() const { 214 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 215 } 216 217 bool isImm() const override { 218 return Kind == Immediate; 219 } 220 221 void setImmKindNone() const { 222 assert(isImm()); 223 Imm.Kind = ImmKindTyNone; 224 } 225 226 void setImmKindLiteral() const { 227 assert(isImm()); 228 Imm.Kind = ImmKindTyLiteral; 229 } 230 231 void setImmKindConst() const { 232 assert(isImm()); 233 Imm.Kind = ImmKindTyConst; 234 } 235 236 bool IsImmKindLiteral() const { 237 return isImm() && Imm.Kind == ImmKindTyLiteral; 238 } 239 240 bool isImmKindConst() const { 241 return isImm() && Imm.Kind == ImmKindTyConst; 242 } 243 244 bool isInlinableImm(MVT type) const; 245 bool isLiteralImm(MVT type) const; 246 247 bool isRegKind() const { 248 return Kind == Register; 249 } 250 251 bool isReg() const override { 252 return isRegKind() && !hasModifiers(); 253 } 254 255 bool isRegOrInline(unsigned RCID, MVT type) const { 256 return isRegClass(RCID) || isInlinableImm(type); 257 } 258 259 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 260 return isRegOrInline(RCID, type) || isLiteralImm(type); 261 } 262 263 bool isRegOrImmWithInt16InputMods() const { 264 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 265 } 266 267 bool isRegOrImmWithInt32InputMods() const { 268 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 269 } 270 271 bool isRegOrInlineImmWithInt16InputMods() const { 272 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16); 273 } 274 275 bool isRegOrInlineImmWithInt32InputMods() const { 276 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32); 277 } 278 279 bool isRegOrImmWithInt64InputMods() const { 280 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 281 } 282 283 bool isRegOrImmWithFP16InputMods() const { 284 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 285 } 286 287 bool isRegOrImmWithFP32InputMods() const { 288 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 289 } 290 291 bool isRegOrImmWithFP64InputMods() const { 292 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 293 } 294 295 bool isRegOrInlineImmWithFP16InputMods() const { 296 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16); 297 } 298 299 bool isRegOrInlineImmWithFP32InputMods() const { 300 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32); 301 } 302 303 304 bool isVReg() const { 305 return isRegClass(AMDGPU::VGPR_32RegClassID) || 306 isRegClass(AMDGPU::VReg_64RegClassID) || 307 isRegClass(AMDGPU::VReg_96RegClassID) || 308 isRegClass(AMDGPU::VReg_128RegClassID) || 309 isRegClass(AMDGPU::VReg_160RegClassID) || 310 isRegClass(AMDGPU::VReg_192RegClassID) || 311 isRegClass(AMDGPU::VReg_256RegClassID) || 312 isRegClass(AMDGPU::VReg_512RegClassID) || 313 isRegClass(AMDGPU::VReg_1024RegClassID); 314 } 315 316 bool isVReg32() const { 317 return isRegClass(AMDGPU::VGPR_32RegClassID); 318 } 319 320 bool isVReg32OrOff() const { 321 return isOff() || isVReg32(); 322 } 323 324 bool isNull() const { 325 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 326 } 327 328 bool isVRegWithInputMods() const; 329 330 bool isSDWAOperand(MVT type) const; 331 bool isSDWAFP16Operand() const; 332 bool isSDWAFP32Operand() const; 333 bool isSDWAInt16Operand() const; 334 bool isSDWAInt32Operand() const; 335 336 bool isImmTy(ImmTy ImmT) const { 337 return isImm() && Imm.Type == ImmT; 338 } 339 340 bool isImmModifier() const { 341 return isImm() && 
Imm.Type != ImmTyNone; 342 } 343 344 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 345 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 346 bool isDMask() const { return isImmTy(ImmTyDMask); } 347 bool isDim() const { return isImmTy(ImmTyDim); } 348 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 349 bool isDA() const { return isImmTy(ImmTyDA); } 350 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 351 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 352 bool isLWE() const { return isImmTy(ImmTyLWE); } 353 bool isOff() const { return isImmTy(ImmTyOff); } 354 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 355 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 356 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 357 bool isOffen() const { return isImmTy(ImmTyOffen); } 358 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 359 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 360 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 361 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 362 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 363 364 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 365 bool isGDS() const { return isImmTy(ImmTyGDS); } 366 bool isLDS() const { return isImmTy(ImmTyLDS); } 367 bool isCPol() const { return isImmTy(ImmTyCPol); } 368 bool isSWZ() const { return isImmTy(ImmTySWZ); } 369 bool isTFE() const { return isImmTy(ImmTyTFE); } 370 bool isD16() const { return isImmTy(ImmTyD16); } 371 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 372 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 373 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 374 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 375 bool isFI() const { return isImmTy(ImmTyDppFi); } 376 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 377 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 378 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 379 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 380 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 381 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 382 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 383 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 384 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 385 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 386 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 387 bool isHigh() const { return isImmTy(ImmTyHigh); } 388 389 bool isMod() const { 390 return isClampSI() || isOModSI(); 391 } 392 393 bool isRegOrImm() const { 394 return isReg() || isImm(); 395 } 396 397 bool isRegClass(unsigned RCID) const; 398 399 bool isInlineValue() const; 400 401 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 402 return isRegOrInline(RCID, type) && !hasModifiers(); 403 } 404 405 bool isSCSrcB16() const { 406 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 407 } 408 409 bool isSCSrcV2B16() const { 410 return isSCSrcB16(); 411 } 412 413 bool isSCSrcB32() const { 414 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 415 } 416 417 bool isSCSrcB64() const { 418 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 419 } 420 421 bool isBoolReg() const; 422 423 bool isSCSrcF16() const { 424 return 
isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 425 } 426 427 bool isSCSrcV2F16() const { 428 return isSCSrcF16(); 429 } 430 431 bool isSCSrcF32() const { 432 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 433 } 434 435 bool isSCSrcF64() const { 436 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 437 } 438 439 bool isSSrcB32() const { 440 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 441 } 442 443 bool isSSrcB16() const { 444 return isSCSrcB16() || isLiteralImm(MVT::i16); 445 } 446 447 bool isSSrcV2B16() const { 448 llvm_unreachable("cannot happen"); 449 return isSSrcB16(); 450 } 451 452 bool isSSrcB64() const { 453 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 454 // See isVSrc64(). 455 return isSCSrcB64() || isLiteralImm(MVT::i64); 456 } 457 458 bool isSSrcF32() const { 459 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 460 } 461 462 bool isSSrcF64() const { 463 return isSCSrcB64() || isLiteralImm(MVT::f64); 464 } 465 466 bool isSSrcF16() const { 467 return isSCSrcB16() || isLiteralImm(MVT::f16); 468 } 469 470 bool isSSrcV2F16() const { 471 llvm_unreachable("cannot happen"); 472 return isSSrcF16(); 473 } 474 475 bool isSSrcV2FP32() const { 476 llvm_unreachable("cannot happen"); 477 return isSSrcF32(); 478 } 479 480 bool isSCSrcV2FP32() const { 481 llvm_unreachable("cannot happen"); 482 return isSCSrcF32(); 483 } 484 485 bool isSSrcV2INT32() const { 486 llvm_unreachable("cannot happen"); 487 return isSSrcB32(); 488 } 489 490 bool isSCSrcV2INT32() const { 491 llvm_unreachable("cannot happen"); 492 return isSCSrcB32(); 493 } 494 495 bool isSSrcOrLdsB32() const { 496 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 497 isLiteralImm(MVT::i32) || isExpr(); 498 } 499 500 bool isVCSrcB32() const { 501 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 502 } 503 504 bool isVCSrcB64() const { 505 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 506 } 507 508 bool isVCSrcB16() const { 509 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 510 } 511 512 bool isVCSrcV2B16() const { 513 return isVCSrcB16(); 514 } 515 516 bool isVCSrcF32() const { 517 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 518 } 519 520 bool isVCSrcF64() const { 521 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 522 } 523 524 bool isVCSrcF16() const { 525 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 526 } 527 528 bool isVCSrcV2F16() const { 529 return isVCSrcF16(); 530 } 531 532 bool isVSrcB32() const { 533 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 534 } 535 536 bool isVSrcB64() const { 537 return isVCSrcF64() || isLiteralImm(MVT::i64); 538 } 539 540 bool isVSrcB16() const { 541 return isVCSrcB16() || isLiteralImm(MVT::i16); 542 } 543 544 bool isVSrcV2B16() const { 545 return isVSrcB16() || isLiteralImm(MVT::v2i16); 546 } 547 548 bool isVCSrcV2FP32() const { 549 return isVCSrcF64(); 550 } 551 552 bool isVSrcV2FP32() const { 553 return isVSrcF64() || isLiteralImm(MVT::v2f32); 554 } 555 556 bool isVCSrcV2INT32() const { 557 return isVCSrcB64(); 558 } 559 560 bool isVSrcV2INT32() const { 561 return isVSrcB64() || isLiteralImm(MVT::v2i32); 562 } 563 564 bool isVSrcF32() const { 565 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 566 } 567 568 bool isVSrcF64() const { 569 return isVCSrcF64() || isLiteralImm(MVT::f64); 570 } 571 572 bool isVSrcF16() const { 573 return isVCSrcF16() || 
isLiteralImm(MVT::f16); 574 } 575 576 bool isVSrcV2F16() const { 577 return isVSrcF16() || isLiteralImm(MVT::v2f16); 578 } 579 580 bool isVISrcB32() const { 581 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 582 } 583 584 bool isVISrcB16() const { 585 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 586 } 587 588 bool isVISrcV2B16() const { 589 return isVISrcB16(); 590 } 591 592 bool isVISrcF32() const { 593 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 594 } 595 596 bool isVISrcF16() const { 597 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 598 } 599 600 bool isVISrcV2F16() const { 601 return isVISrcF16() || isVISrcB32(); 602 } 603 604 bool isVISrc_64B64() const { 605 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); 606 } 607 608 bool isVISrc_64F64() const { 609 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); 610 } 611 612 bool isVISrc_64V2FP32() const { 613 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); 614 } 615 616 bool isVISrc_64V2INT32() const { 617 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); 618 } 619 620 bool isVISrc_256B64() const { 621 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); 622 } 623 624 bool isVISrc_256F64() const { 625 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); 626 } 627 628 bool isVISrc_128B16() const { 629 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); 630 } 631 632 bool isVISrc_128V2B16() const { 633 return isVISrc_128B16(); 634 } 635 636 bool isVISrc_128B32() const { 637 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); 638 } 639 640 bool isVISrc_128F32() const { 641 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); 642 } 643 644 bool isVISrc_256V2FP32() const { 645 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); 646 } 647 648 bool isVISrc_256V2INT32() const { 649 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); 650 } 651 652 bool isVISrc_512B32() const { 653 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); 654 } 655 656 bool isVISrc_512B16() const { 657 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); 658 } 659 660 bool isVISrc_512V2B16() const { 661 return isVISrc_512B16(); 662 } 663 664 bool isVISrc_512F32() const { 665 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); 666 } 667 668 bool isVISrc_512F16() const { 669 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); 670 } 671 672 bool isVISrc_512V2F16() const { 673 return isVISrc_512F16() || isVISrc_512B32(); 674 } 675 676 bool isVISrc_1024B32() const { 677 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); 678 } 679 680 bool isVISrc_1024B16() const { 681 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); 682 } 683 684 bool isVISrc_1024V2B16() const { 685 return isVISrc_1024B16(); 686 } 687 688 bool isVISrc_1024F32() const { 689 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); 690 } 691 692 bool isVISrc_1024F16() const { 693 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); 694 } 695 696 bool isVISrc_1024V2F16() const { 697 return isVISrc_1024F16() || isVISrc_1024B32(); 698 } 699 700 bool isAISrcB32() const { 701 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 702 } 703 704 bool isAISrcB16() const { 705 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 706 } 707 708 bool isAISrcV2B16() 
const { 709 return isAISrcB16(); 710 } 711 712 bool isAISrcF32() const { 713 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 714 } 715 716 bool isAISrcF16() const { 717 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 718 } 719 720 bool isAISrcV2F16() const { 721 return isAISrcF16() || isAISrcB32(); 722 } 723 724 bool isAISrc_64B64() const { 725 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); 726 } 727 728 bool isAISrc_64F64() const { 729 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); 730 } 731 732 bool isAISrc_128B32() const { 733 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 734 } 735 736 bool isAISrc_128B16() const { 737 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 738 } 739 740 bool isAISrc_128V2B16() const { 741 return isAISrc_128B16(); 742 } 743 744 bool isAISrc_128F32() const { 745 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 746 } 747 748 bool isAISrc_128F16() const { 749 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 750 } 751 752 bool isAISrc_128V2F16() const { 753 return isAISrc_128F16() || isAISrc_128B32(); 754 } 755 756 bool isVISrc_128F16() const { 757 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); 758 } 759 760 bool isVISrc_128V2F16() const { 761 return isVISrc_128F16() || isVISrc_128B32(); 762 } 763 764 bool isAISrc_256B64() const { 765 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); 766 } 767 768 bool isAISrc_256F64() const { 769 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); 770 } 771 772 bool isAISrc_512B32() const { 773 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 774 } 775 776 bool isAISrc_512B16() const { 777 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 778 } 779 780 bool isAISrc_512V2B16() const { 781 return isAISrc_512B16(); 782 } 783 784 bool isAISrc_512F32() const { 785 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 786 } 787 788 bool isAISrc_512F16() const { 789 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 790 } 791 792 bool isAISrc_512V2F16() const { 793 return isAISrc_512F16() || isAISrc_512B32(); 794 } 795 796 bool isAISrc_1024B32() const { 797 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 798 } 799 800 bool isAISrc_1024B16() const { 801 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 802 } 803 804 bool isAISrc_1024V2B16() const { 805 return isAISrc_1024B16(); 806 } 807 808 bool isAISrc_1024F32() const { 809 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 810 } 811 812 bool isAISrc_1024F16() const { 813 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 814 } 815 816 bool isAISrc_1024V2F16() const { 817 return isAISrc_1024F16() || isAISrc_1024B32(); 818 } 819 820 bool isKImmFP32() const { 821 return isLiteralImm(MVT::f32); 822 } 823 824 bool isKImmFP16() const { 825 return isLiteralImm(MVT::f16); 826 } 827 828 bool isMem() const override { 829 return false; 830 } 831 832 bool isExpr() const { 833 return Kind == Expression; 834 } 835 836 bool isSoppBrTarget() const { 837 return isExpr() || isImm(); 838 } 839 840 bool isSWaitCnt() const; 841 bool isDepCtr() const; 842 bool isSDelayAlu() const; 843 bool isHwreg() const; 844 bool isSendMsg() const; 845 bool isSwizzle() const; 846 bool isSMRDOffset8() const; 847 bool isSMEMOffset() const; 848 bool isSMRDLiteralOffset() const; 849 bool isDPP8() const; 850 bool 
isDPPCtrl() const; 851 bool isBLGP() const; 852 bool isCBSZ() const; 853 bool isABID() const; 854 bool isGPRIdxMode() const; 855 bool isS16Imm() const; 856 bool isU16Imm() const; 857 bool isEndpgm() const; 858 bool isWaitVDST() const; 859 bool isWaitEXP() const; 860 861 StringRef getExpressionAsToken() const { 862 assert(isExpr()); 863 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 864 return S->getSymbol().getName(); 865 } 866 867 StringRef getToken() const { 868 assert(isToken()); 869 870 if (Kind == Expression) 871 return getExpressionAsToken(); 872 873 return StringRef(Tok.Data, Tok.Length); 874 } 875 876 int64_t getImm() const { 877 assert(isImm()); 878 return Imm.Val; 879 } 880 881 void setImm(int64_t Val) { 882 assert(isImm()); 883 Imm.Val = Val; 884 } 885 886 ImmTy getImmTy() const { 887 assert(isImm()); 888 return Imm.Type; 889 } 890 891 unsigned getReg() const override { 892 assert(isRegKind()); 893 return Reg.RegNo; 894 } 895 896 SMLoc getStartLoc() const override { 897 return StartLoc; 898 } 899 900 SMLoc getEndLoc() const override { 901 return EndLoc; 902 } 903 904 SMRange getLocRange() const { 905 return SMRange(StartLoc, EndLoc); 906 } 907 908 Modifiers getModifiers() const { 909 assert(isRegKind() || isImmTy(ImmTyNone)); 910 return isRegKind() ? Reg.Mods : Imm.Mods; 911 } 912 913 void setModifiers(Modifiers Mods) { 914 assert(isRegKind() || isImmTy(ImmTyNone)); 915 if (isRegKind()) 916 Reg.Mods = Mods; 917 else 918 Imm.Mods = Mods; 919 } 920 921 bool hasModifiers() const { 922 return getModifiers().hasModifiers(); 923 } 924 925 bool hasFPModifiers() const { 926 return getModifiers().hasFPModifiers(); 927 } 928 929 bool hasIntModifiers() const { 930 return getModifiers().hasIntModifiers(); 931 } 932 933 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 934 935 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 936 937 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 938 939 template <unsigned Bitwidth> 940 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 941 942 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 943 addKImmFPOperands<16>(Inst, N); 944 } 945 946 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 947 addKImmFPOperands<32>(Inst, N); 948 } 949 950 void addRegOperands(MCInst &Inst, unsigned N) const; 951 952 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 953 addRegOperands(Inst, N); 954 } 955 956 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 957 if (isRegKind()) 958 addRegOperands(Inst, N); 959 else if (isExpr()) 960 Inst.addOperand(MCOperand::createExpr(Expr)); 961 else 962 addImmOperands(Inst, N); 963 } 964 965 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 966 Modifiers Mods = getModifiers(); 967 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 968 if (isRegKind()) { 969 addRegOperands(Inst, N); 970 } else { 971 addImmOperands(Inst, N, false); 972 } 973 } 974 975 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 976 assert(!hasIntModifiers()); 977 addRegOrImmWithInputModsOperands(Inst, N); 978 } 979 980 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 981 assert(!hasFPModifiers()); 982 addRegOrImmWithInputModsOperands(Inst, N); 983 } 984 985 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 986 Modifiers Mods = getModifiers(); 987 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 988 
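    // Unlike addRegOrImmWithInputModsOperands() above, this variant is only for
    // operands that must be registers, hence the assertion that follows.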
assert(isRegKind()); 989 addRegOperands(Inst, N); 990 } 991 992 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 993 assert(!hasIntModifiers()); 994 addRegWithInputModsOperands(Inst, N); 995 } 996 997 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 998 assert(!hasFPModifiers()); 999 addRegWithInputModsOperands(Inst, N); 1000 } 1001 1002 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 1003 if (isImm()) 1004 addImmOperands(Inst, N); 1005 else { 1006 assert(isExpr()); 1007 Inst.addOperand(MCOperand::createExpr(Expr)); 1008 } 1009 } 1010 1011 static void printImmTy(raw_ostream& OS, ImmTy Type) { 1012 switch (Type) { 1013 case ImmTyNone: OS << "None"; break; 1014 case ImmTyGDS: OS << "GDS"; break; 1015 case ImmTyLDS: OS << "LDS"; break; 1016 case ImmTyOffen: OS << "Offen"; break; 1017 case ImmTyIdxen: OS << "Idxen"; break; 1018 case ImmTyAddr64: OS << "Addr64"; break; 1019 case ImmTyOffset: OS << "Offset"; break; 1020 case ImmTyInstOffset: OS << "InstOffset"; break; 1021 case ImmTyOffset0: OS << "Offset0"; break; 1022 case ImmTyOffset1: OS << "Offset1"; break; 1023 case ImmTyCPol: OS << "CPol"; break; 1024 case ImmTySWZ: OS << "SWZ"; break; 1025 case ImmTyTFE: OS << "TFE"; break; 1026 case ImmTyD16: OS << "D16"; break; 1027 case ImmTyFORMAT: OS << "FORMAT"; break; 1028 case ImmTyClampSI: OS << "ClampSI"; break; 1029 case ImmTyOModSI: OS << "OModSI"; break; 1030 case ImmTyDPP8: OS << "DPP8"; break; 1031 case ImmTyDppCtrl: OS << "DppCtrl"; break; 1032 case ImmTyDppRowMask: OS << "DppRowMask"; break; 1033 case ImmTyDppBankMask: OS << "DppBankMask"; break; 1034 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 1035 case ImmTyDppFi: OS << "FI"; break; 1036 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 1037 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 1038 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 1039 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 1040 case ImmTyDMask: OS << "DMask"; break; 1041 case ImmTyDim: OS << "Dim"; break; 1042 case ImmTyUNorm: OS << "UNorm"; break; 1043 case ImmTyDA: OS << "DA"; break; 1044 case ImmTyR128A16: OS << "R128A16"; break; 1045 case ImmTyA16: OS << "A16"; break; 1046 case ImmTyLWE: OS << "LWE"; break; 1047 case ImmTyOff: OS << "Off"; break; 1048 case ImmTyExpTgt: OS << "ExpTgt"; break; 1049 case ImmTyExpCompr: OS << "ExpCompr"; break; 1050 case ImmTyExpVM: OS << "ExpVM"; break; 1051 case ImmTyHwreg: OS << "Hwreg"; break; 1052 case ImmTySendMsg: OS << "SendMsg"; break; 1053 case ImmTyInterpSlot: OS << "InterpSlot"; break; 1054 case ImmTyInterpAttr: OS << "InterpAttr"; break; 1055 case ImmTyAttrChan: OS << "AttrChan"; break; 1056 case ImmTyOpSel: OS << "OpSel"; break; 1057 case ImmTyOpSelHi: OS << "OpSelHi"; break; 1058 case ImmTyNegLo: OS << "NegLo"; break; 1059 case ImmTyNegHi: OS << "NegHi"; break; 1060 case ImmTySwizzle: OS << "Swizzle"; break; 1061 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 1062 case ImmTyHigh: OS << "High"; break; 1063 case ImmTyBLGP: OS << "BLGP"; break; 1064 case ImmTyCBSZ: OS << "CBSZ"; break; 1065 case ImmTyABID: OS << "ABID"; break; 1066 case ImmTyEndpgm: OS << "Endpgm"; break; 1067 case ImmTyWaitVDST: OS << "WaitVDST"; break; 1068 case ImmTyWaitEXP: OS << "WaitEXP"; break; 1069 } 1070 } 1071 1072 void print(raw_ostream &OS) const override { 1073 switch (Kind) { 1074 case Register: 1075 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 1076 break; 1077 case Immediate: 1078 OS << '<' << getImm(); 1079 if (getImmTy() != ImmTyNone) { 1080 
OS << " type: "; printImmTy(OS, getImmTy()); 1081 } 1082 OS << " mods: " << Imm.Mods << '>'; 1083 break; 1084 case Token: 1085 OS << '\'' << getToken() << '\''; 1086 break; 1087 case Expression: 1088 OS << "<expr " << *Expr << '>'; 1089 break; 1090 } 1091 } 1092 1093 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 1094 int64_t Val, SMLoc Loc, 1095 ImmTy Type = ImmTyNone, 1096 bool IsFPImm = false) { 1097 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 1098 Op->Imm.Val = Val; 1099 Op->Imm.IsFPImm = IsFPImm; 1100 Op->Imm.Kind = ImmKindTyNone; 1101 Op->Imm.Type = Type; 1102 Op->Imm.Mods = Modifiers(); 1103 Op->StartLoc = Loc; 1104 Op->EndLoc = Loc; 1105 return Op; 1106 } 1107 1108 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 1109 StringRef Str, SMLoc Loc, 1110 bool HasExplicitEncodingSize = true) { 1111 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 1112 Res->Tok.Data = Str.data(); 1113 Res->Tok.Length = Str.size(); 1114 Res->StartLoc = Loc; 1115 Res->EndLoc = Loc; 1116 return Res; 1117 } 1118 1119 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 1120 unsigned RegNo, SMLoc S, 1121 SMLoc E) { 1122 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 1123 Op->Reg.RegNo = RegNo; 1124 Op->Reg.Mods = Modifiers(); 1125 Op->StartLoc = S; 1126 Op->EndLoc = E; 1127 return Op; 1128 } 1129 1130 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 1131 const class MCExpr *Expr, SMLoc S) { 1132 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 1133 Op->Expr = Expr; 1134 Op->StartLoc = S; 1135 Op->EndLoc = S; 1136 return Op; 1137 } 1138 }; 1139 1140 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 1141 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 1142 return OS; 1143 } 1144 1145 //===----------------------------------------------------------------------===// 1146 // AsmParser 1147 //===----------------------------------------------------------------------===// 1148 1149 // Holds info related to the current kernel, e.g. count of SGPRs used. 1150 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 1151 // .amdgpu_hsa_kernel or at EOF. 
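// The running counts are published through the MC symbols .kernel.sgpr_count,
// .kernel.vgpr_count and .kernel.agpr_count, updated as registers are parsed
// (see usesRegister() below). For example (a sketch; "my_kernel" is a
// placeholder name), after parsing
//
//   .amdgpu_hsa_kernel my_kernel
//   my_kernel:
//     s_mov_b32 s5, 0
//     v_mov_b32 v7, 0
//
// .kernel.sgpr_count is at least 6 and .kernel.vgpr_count is at least 8.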
1152 class KernelScopeInfo { 1153 int SgprIndexUnusedMin = -1; 1154 int VgprIndexUnusedMin = -1; 1155 int AgprIndexUnusedMin = -1; 1156 MCContext *Ctx = nullptr; 1157 MCSubtargetInfo const *MSTI = nullptr; 1158 1159 void usesSgprAt(int i) { 1160 if (i >= SgprIndexUnusedMin) { 1161 SgprIndexUnusedMin = ++i; 1162 if (Ctx) { 1163 MCSymbol* const Sym = 1164 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1165 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1166 } 1167 } 1168 } 1169 1170 void usesVgprAt(int i) { 1171 if (i >= VgprIndexUnusedMin) { 1172 VgprIndexUnusedMin = ++i; 1173 if (Ctx) { 1174 MCSymbol* const Sym = 1175 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1176 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1177 VgprIndexUnusedMin); 1178 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1179 } 1180 } 1181 } 1182 1183 void usesAgprAt(int i) { 1184 // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction 1185 if (!hasMAIInsts(*MSTI)) 1186 return; 1187 1188 if (i >= AgprIndexUnusedMin) { 1189 AgprIndexUnusedMin = ++i; 1190 if (Ctx) { 1191 MCSymbol* const Sym = 1192 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count")); 1193 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx)); 1194 1195 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a) 1196 MCSymbol* const vSym = 1197 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1198 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1199 VgprIndexUnusedMin); 1200 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1201 } 1202 } 1203 } 1204 1205 public: 1206 KernelScopeInfo() = default; 1207 1208 void initialize(MCContext &Context) { 1209 Ctx = &Context; 1210 MSTI = Ctx->getSubtargetInfo(); 1211 1212 usesSgprAt(SgprIndexUnusedMin = -1); 1213 usesVgprAt(VgprIndexUnusedMin = -1); 1214 if (hasMAIInsts(*MSTI)) { 1215 usesAgprAt(AgprIndexUnusedMin = -1); 1216 } 1217 } 1218 1219 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, 1220 unsigned RegWidth) { 1221 switch (RegKind) { 1222 case IS_SGPR: 1223 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1224 break; 1225 case IS_AGPR: 1226 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1227 break; 1228 case IS_VGPR: 1229 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1230 break; 1231 default: 1232 break; 1233 } 1234 } 1235 }; 1236 1237 class AMDGPUAsmParser : public MCTargetAsmParser { 1238 MCAsmParser &Parser; 1239 1240 // Number of extra operands parsed after the first optional operand. 1241 // This may be necessary to skip hardcoded mandatory operands. 1242 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1243 1244 unsigned ForcedEncodingSize = 0; 1245 bool ForcedDPP = false; 1246 bool ForcedSDWA = false; 1247 KernelScopeInfo KernelScope; 1248 unsigned CPolSeen; 1249 1250 /// @name Auto-generated Match Functions 1251 /// { 1252 1253 #define GET_ASSEMBLER_HEADER 1254 #include "AMDGPUGenAsmMatcher.inc" 1255 1256 /// } 1257 1258 private: 1259 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1260 bool OutOfRangeError(SMRange Range); 1261 /// Calculate VGPR/SGPR blocks required for given target, reserved 1262 /// registers, and user-specified NextFreeXGPR values. 1263 /// 1264 /// \param Features [in] Target features, used for bug corrections. 1265 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1266 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 
1267 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1268 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1269 /// descriptor field, if valid. 1270 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1271 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1272 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1273 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1274 /// \param VGPRBlocks [out] Result VGPR block count. 1275 /// \param SGPRBlocks [out] Result SGPR block count. 1276 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1277 bool FlatScrUsed, bool XNACKUsed, 1278 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1279 SMRange VGPRRange, unsigned NextFreeSGPR, 1280 SMRange SGPRRange, unsigned &VGPRBlocks, 1281 unsigned &SGPRBlocks); 1282 bool ParseDirectiveAMDGCNTarget(); 1283 bool ParseDirectiveAMDHSAKernel(); 1284 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1285 bool ParseDirectiveHSACodeObjectVersion(); 1286 bool ParseDirectiveHSACodeObjectISA(); 1287 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1288 bool ParseDirectiveAMDKernelCodeT(); 1289 // TODO: Possibly make subtargetHasRegister const. 1290 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo); 1291 bool ParseDirectiveAMDGPUHsaKernel(); 1292 1293 bool ParseDirectiveISAVersion(); 1294 bool ParseDirectiveHSAMetadata(); 1295 bool ParseDirectivePALMetadataBegin(); 1296 bool ParseDirectivePALMetadata(); 1297 bool ParseDirectiveAMDGPULDS(); 1298 1299 /// Common code to parse out a block of text (typically YAML) between start and 1300 /// end directives. 1301 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1302 const char *AssemblerDirectiveEnd, 1303 std::string &CollectString); 1304 1305 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1306 RegisterKind RegKind, unsigned Reg1, SMLoc Loc); 1307 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1308 unsigned &RegNum, unsigned &RegWidth, 1309 bool RestoreOnFailure = false); 1310 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1311 unsigned &RegNum, unsigned &RegWidth, 1312 SmallVectorImpl<AsmToken> &Tokens); 1313 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, 1314 unsigned &RegWidth, 1315 SmallVectorImpl<AsmToken> &Tokens); 1316 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, 1317 unsigned &RegWidth, 1318 SmallVectorImpl<AsmToken> &Tokens); 1319 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 1320 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); 1321 bool ParseRegRange(unsigned& Num, unsigned& Width); 1322 unsigned getRegularReg(RegisterKind RegKind, 1323 unsigned RegNum, 1324 unsigned RegWidth, 1325 SMLoc Loc); 1326 1327 bool isRegister(); 1328 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1329 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1330 void initializeGprCountSymbol(RegisterKind RegKind); 1331 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1332 unsigned RegWidth); 1333 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1334 bool IsAtomic, bool IsLds = false); 1335 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 1336 bool IsGdsHardcoded); 1337 1338 public: 1339 enum AMDGPUMatchResultTy { 1340 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 1341 }; 1342 enum 
OperandMode { 1343 OperandMode_Default, 1344 OperandMode_NSA, 1345 }; 1346 1347 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; 1348 1349 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, 1350 const MCInstrInfo &MII, 1351 const MCTargetOptions &Options) 1352 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { 1353 MCAsmParserExtension::Initialize(Parser); 1354 1355 if (getFeatureBits().none()) { 1356 // Set default features. 1357 copySTI().ToggleFeature("southern-islands"); 1358 } 1359 1360 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits())); 1361 1362 { 1363 // TODO: make those pre-defined variables read-only. 1364 // Currently there is none suitable machinery in the core llvm-mc for this. 1365 // MCSymbol::isRedefinable is intended for another purpose, and 1366 // AsmParser::parseDirectiveSet() cannot be specialized for specific target. 1367 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1368 MCContext &Ctx = getContext(); 1369 if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) { 1370 MCSymbol *Sym = 1371 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1372 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1373 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1374 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1375 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1376 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1377 } else { 1378 MCSymbol *Sym = 1379 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1380 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1381 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1382 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1383 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1384 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1385 } 1386 if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) { 1387 initializeGprCountSymbol(IS_VGPR); 1388 initializeGprCountSymbol(IS_SGPR); 1389 } else 1390 KernelScope.initialize(getContext()); 1391 } 1392 } 1393 1394 bool hasMIMG_R128() const { 1395 return AMDGPU::hasMIMG_R128(getSTI()); 1396 } 1397 1398 bool hasPackedD16() const { 1399 return AMDGPU::hasPackedD16(getSTI()); 1400 } 1401 1402 bool hasGFX10A16() const { 1403 return AMDGPU::hasGFX10A16(getSTI()); 1404 } 1405 1406 bool hasG16() const { return AMDGPU::hasG16(getSTI()); } 1407 1408 bool isSI() const { 1409 return AMDGPU::isSI(getSTI()); 1410 } 1411 1412 bool isCI() const { 1413 return AMDGPU::isCI(getSTI()); 1414 } 1415 1416 bool isVI() const { 1417 return AMDGPU::isVI(getSTI()); 1418 } 1419 1420 bool isGFX9() const { 1421 return AMDGPU::isGFX9(getSTI()); 1422 } 1423 1424 // TODO: isGFX90A is also true for GFX940. We need to clean it. 
1425 bool isGFX90A() const { 1426 return AMDGPU::isGFX90A(getSTI()); 1427 } 1428 1429 bool isGFX940() const { 1430 return AMDGPU::isGFX940(getSTI()); 1431 } 1432 1433 bool isGFX9Plus() const { 1434 return AMDGPU::isGFX9Plus(getSTI()); 1435 } 1436 1437 bool isGFX10() const { 1438 return AMDGPU::isGFX10(getSTI()); 1439 } 1440 1441 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1442 1443 bool isGFX11() const { 1444 return AMDGPU::isGFX11(getSTI()); 1445 } 1446 1447 bool isGFX11Plus() const { 1448 return AMDGPU::isGFX11Plus(getSTI()); 1449 } 1450 1451 bool isGFX10_BEncoding() const { 1452 return AMDGPU::isGFX10_BEncoding(getSTI()); 1453 } 1454 1455 bool hasInv2PiInlineImm() const { 1456 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1457 } 1458 1459 bool hasFlatOffsets() const { 1460 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1461 } 1462 1463 bool hasArchitectedFlatScratch() const { 1464 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; 1465 } 1466 1467 bool hasSGPR102_SGPR103() const { 1468 return !isVI() && !isGFX9(); 1469 } 1470 1471 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1472 1473 bool hasIntClamp() const { 1474 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1475 } 1476 1477 AMDGPUTargetStreamer &getTargetStreamer() { 1478 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1479 return static_cast<AMDGPUTargetStreamer &>(TS); 1480 } 1481 1482 const MCRegisterInfo *getMRI() const { 1483 // We need this const_cast because for some reason getContext() is not const 1484 // in MCAsmParser. 1485 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1486 } 1487 1488 const MCInstrInfo *getMII() const { 1489 return &MII; 1490 } 1491 1492 const FeatureBitset &getFeatureBits() const { 1493 return getSTI().getFeatureBits(); 1494 } 1495 1496 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1497 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1498 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1499 1500 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1501 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1502 bool isForcedDPP() const { return ForcedDPP; } 1503 bool isForcedSDWA() const { return ForcedSDWA; } 1504 ArrayRef<unsigned> getMatchedVariants() const; 1505 StringRef getMatchedVariantName() const; 1506 1507 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1508 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1509 bool RestoreOnFailure); 1510 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1511 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1512 SMLoc &EndLoc) override; 1513 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1514 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1515 unsigned Kind) override; 1516 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1517 OperandVector &Operands, MCStreamer &Out, 1518 uint64_t &ErrorInfo, 1519 bool MatchingInlineAsm) override; 1520 bool ParseDirective(AsmToken DirectiveID) override; 1521 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1522 OperandMode Mode = OperandMode_Default); 1523 StringRef parseMnemonicSuffix(StringRef Name); 1524 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1525 SMLoc NameLoc, OperandVector &Operands) override; 1526 //bool 
ProcessInstruction(MCInst &Inst); 1527 1528 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1529 1530 OperandMatchResultTy 1531 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1532 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1533 bool (*ConvertResult)(int64_t &) = nullptr); 1534 1535 OperandMatchResultTy 1536 parseOperandArrayWithPrefix(const char *Prefix, 1537 OperandVector &Operands, 1538 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1539 bool (*ConvertResult)(int64_t&) = nullptr); 1540 1541 OperandMatchResultTy 1542 parseNamedBit(StringRef Name, OperandVector &Operands, 1543 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1544 OperandMatchResultTy parseCPol(OperandVector &Operands); 1545 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1546 StringRef &Value, 1547 SMLoc &StringLoc); 1548 1549 bool isModifier(); 1550 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1551 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1552 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1553 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1554 bool parseSP3NegModifier(); 1555 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1556 OperandMatchResultTy parseReg(OperandVector &Operands); 1557 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1558 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1559 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1560 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1561 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1562 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1563 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1564 OperandMatchResultTy parseUfmt(int64_t &Format); 1565 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1566 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1567 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1568 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1569 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1570 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1571 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1572 1573 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1574 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1575 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1576 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1577 1578 bool parseCnt(int64_t &IntVal); 1579 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1580 1581 bool parseDepCtr(int64_t &IntVal, unsigned &Mask); 1582 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName); 1583 OperandMatchResultTy parseDepCtrOps(OperandVector &Operands); 1584 1585 bool parseDelay(int64_t &Delay); 1586 OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands); 1587 1588 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1589 1590 private: 1591 struct OperandInfoTy { 1592 SMLoc Loc; 1593 int64_t Id; 
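    // IsSymbolic and IsDefined (below) record whether the field was written by
    // name rather than as a number, and whether it was present at all.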
1594 bool IsSymbolic = false; 1595 bool IsDefined = false; 1596 1597 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1598 }; 1599 1600 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1601 bool validateSendMsg(const OperandInfoTy &Msg, 1602 const OperandInfoTy &Op, 1603 const OperandInfoTy &Stream); 1604 1605 bool parseHwregBody(OperandInfoTy &HwReg, 1606 OperandInfoTy &Offset, 1607 OperandInfoTy &Width); 1608 bool validateHwreg(const OperandInfoTy &HwReg, 1609 const OperandInfoTy &Offset, 1610 const OperandInfoTy &Width); 1611 1612 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1613 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1614 SMLoc getBLGPLoc(const OperandVector &Operands) const; 1615 1616 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1617 const OperandVector &Operands) const; 1618 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1619 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1620 SMLoc getLitLoc(const OperandVector &Operands) const; 1621 SMLoc getConstLoc(const OperandVector &Operands) const; 1622 1623 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1624 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1625 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1626 bool validateSOPLiteral(const MCInst &Inst) const; 1627 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1628 bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands); 1629 bool validateIntClampSupported(const MCInst &Inst); 1630 bool validateMIMGAtomicDMask(const MCInst &Inst); 1631 bool validateMIMGGatherDMask(const MCInst &Inst); 1632 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1633 Optional<StringRef> validateMIMGDataSize(const MCInst &Inst); 1634 bool validateMIMGAddrSize(const MCInst &Inst); 1635 bool validateMIMGD16(const MCInst &Inst); 1636 bool validateMIMGDim(const MCInst &Inst); 1637 bool validateMIMGMSAA(const MCInst &Inst); 1638 bool validateOpSel(const MCInst &Inst); 1639 bool validateDPP(const MCInst &Inst, const OperandVector &Operands); 1640 bool validateVccOperand(unsigned Reg) const; 1641 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands); 1642 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1643 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands); 1644 bool validateAGPRLdSt(const MCInst &Inst) const; 1645 bool validateVGPRAlign(const MCInst &Inst) const; 1646 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands); 1647 bool validateGWS(const MCInst &Inst, const OperandVector &Operands); 1648 bool validateDivScale(const MCInst &Inst); 1649 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1650 const SMLoc &IDLoc); 1651 bool validateFlatLdsDMA(const MCInst &Inst, const OperandVector &Operands, 1652 const SMLoc &IDLoc); 1653 bool validateExeczVcczOperands(const OperandVector &Operands); 1654 Optional<StringRef> validateLdsDirect(const MCInst &Inst); 1655 unsigned getConstantBusLimit(unsigned Opcode) const; 1656 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1657 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1658 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1659 1660 bool isSupportedMnemo(StringRef Mnemo, 1661 const 
FeatureBitset &FBS); 1662 bool isSupportedMnemo(StringRef Mnemo, 1663 const FeatureBitset &FBS, 1664 ArrayRef<unsigned> Variants); 1665 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1666 1667 bool isId(const StringRef Id) const; 1668 bool isId(const AsmToken &Token, const StringRef Id) const; 1669 bool isToken(const AsmToken::TokenKind Kind) const; 1670 bool trySkipId(const StringRef Id); 1671 bool trySkipId(const StringRef Pref, const StringRef Id); 1672 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1673 bool trySkipToken(const AsmToken::TokenKind Kind); 1674 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1675 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1676 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1677 1678 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1679 AsmToken::TokenKind getTokenKind() const; 1680 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1681 bool parseExpr(OperandVector &Operands); 1682 StringRef getTokenStr() const; 1683 AsmToken peekToken(); 1684 AsmToken getToken() const; 1685 SMLoc getLoc() const; 1686 void lex(); 1687 1688 public: 1689 void onBeginOfFile() override; 1690 1691 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1692 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1693 1694 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1695 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1696 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1697 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1698 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1699 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1700 1701 bool parseSwizzleOperand(int64_t &Op, 1702 const unsigned MinVal, 1703 const unsigned MaxVal, 1704 const StringRef ErrMsg, 1705 SMLoc &Loc); 1706 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1707 const unsigned MinVal, 1708 const unsigned MaxVal, 1709 const StringRef ErrMsg); 1710 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1711 bool parseSwizzleOffset(int64_t &Imm); 1712 bool parseSwizzleMacro(int64_t &Imm); 1713 bool parseSwizzleQuadPerm(int64_t &Imm); 1714 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1715 bool parseSwizzleBroadcast(int64_t &Imm); 1716 bool parseSwizzleSwap(int64_t &Imm); 1717 bool parseSwizzleReverse(int64_t &Imm); 1718 1719 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1720 int64_t parseGPRIdxMacro(); 1721 1722 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 1723 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1724 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); } 1725 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1726 1727 AMDGPUOperand::Ptr defaultCPol() const; 1728 1729 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1730 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1731 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1732 AMDGPUOperand::Ptr defaultFlatOffset() const; 1733 1734 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1735 1736 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1737 OptionalImmIndexMap &OptionalIdx); 1738 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1739 void cvtVOP3(MCInst &Inst, const 
OperandVector &Operands); 1740 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1741 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 1742 OptionalImmIndexMap &OptionalIdx); 1743 1744 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1745 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands); 1746 1747 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1748 bool IsAtomic = false); 1749 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1750 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1751 1752 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands); 1753 1754 bool parseDimId(unsigned &Encoding); 1755 OperandMatchResultTy parseDim(OperandVector &Operands); 1756 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1757 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1758 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1759 int64_t parseDPPCtrlSel(StringRef Ctrl); 1760 int64_t parseDPPCtrlPerm(); 1761 AMDGPUOperand::Ptr defaultRowMask() const; 1762 AMDGPUOperand::Ptr defaultBankMask() const; 1763 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1764 AMDGPUOperand::Ptr defaultFI() const; 1765 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1766 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { 1767 cvtDPP(Inst, Operands, true); 1768 } 1769 void cvtVOPCNoDstDPP(MCInst &Inst, const OperandVector &Operands, 1770 bool IsDPP8 = false); 1771 void cvtVOPCNoDstDPP8(MCInst &Inst, const OperandVector &Operands) { 1772 cvtVOPCNoDstDPP(Inst, Operands, true); 1773 } 1774 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, 1775 bool IsDPP8 = false); 1776 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) { 1777 cvtVOP3DPP(Inst, Operands, true); 1778 } 1779 void cvtVOPC64NoDstDPP(MCInst &Inst, const OperandVector &Operands, 1780 bool IsDPP8 = false); 1781 void cvtVOPC64NoDstDPP8(MCInst &Inst, const OperandVector &Operands) { 1782 cvtVOPC64NoDstDPP(Inst, Operands, true); 1783 } 1784 1785 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1786 AMDGPUOperand::ImmTy Type); 1787 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1788 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1789 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1790 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1791 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1792 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1793 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1794 uint64_t BasicInstType, 1795 bool SkipDstVcc = false, 1796 bool SkipSrcVcc = false); 1797 1798 AMDGPUOperand::Ptr defaultBLGP() const; 1799 AMDGPUOperand::Ptr defaultCBSZ() const; 1800 AMDGPUOperand::Ptr defaultABID() const; 1801 1802 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1803 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1804 1805 AMDGPUOperand::Ptr defaultWaitVDST() const; 1806 AMDGPUOperand::Ptr defaultWaitEXP() const; 1807 }; 1808 1809 struct OptionalOperand { 1810 const char *Name; 1811 AMDGPUOperand::ImmTy Type; 1812 bool IsBit; 1813 bool (*ConvertResult)(int64_t&); 1814 }; 1815 1816 } // end anonymous namespace 1817 1818 // May be called with integer type with equivalent bitwidth. 
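// For example, getFltSemantics(MVT::i16) and getFltSemantics(MVT::f16) both
// resolve to APFloat::IEEEhalf(), since only the operand size matters here.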
1819 static const fltSemantics *getFltSemantics(unsigned Size) { 1820 switch (Size) { 1821 case 4: 1822 return &APFloat::IEEEsingle(); 1823 case 8: 1824 return &APFloat::IEEEdouble(); 1825 case 2: 1826 return &APFloat::IEEEhalf(); 1827 default: 1828 llvm_unreachable("unsupported fp type"); 1829 } 1830 } 1831 1832 static const fltSemantics *getFltSemantics(MVT VT) { 1833 return getFltSemantics(VT.getSizeInBits() / 8); 1834 } 1835 1836 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1837 switch (OperandType) { 1838 case AMDGPU::OPERAND_REG_IMM_INT32: 1839 case AMDGPU::OPERAND_REG_IMM_FP32: 1840 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1841 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1842 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1843 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1844 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1845 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1846 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1847 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1848 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1849 case AMDGPU::OPERAND_KIMM32: 1850 return &APFloat::IEEEsingle(); 1851 case AMDGPU::OPERAND_REG_IMM_INT64: 1852 case AMDGPU::OPERAND_REG_IMM_FP64: 1853 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1854 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1855 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1856 return &APFloat::IEEEdouble(); 1857 case AMDGPU::OPERAND_REG_IMM_INT16: 1858 case AMDGPU::OPERAND_REG_IMM_FP16: 1859 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1860 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1861 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1862 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1863 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1864 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1865 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1866 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1867 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1868 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1869 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1870 case AMDGPU::OPERAND_KIMM16: 1871 return &APFloat::IEEEhalf(); 1872 default: 1873 llvm_unreachable("unsupported fp type"); 1874 } 1875 } 1876 1877 //===----------------------------------------------------------------------===// 1878 // Operand 1879 //===----------------------------------------------------------------------===// 1880 1881 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1882 bool Lost; 1883 1884 // Convert literal to single precision 1885 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1886 APFloat::rmNearestTiesToEven, 1887 &Lost); 1888 // We allow precision lost but not overflow or underflow 1889 if (Status != APFloat::opOK && 1890 Lost && 1891 ((Status & APFloat::opOverflow) != 0 || 1892 (Status & APFloat::opUnderflow) != 0)) { 1893 return false; 1894 } 1895 1896 return true; 1897 } 1898 1899 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1900 return isUIntN(Size, Val) || isIntN(Size, Val); 1901 } 1902 1903 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1904 if (VT.getScalarType() == MVT::i16) { 1905 // FP immediate values are broken. 1906 return isInlinableIntLiteral(Val); 1907 } 1908 1909 // f16/v2f16 operands work correctly for all values. 1910 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1911 } 1912 1913 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1914 1915 // This is a hack to enable named inline values like 1916 // shared_base with both 32-bit and 64-bit operands. 1917 // Note that these values are defined as 1918 // 32-bit operands only. 
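// Because of that, named inline values (src_shared_base, vccz, null, etc.)
// are accepted unconditionally here, before any width checks are applied.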
1919 if (isInlineValue()) { 1920 return true; 1921 } 1922 1923 if (!isImmTy(ImmTyNone)) { 1924 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1925 return false; 1926 } 1927 // TODO: We should avoid using host float here. It would be better to 1928 // check the float bit values which is what a few other places do. 1929 // We've had bot failures before due to weird NaN support on mips hosts. 1930 1931 APInt Literal(64, Imm.Val); 1932 1933 if (Imm.IsFPImm) { // We got fp literal token 1934 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1935 return AMDGPU::isInlinableLiteral64(Imm.Val, 1936 AsmParser->hasInv2PiInlineImm()); 1937 } 1938 1939 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1940 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1941 return false; 1942 1943 if (type.getScalarSizeInBits() == 16) { 1944 return isInlineableLiteralOp16( 1945 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1946 type, AsmParser->hasInv2PiInlineImm()); 1947 } 1948 1949 // Check if single precision literal is inlinable 1950 return AMDGPU::isInlinableLiteral32( 1951 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1952 AsmParser->hasInv2PiInlineImm()); 1953 } 1954 1955 // We got int literal token. 1956 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1957 return AMDGPU::isInlinableLiteral64(Imm.Val, 1958 AsmParser->hasInv2PiInlineImm()); 1959 } 1960 1961 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1962 return false; 1963 } 1964 1965 if (type.getScalarSizeInBits() == 16) { 1966 return isInlineableLiteralOp16( 1967 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1968 type, AsmParser->hasInv2PiInlineImm()); 1969 } 1970 1971 return AMDGPU::isInlinableLiteral32( 1972 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1973 AsmParser->hasInv2PiInlineImm()); 1974 } 1975 1976 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1977 // Check that this immediate can be added as literal 1978 if (!isImmTy(ImmTyNone)) { 1979 return false; 1980 } 1981 1982 if (!Imm.IsFPImm) { 1983 // We got int literal token. 1984 1985 if (type == MVT::f64 && hasFPModifiers()) { 1986 // Cannot apply fp modifiers to int literals preserving the same semantics 1987 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1988 // disable these cases. 1989 return false; 1990 } 1991 1992 unsigned Size = type.getSizeInBits(); 1993 if (Size == 64) 1994 Size = 32; 1995 1996 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1997 // types. 1998 return isSafeTruncation(Imm.Val, Size); 1999 } 2000 2001 // We got fp literal token 2002 if (type == MVT::f64) { // Expected 64-bit fp operand 2003 // We would set low 64-bits of literal to zeroes but we accept this literals 2004 return true; 2005 } 2006 2007 if (type == MVT::i64) { // Expected 64-bit int operand 2008 // We don't allow fp literals in 64-bit integer instructions. It is 2009 // unclear how we should encode them. 2010 return false; 2011 } 2012 2013 // We allow fp literals with f16x2 operands assuming that the specified 2014 // literal goes into the lower half and the upper half is zero. We also 2015 // require that the literal may be losslessly converted to f16. 2016 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 2017 (type == MVT::v2i16)? MVT::i16 : 2018 (type == MVT::v2f32)? 
MVT::f32 : type; 2019 2020 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 2021 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 2022 } 2023 2024 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 2025 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 2026 } 2027 2028 bool AMDGPUOperand::isVRegWithInputMods() const { 2029 return isRegClass(AMDGPU::VGPR_32RegClassID) || 2030 // GFX90A allows DPP on 64-bit operands. 2031 (isRegClass(AMDGPU::VReg_64RegClassID) && 2032 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 2033 } 2034 2035 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 2036 if (AsmParser->isVI()) 2037 return isVReg32(); 2038 else if (AsmParser->isGFX9Plus()) 2039 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 2040 else 2041 return false; 2042 } 2043 2044 bool AMDGPUOperand::isSDWAFP16Operand() const { 2045 return isSDWAOperand(MVT::f16); 2046 } 2047 2048 bool AMDGPUOperand::isSDWAFP32Operand() const { 2049 return isSDWAOperand(MVT::f32); 2050 } 2051 2052 bool AMDGPUOperand::isSDWAInt16Operand() const { 2053 return isSDWAOperand(MVT::i16); 2054 } 2055 2056 bool AMDGPUOperand::isSDWAInt32Operand() const { 2057 return isSDWAOperand(MVT::i32); 2058 } 2059 2060 bool AMDGPUOperand::isBoolReg() const { 2061 auto FB = AsmParser->getFeatureBits(); 2062 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 2063 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 2064 } 2065 2066 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 2067 { 2068 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2069 assert(Size == 2 || Size == 4 || Size == 8); 2070 2071 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 2072 2073 if (Imm.Mods.Abs) { 2074 Val &= ~FpSignMask; 2075 } 2076 if (Imm.Mods.Neg) { 2077 Val ^= FpSignMask; 2078 } 2079 2080 return Val; 2081 } 2082 2083 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 2084 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 2085 Inst.getNumOperands())) { 2086 addLiteralImmOperand(Inst, Imm.Val, 2087 ApplyModifiers & 2088 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2089 } else { 2090 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 2091 Inst.addOperand(MCOperand::createImm(Imm.Val)); 2092 setImmKindNone(); 2093 } 2094 } 2095 2096 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 2097 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 2098 auto OpNum = Inst.getNumOperands(); 2099 // Check that this operand accepts literals 2100 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 2101 2102 if (ApplyModifiers) { 2103 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 2104 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 2105 Val = applyInputFPModifiers(Val, Size); 2106 } 2107 2108 APInt Literal(64, Val); 2109 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 2110 2111 if (Imm.IsFPImm) { // We got fp literal token 2112 switch (OpTy) { 2113 case AMDGPU::OPERAND_REG_IMM_INT64: 2114 case AMDGPU::OPERAND_REG_IMM_FP64: 2115 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2116 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2117 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2118 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2119 AsmParser->hasInv2PiInlineImm())) { 2120 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2121 setImmKindConst(); 2122 return; 2123 } 2124 2125 // Non-inlineable 2126 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2127 // For fp operands we check if low 32 bits are zeros 2128 if (Literal.getLoBits(32) != 0) { 2129 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2130 "Can't encode literal as exact 64-bit floating-point operand. " 2131 "Low 32-bits will be set to zero"); 2132 } 2133 2134 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2135 setImmKindLiteral(); 2136 return; 2137 } 2138 2139 // We don't allow fp literals in 64-bit integer instructions. It is 2140 // unclear how we should encode them. This case should be checked earlier 2141 // in predicate methods (isLiteralImm()) 2142 llvm_unreachable("fp literal in 64-bit integer instruction."); 2143 2144 case AMDGPU::OPERAND_REG_IMM_INT32: 2145 case AMDGPU::OPERAND_REG_IMM_FP32: 2146 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2147 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2148 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2149 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2150 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2151 case AMDGPU::OPERAND_REG_IMM_INT16: 2152 case AMDGPU::OPERAND_REG_IMM_FP16: 2153 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2154 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2155 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2156 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2157 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2158 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2159 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2160 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2161 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2162 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2163 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2164 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2165 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2166 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2167 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2168 case AMDGPU::OPERAND_KIMM32: 2169 case AMDGPU::OPERAND_KIMM16: { 2170 bool lost; 2171 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2172 // Convert literal to single precision 2173 FPLiteral.convert(*getOpFltSemantics(OpTy), 2174 APFloat::rmNearestTiesToEven, &lost); 2175 // We allow precision lost but not overflow or underflow. This should be 2176 // checked earlier in isLiteralImm() 2177 2178 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2179 Inst.addOperand(MCOperand::createImm(ImmVal)); 2180 setImmKindLiteral(); 2181 return; 2182 } 2183 default: 2184 llvm_unreachable("invalid operand size"); 2185 } 2186 2187 return; 2188 } 2189 2190 // We got int literal token. 2191 // Only sign extend inline immediates. 
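// The switch below either keeps the value as an inline constant or
// truncates it to the operand width (low 32 or low 16 bits) and emits it as
// a literal; non-inlinable 64-bit values keep only their low 32 bits.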
2192 switch (OpTy) { 2193 case AMDGPU::OPERAND_REG_IMM_INT32: 2194 case AMDGPU::OPERAND_REG_IMM_FP32: 2195 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2196 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2197 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2198 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2199 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2200 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2201 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2202 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2203 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2204 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2205 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2206 if (isSafeTruncation(Val, 32) && 2207 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2208 AsmParser->hasInv2PiInlineImm())) { 2209 Inst.addOperand(MCOperand::createImm(Val)); 2210 setImmKindConst(); 2211 return; 2212 } 2213 2214 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2215 setImmKindLiteral(); 2216 return; 2217 2218 case AMDGPU::OPERAND_REG_IMM_INT64: 2219 case AMDGPU::OPERAND_REG_IMM_FP64: 2220 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2221 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2222 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2223 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2224 Inst.addOperand(MCOperand::createImm(Val)); 2225 setImmKindConst(); 2226 return; 2227 } 2228 2229 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2230 setImmKindLiteral(); 2231 return; 2232 2233 case AMDGPU::OPERAND_REG_IMM_INT16: 2234 case AMDGPU::OPERAND_REG_IMM_FP16: 2235 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2236 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2237 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2238 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2239 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2240 if (isSafeTruncation(Val, 16) && 2241 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2242 AsmParser->hasInv2PiInlineImm())) { 2243 Inst.addOperand(MCOperand::createImm(Val)); 2244 setImmKindConst(); 2245 return; 2246 } 2247 2248 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2249 setImmKindLiteral(); 2250 return; 2251 2252 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2253 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2254 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2255 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2256 assert(isSafeTruncation(Val, 16)); 2257 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2258 AsmParser->hasInv2PiInlineImm())); 2259 2260 Inst.addOperand(MCOperand::createImm(Val)); 2261 return; 2262 } 2263 case AMDGPU::OPERAND_KIMM32: 2264 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2265 setImmKindNone(); 2266 return; 2267 case AMDGPU::OPERAND_KIMM16: 2268 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2269 setImmKindNone(); 2270 return; 2271 default: 2272 llvm_unreachable("invalid operand size"); 2273 } 2274 } 2275 2276 template <unsigned Bitwidth> 2277 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2278 APInt Literal(64, Imm.Val); 2279 setImmKindNone(); 2280 2281 if (!Imm.IsFPImm) { 2282 // We got int literal token. 
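// Integer literals are truncated to the KIMM width as-is; only fp literals
// (below) need conversion to the operand's floating-point semantics.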
2283 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2284 return; 2285 } 2286 2287 bool Lost; 2288 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2289 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2290 APFloat::rmNearestTiesToEven, &Lost); 2291 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2292 } 2293 2294 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2295 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2296 } 2297 2298 static bool isInlineValue(unsigned Reg) { 2299 switch (Reg) { 2300 case AMDGPU::SRC_SHARED_BASE: 2301 case AMDGPU::SRC_SHARED_LIMIT: 2302 case AMDGPU::SRC_PRIVATE_BASE: 2303 case AMDGPU::SRC_PRIVATE_LIMIT: 2304 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2305 return true; 2306 case AMDGPU::SRC_VCCZ: 2307 case AMDGPU::SRC_EXECZ: 2308 case AMDGPU::SRC_SCC: 2309 return true; 2310 case AMDGPU::SGPR_NULL: 2311 return true; 2312 default: 2313 return false; 2314 } 2315 } 2316 2317 bool AMDGPUOperand::isInlineValue() const { 2318 return isRegKind() && ::isInlineValue(getReg()); 2319 } 2320 2321 //===----------------------------------------------------------------------===// 2322 // AsmParser 2323 //===----------------------------------------------------------------------===// 2324 2325 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2326 if (Is == IS_VGPR) { 2327 switch (RegWidth) { 2328 default: return -1; 2329 case 32: 2330 return AMDGPU::VGPR_32RegClassID; 2331 case 64: 2332 return AMDGPU::VReg_64RegClassID; 2333 case 96: 2334 return AMDGPU::VReg_96RegClassID; 2335 case 128: 2336 return AMDGPU::VReg_128RegClassID; 2337 case 160: 2338 return AMDGPU::VReg_160RegClassID; 2339 case 192: 2340 return AMDGPU::VReg_192RegClassID; 2341 case 224: 2342 return AMDGPU::VReg_224RegClassID; 2343 case 256: 2344 return AMDGPU::VReg_256RegClassID; 2345 case 512: 2346 return AMDGPU::VReg_512RegClassID; 2347 case 1024: 2348 return AMDGPU::VReg_1024RegClassID; 2349 } 2350 } else if (Is == IS_TTMP) { 2351 switch (RegWidth) { 2352 default: return -1; 2353 case 32: 2354 return AMDGPU::TTMP_32RegClassID; 2355 case 64: 2356 return AMDGPU::TTMP_64RegClassID; 2357 case 128: 2358 return AMDGPU::TTMP_128RegClassID; 2359 case 256: 2360 return AMDGPU::TTMP_256RegClassID; 2361 case 512: 2362 return AMDGPU::TTMP_512RegClassID; 2363 } 2364 } else if (Is == IS_SGPR) { 2365 switch (RegWidth) { 2366 default: return -1; 2367 case 32: 2368 return AMDGPU::SGPR_32RegClassID; 2369 case 64: 2370 return AMDGPU::SGPR_64RegClassID; 2371 case 96: 2372 return AMDGPU::SGPR_96RegClassID; 2373 case 128: 2374 return AMDGPU::SGPR_128RegClassID; 2375 case 160: 2376 return AMDGPU::SGPR_160RegClassID; 2377 case 192: 2378 return AMDGPU::SGPR_192RegClassID; 2379 case 224: 2380 return AMDGPU::SGPR_224RegClassID; 2381 case 256: 2382 return AMDGPU::SGPR_256RegClassID; 2383 case 512: 2384 return AMDGPU::SGPR_512RegClassID; 2385 } 2386 } else if (Is == IS_AGPR) { 2387 switch (RegWidth) { 2388 default: return -1; 2389 case 32: 2390 return AMDGPU::AGPR_32RegClassID; 2391 case 64: 2392 return AMDGPU::AReg_64RegClassID; 2393 case 96: 2394 return AMDGPU::AReg_96RegClassID; 2395 case 128: 2396 return AMDGPU::AReg_128RegClassID; 2397 case 160: 2398 return AMDGPU::AReg_160RegClassID; 2399 case 192: 2400 return AMDGPU::AReg_192RegClassID; 2401 case 224: 2402 return AMDGPU::AReg_224RegClassID; 2403 case 256: 2404 return AMDGPU::AReg_256RegClassID; 2405 case 512: 2406 return AMDGPU::AReg_512RegClassID; 
2407 case 1024: 2408 return AMDGPU::AReg_1024RegClassID; 2409 } 2410 } 2411 return -1; 2412 } 2413 2414 static unsigned getSpecialRegForName(StringRef RegName) { 2415 return StringSwitch<unsigned>(RegName) 2416 .Case("exec", AMDGPU::EXEC) 2417 .Case("vcc", AMDGPU::VCC) 2418 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2419 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2420 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2421 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2422 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2423 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2424 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2425 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2426 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2427 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2428 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2429 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2430 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2431 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2432 .Case("m0", AMDGPU::M0) 2433 .Case("vccz", AMDGPU::SRC_VCCZ) 2434 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2435 .Case("execz", AMDGPU::SRC_EXECZ) 2436 .Case("src_execz", AMDGPU::SRC_EXECZ) 2437 .Case("scc", AMDGPU::SRC_SCC) 2438 .Case("src_scc", AMDGPU::SRC_SCC) 2439 .Case("tba", AMDGPU::TBA) 2440 .Case("tma", AMDGPU::TMA) 2441 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2442 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2443 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2444 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2445 .Case("vcc_lo", AMDGPU::VCC_LO) 2446 .Case("vcc_hi", AMDGPU::VCC_HI) 2447 .Case("exec_lo", AMDGPU::EXEC_LO) 2448 .Case("exec_hi", AMDGPU::EXEC_HI) 2449 .Case("tma_lo", AMDGPU::TMA_LO) 2450 .Case("tma_hi", AMDGPU::TMA_HI) 2451 .Case("tba_lo", AMDGPU::TBA_LO) 2452 .Case("tba_hi", AMDGPU::TBA_HI) 2453 .Case("pc", AMDGPU::PC_REG) 2454 .Case("null", AMDGPU::SGPR_NULL) 2455 .Default(AMDGPU::NoRegister); 2456 } 2457 2458 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2459 SMLoc &EndLoc, bool RestoreOnFailure) { 2460 auto R = parseRegister(); 2461 if (!R) return true; 2462 assert(R->isReg()); 2463 RegNo = R->getReg(); 2464 StartLoc = R->getStartLoc(); 2465 EndLoc = R->getEndLoc(); 2466 return false; 2467 } 2468 2469 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2470 SMLoc &EndLoc) { 2471 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2472 } 2473 2474 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2475 SMLoc &StartLoc, 2476 SMLoc &EndLoc) { 2477 bool Result = 2478 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2479 bool PendingErrors = getParser().hasPendingError(); 2480 getParser().clearPendingErrors(); 2481 if (PendingErrors) 2482 return MatchOperand_ParseFail; 2483 if (Result) 2484 return MatchOperand_NoMatch; 2485 return MatchOperand_Success; 2486 } 2487 2488 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2489 RegisterKind RegKind, unsigned Reg1, 2490 SMLoc Loc) { 2491 switch (RegKind) { 2492 case IS_SPECIAL: 2493 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2494 Reg = AMDGPU::EXEC; 2495 RegWidth = 64; 2496 return true; 2497 } 2498 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2499 Reg = AMDGPU::FLAT_SCR; 2500 RegWidth = 64; 2501 return true; 2502 } 2503 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2504 Reg = AMDGPU::XNACK_MASK; 2505 RegWidth = 64; 
2506 return true; 2507 } 2508 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2509 Reg = AMDGPU::VCC; 2510 RegWidth = 64; 2511 return true; 2512 } 2513 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2514 Reg = AMDGPU::TBA; 2515 RegWidth = 64; 2516 return true; 2517 } 2518 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2519 Reg = AMDGPU::TMA; 2520 RegWidth = 64; 2521 return true; 2522 } 2523 Error(Loc, "register does not fit in the list"); 2524 return false; 2525 case IS_VGPR: 2526 case IS_SGPR: 2527 case IS_AGPR: 2528 case IS_TTMP: 2529 if (Reg1 != Reg + RegWidth / 32) { 2530 Error(Loc, "registers in a list must have consecutive indices"); 2531 return false; 2532 } 2533 RegWidth += 32; 2534 return true; 2535 default: 2536 llvm_unreachable("unexpected register kind"); 2537 } 2538 } 2539 2540 struct RegInfo { 2541 StringLiteral Name; 2542 RegisterKind Kind; 2543 }; 2544 2545 static constexpr RegInfo RegularRegisters[] = { 2546 {{"v"}, IS_VGPR}, 2547 {{"s"}, IS_SGPR}, 2548 {{"ttmp"}, IS_TTMP}, 2549 {{"acc"}, IS_AGPR}, 2550 {{"a"}, IS_AGPR}, 2551 }; 2552 2553 static bool isRegularReg(RegisterKind Kind) { 2554 return Kind == IS_VGPR || 2555 Kind == IS_SGPR || 2556 Kind == IS_TTMP || 2557 Kind == IS_AGPR; 2558 } 2559 2560 static const RegInfo* getRegularRegInfo(StringRef Str) { 2561 for (const RegInfo &Reg : RegularRegisters) 2562 if (Str.startswith(Reg.Name)) 2563 return &Reg; 2564 return nullptr; 2565 } 2566 2567 static bool getRegNum(StringRef Str, unsigned& Num) { 2568 return !Str.getAsInteger(10, Num); 2569 } 2570 2571 bool 2572 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2573 const AsmToken &NextToken) const { 2574 2575 // A list of consecutive registers: [s0,s1,s2,s3] 2576 if (Token.is(AsmToken::LBrac)) 2577 return true; 2578 2579 if (!Token.is(AsmToken::Identifier)) 2580 return false; 2581 2582 // A single register like s0 or a range of registers like s[0:1] 2583 2584 StringRef Str = Token.getString(); 2585 const RegInfo *Reg = getRegularRegInfo(Str); 2586 if (Reg) { 2587 StringRef RegName = Reg->Name; 2588 StringRef RegSuffix = Str.substr(RegName.size()); 2589 if (!RegSuffix.empty()) { 2590 unsigned Num; 2591 // A single register with an index: rXX 2592 if (getRegNum(RegSuffix, Num)) 2593 return true; 2594 } else { 2595 // A range of registers: r[XX:YY]. 2596 if (NextToken.is(AsmToken::LBrac)) 2597 return true; 2598 } 2599 } 2600 2601 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2602 } 2603 2604 bool 2605 AMDGPUAsmParser::isRegister() 2606 { 2607 return isRegister(getToken(), peekToken()); 2608 } 2609 2610 unsigned 2611 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2612 unsigned RegNum, 2613 unsigned RegWidth, 2614 SMLoc Loc) { 2615 2616 assert(isRegularReg(RegKind)); 2617 2618 unsigned AlignSize = 1; 2619 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2620 // SGPR and TTMP registers must be aligned. 2621 // Max required alignment is 4 dwords. 
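// For example, s[2:3] is accepted (a 2-dword range aligned to 2) while
// s[1:2] is not; ranges of 4 or more dwords must start at a multiple of 4.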
2622 AlignSize = std::min(RegWidth / 32, 4u); 2623 } 2624 2625 if (RegNum % AlignSize != 0) { 2626 Error(Loc, "invalid register alignment"); 2627 return AMDGPU::NoRegister; 2628 } 2629 2630 unsigned RegIdx = RegNum / AlignSize; 2631 int RCID = getRegClass(RegKind, RegWidth); 2632 if (RCID == -1) { 2633 Error(Loc, "invalid or unsupported register size"); 2634 return AMDGPU::NoRegister; 2635 } 2636 2637 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2638 const MCRegisterClass RC = TRI->getRegClass(RCID); 2639 if (RegIdx >= RC.getNumRegs()) { 2640 Error(Loc, "register index is out of range"); 2641 return AMDGPU::NoRegister; 2642 } 2643 2644 return RC.getRegister(RegIdx); 2645 } 2646 2647 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) { 2648 int64_t RegLo, RegHi; 2649 if (!skipToken(AsmToken::LBrac, "missing register index")) 2650 return false; 2651 2652 SMLoc FirstIdxLoc = getLoc(); 2653 SMLoc SecondIdxLoc; 2654 2655 if (!parseExpr(RegLo)) 2656 return false; 2657 2658 if (trySkipToken(AsmToken::Colon)) { 2659 SecondIdxLoc = getLoc(); 2660 if (!parseExpr(RegHi)) 2661 return false; 2662 } else { 2663 RegHi = RegLo; 2664 } 2665 2666 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2667 return false; 2668 2669 if (!isUInt<32>(RegLo)) { 2670 Error(FirstIdxLoc, "invalid register index"); 2671 return false; 2672 } 2673 2674 if (!isUInt<32>(RegHi)) { 2675 Error(SecondIdxLoc, "invalid register index"); 2676 return false; 2677 } 2678 2679 if (RegLo > RegHi) { 2680 Error(FirstIdxLoc, "first register index should not exceed second index"); 2681 return false; 2682 } 2683 2684 Num = static_cast<unsigned>(RegLo); 2685 RegWidth = 32 * ((RegHi - RegLo) + 1); 2686 return true; 2687 } 2688 2689 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2690 unsigned &RegNum, unsigned &RegWidth, 2691 SmallVectorImpl<AsmToken> &Tokens) { 2692 assert(isToken(AsmToken::Identifier)); 2693 unsigned Reg = getSpecialRegForName(getTokenStr()); 2694 if (Reg) { 2695 RegNum = 0; 2696 RegWidth = 32; 2697 RegKind = IS_SPECIAL; 2698 Tokens.push_back(getToken()); 2699 lex(); // skip register name 2700 } 2701 return Reg; 2702 } 2703 2704 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2705 unsigned &RegNum, unsigned &RegWidth, 2706 SmallVectorImpl<AsmToken> &Tokens) { 2707 assert(isToken(AsmToken::Identifier)); 2708 StringRef RegName = getTokenStr(); 2709 auto Loc = getLoc(); 2710 2711 const RegInfo *RI = getRegularRegInfo(RegName); 2712 if (!RI) { 2713 Error(Loc, "invalid register name"); 2714 return AMDGPU::NoRegister; 2715 } 2716 2717 Tokens.push_back(getToken()); 2718 lex(); // skip register name 2719 2720 RegKind = RI->Kind; 2721 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2722 if (!RegSuffix.empty()) { 2723 // Single 32-bit register: vXX. 2724 if (!getRegNum(RegSuffix, RegNum)) { 2725 Error(Loc, "invalid register index"); 2726 return AMDGPU::NoRegister; 2727 } 2728 RegWidth = 32; 2729 } else { 2730 // Range of registers: v[XX:YY]. ":YY" is optional. 
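// E.g. "v[8:11]" yields RegNum = 8 and RegWidth = 128, while "v[4]" denotes
// a single 32-bit register written in range syntax.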
2731 if (!ParseRegRange(RegNum, RegWidth)) 2732 return AMDGPU::NoRegister; 2733 } 2734 2735 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2736 } 2737 2738 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2739 unsigned &RegWidth, 2740 SmallVectorImpl<AsmToken> &Tokens) { 2741 unsigned Reg = AMDGPU::NoRegister; 2742 auto ListLoc = getLoc(); 2743 2744 if (!skipToken(AsmToken::LBrac, 2745 "expected a register or a list of registers")) { 2746 return AMDGPU::NoRegister; 2747 } 2748 2749 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2750 2751 auto Loc = getLoc(); 2752 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2753 return AMDGPU::NoRegister; 2754 if (RegWidth != 32) { 2755 Error(Loc, "expected a single 32-bit register"); 2756 return AMDGPU::NoRegister; 2757 } 2758 2759 for (; trySkipToken(AsmToken::Comma); ) { 2760 RegisterKind NextRegKind; 2761 unsigned NextReg, NextRegNum, NextRegWidth; 2762 Loc = getLoc(); 2763 2764 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2765 NextRegNum, NextRegWidth, 2766 Tokens)) { 2767 return AMDGPU::NoRegister; 2768 } 2769 if (NextRegWidth != 32) { 2770 Error(Loc, "expected a single 32-bit register"); 2771 return AMDGPU::NoRegister; 2772 } 2773 if (NextRegKind != RegKind) { 2774 Error(Loc, "registers in a list must be of the same kind"); 2775 return AMDGPU::NoRegister; 2776 } 2777 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2778 return AMDGPU::NoRegister; 2779 } 2780 2781 if (!skipToken(AsmToken::RBrac, 2782 "expected a comma or a closing square bracket")) { 2783 return AMDGPU::NoRegister; 2784 } 2785 2786 if (isRegularReg(RegKind)) 2787 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2788 2789 return Reg; 2790 } 2791 2792 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2793 unsigned &RegNum, unsigned &RegWidth, 2794 SmallVectorImpl<AsmToken> &Tokens) { 2795 auto Loc = getLoc(); 2796 Reg = AMDGPU::NoRegister; 2797 2798 if (isToken(AsmToken::Identifier)) { 2799 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2800 if (Reg == AMDGPU::NoRegister) 2801 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2802 } else { 2803 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2804 } 2805 2806 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2807 if (Reg == AMDGPU::NoRegister) { 2808 assert(Parser.hasPendingError()); 2809 return false; 2810 } 2811 2812 if (!subtargetHasRegister(*TRI, Reg)) { 2813 if (Reg == AMDGPU::SGPR_NULL) { 2814 Error(Loc, "'null' operand is not supported on this GPU"); 2815 } else { 2816 Error(Loc, "register not available on this GPU"); 2817 } 2818 return false; 2819 } 2820 2821 return true; 2822 } 2823 2824 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2825 unsigned &RegNum, unsigned &RegWidth, 2826 bool RestoreOnFailure /*=false*/) { 2827 Reg = AMDGPU::NoRegister; 2828 2829 SmallVector<AsmToken, 1> Tokens; 2830 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2831 if (RestoreOnFailure) { 2832 while (!Tokens.empty()) { 2833 getLexer().UnLex(Tokens.pop_back_val()); 2834 } 2835 } 2836 return true; 2837 } 2838 return false; 2839 } 2840 2841 Optional<StringRef> 2842 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2843 switch (RegKind) { 2844 case IS_VGPR: 2845 return StringRef(".amdgcn.next_free_vgpr"); 2846 case IS_SGPR: 2847 return StringRef(".amdgcn.next_free_sgpr"); 2848 default: 2849 return None; 2850 } 2851 } 2852 2853 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2854 auto SymbolName = getGprCountSymbolName(RegKind); 2855 assert(SymbolName && "initializing invalid register kind"); 2856 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2857 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2858 } 2859 2860 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2861 unsigned DwordRegIndex, 2862 unsigned RegWidth) { 2863 // Symbols are only defined for GCN targets 2864 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2865 return true; 2866 2867 auto SymbolName = getGprCountSymbolName(RegKind); 2868 if (!SymbolName) 2869 return true; 2870 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2871 2872 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1; 2873 int64_t OldCount; 2874 2875 if (!Sym->isVariable()) 2876 return !Error(getLoc(), 2877 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2878 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2879 return !Error( 2880 getLoc(), 2881 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2882 2883 if (OldCount <= NewMax) 2884 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2885 2886 return true; 2887 } 2888 2889 std::unique_ptr<AMDGPUOperand> 2890 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2891 const auto &Tok = getToken(); 2892 SMLoc StartLoc = Tok.getLoc(); 2893 SMLoc EndLoc = Tok.getEndLoc(); 2894 RegisterKind RegKind; 2895 unsigned Reg, RegNum, RegWidth; 2896 2897 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2898 return nullptr; 2899 } 2900 if (isHsaAbiVersion3AndAbove(&getSTI())) { 2901 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2902 return nullptr; 2903 } else 2904 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2905 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2906 } 2907 2908 OperandMatchResultTy 2909 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2910 // TODO: add syntactic sugar for 1/(2*PI) 2911 2912 assert(!isRegister()); 2913 assert(!isModifier()); 2914 2915 const auto& Tok = getToken(); 2916 const auto& NextTok = peekToken(); 2917 bool IsReal = Tok.is(AsmToken::Real); 2918 SMLoc S = getLoc(); 2919 bool Negate = false; 2920 2921 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2922 lex(); 2923 IsReal = true; 2924 Negate = true; 2925 } 2926 2927 if (IsReal) { 2928 // Floating-point expressions are not supported. 2929 // Can only allow floating-point literals with an 2930 // optional sign. 2931 2932 StringRef Num = getTokenStr(); 2933 lex(); 2934 2935 APFloat RealVal(APFloat::IEEEdouble()); 2936 auto roundMode = APFloat::rmNearestTiesToEven; 2937 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2938 return MatchOperand_ParseFail; 2939 } 2940 if (Negate) 2941 RealVal.changeSign(); 2942 2943 Operands.push_back( 2944 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2945 AMDGPUOperand::ImmTyNone, true)); 2946 2947 return MatchOperand_Success; 2948 2949 } else { 2950 int64_t IntVal; 2951 const MCExpr *Expr; 2952 SMLoc S = getLoc(); 2953 2954 if (HasSP3AbsModifier) { 2955 // This is a workaround for handling expressions 2956 // as arguments of SP3 'abs' modifier, for example: 2957 // |1.0| 2958 // |-1| 2959 // |1+x| 2960 // This syntax is not compatible with syntax of standard 2961 // MC expressions (due to the trailing '|'). 
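// parsePrimaryExpr() is used so that parsing stops before the closing '|',
// which a full parseExpression() would try to consume as a binary operator.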
2962 SMLoc EndLoc; 2963 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2964 return MatchOperand_ParseFail; 2965 } else { 2966 if (Parser.parseExpression(Expr)) 2967 return MatchOperand_ParseFail; 2968 } 2969 2970 if (Expr->evaluateAsAbsolute(IntVal)) { 2971 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2972 } else { 2973 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2974 } 2975 2976 return MatchOperand_Success; 2977 } 2978 2979 return MatchOperand_NoMatch; 2980 } 2981 2982 OperandMatchResultTy 2983 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2984 if (!isRegister()) 2985 return MatchOperand_NoMatch; 2986 2987 if (auto R = parseRegister()) { 2988 assert(R->isReg()); 2989 Operands.push_back(std::move(R)); 2990 return MatchOperand_Success; 2991 } 2992 return MatchOperand_ParseFail; 2993 } 2994 2995 OperandMatchResultTy 2996 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2997 auto res = parseReg(Operands); 2998 if (res != MatchOperand_NoMatch) { 2999 return res; 3000 } else if (isModifier()) { 3001 return MatchOperand_NoMatch; 3002 } else { 3003 return parseImm(Operands, HasSP3AbsMod); 3004 } 3005 } 3006 3007 bool 3008 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3009 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 3010 const auto &str = Token.getString(); 3011 return str == "abs" || str == "neg" || str == "sext"; 3012 } 3013 return false; 3014 } 3015 3016 bool 3017 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 3018 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 3019 } 3020 3021 bool 3022 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3023 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 3024 } 3025 3026 bool 3027 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3028 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 3029 } 3030 3031 // Check if this is an operand modifier or an opcode modifier 3032 // which may look like an expression but it is not. We should 3033 // avoid parsing these modifiers as expressions. Currently 3034 // recognized sequences are: 3035 // |...| 3036 // abs(...) 3037 // neg(...) 3038 // sext(...) 3039 // -reg 3040 // -|...| 3041 // -abs(...) 3042 // name:... 3043 // Note that simple opcode modifiers like 'gds' may be parsed as 3044 // expressions; this is a special case. See getExpressionAsToken. 3045 // 3046 bool 3047 AMDGPUAsmParser::isModifier() { 3048 3049 AsmToken Tok = getToken(); 3050 AsmToken NextToken[2]; 3051 peekTokens(NextToken); 3052 3053 return isOperandModifier(Tok, NextToken[0]) || 3054 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 3055 isOpcodeModifierWithVal(Tok, NextToken[0]); 3056 } 3057 3058 // Check if the current token is an SP3 'neg' modifier. 3059 // Currently this modifier is allowed in the following context: 3060 // 3061 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 3062 // 2. Before an 'abs' modifier: -abs(...) 3063 // 3. Before an SP3 'abs' modifier: -|...| 3064 // 3065 // In all other cases "-" is handled as a part 3066 // of an expression that follows the sign. 
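// parseRegOrImmWithFPInputMods() below combines this SP3 form with the
// named 'neg'/'abs' modifiers and rejects mixing the two spellings.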
3067 //
3068 // Note: When "-" is followed by an integer literal,
3069 // this is interpreted as integer negation rather
3070 // than a floating-point NEG modifier applied to N.
3071 // Besides being counter-intuitive, such use of a floating-point
3072 // NEG modifier would have resulted in a different meaning
3073 // of integer literals used with VOP1/2/C and VOP3,
3074 // for example:
3075 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3076 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3077 // Negative fp literals with a preceding "-" are
3078 // handled likewise for uniformity.
3079 //
3080 bool
3081 AMDGPUAsmParser::parseSP3NegModifier() {
3082
3083 AsmToken NextToken[2];
3084 peekTokens(NextToken);
3085
3086 if (isToken(AsmToken::Minus) &&
3087 (isRegister(NextToken[0], NextToken[1]) ||
3088 NextToken[0].is(AsmToken::Pipe) ||
3089 isId(NextToken[0], "abs"))) {
3090 lex();
3091 return true;
3092 }
3093
3094 return false;
3095 }
3096
3097 OperandMatchResultTy
3098 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3099 bool AllowImm) {
3100 bool Neg, SP3Neg;
3101 bool Abs, SP3Abs;
3102 SMLoc Loc;
3103
3104 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3105 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
3106 Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3107 return MatchOperand_ParseFail;
3108 }
3109
3110 SP3Neg = parseSP3NegModifier();
3111
3112 Loc = getLoc();
3113 Neg = trySkipId("neg");
3114 if (Neg && SP3Neg) {
3115 Error(Loc, "expected register or immediate");
3116 return MatchOperand_ParseFail;
3117 }
3118 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3119 return MatchOperand_ParseFail;
3120
3121 Abs = trySkipId("abs");
3122 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3123 return MatchOperand_ParseFail;
3124
3125 Loc = getLoc();
3126 SP3Abs = trySkipToken(AsmToken::Pipe);
3127 if (Abs && SP3Abs) {
3128 Error(Loc, "expected register or immediate");
3129 return MatchOperand_ParseFail;
3130 }
3131
3132 OperandMatchResultTy Res;
3133 if (AllowImm) {
3134 Res = parseRegOrImm(Operands, SP3Abs);
3135 } else {
3136 Res = parseReg(Operands);
3137 }
3138 if (Res != MatchOperand_Success) {
3139 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 3140 } 3141 3142 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 3143 return MatchOperand_ParseFail; 3144 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3145 return MatchOperand_ParseFail; 3146 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3147 return MatchOperand_ParseFail; 3148 3149 AMDGPUOperand::Modifiers Mods; 3150 Mods.Abs = Abs || SP3Abs; 3151 Mods.Neg = Neg || SP3Neg; 3152 3153 if (Mods.hasFPModifiers()) { 3154 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3155 if (Op.isExpr()) { 3156 Error(Op.getStartLoc(), "expected an absolute expression"); 3157 return MatchOperand_ParseFail; 3158 } 3159 Op.setModifiers(Mods); 3160 } 3161 return MatchOperand_Success; 3162 } 3163 3164 OperandMatchResultTy 3165 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3166 bool AllowImm) { 3167 bool Sext = trySkipId("sext"); 3168 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3169 return MatchOperand_ParseFail; 3170 3171 OperandMatchResultTy Res; 3172 if (AllowImm) { 3173 Res = parseRegOrImm(Operands); 3174 } else { 3175 Res = parseReg(Operands); 3176 } 3177 if (Res != MatchOperand_Success) { 3178 return Sext? MatchOperand_ParseFail : Res; 3179 } 3180 3181 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3182 return MatchOperand_ParseFail; 3183 3184 AMDGPUOperand::Modifiers Mods; 3185 Mods.Sext = Sext; 3186 3187 if (Mods.hasIntModifiers()) { 3188 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3189 if (Op.isExpr()) { 3190 Error(Op.getStartLoc(), "expected an absolute expression"); 3191 return MatchOperand_ParseFail; 3192 } 3193 Op.setModifiers(Mods); 3194 } 3195 3196 return MatchOperand_Success; 3197 } 3198 3199 OperandMatchResultTy 3200 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3201 return parseRegOrImmWithFPInputMods(Operands, false); 3202 } 3203 3204 OperandMatchResultTy 3205 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3206 return parseRegOrImmWithIntInputMods(Operands, false); 3207 } 3208 3209 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3210 auto Loc = getLoc(); 3211 if (trySkipId("off")) { 3212 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3213 AMDGPUOperand::ImmTyOff, false)); 3214 return MatchOperand_Success; 3215 } 3216 3217 if (!isRegister()) 3218 return MatchOperand_NoMatch; 3219 3220 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3221 if (Reg) { 3222 Operands.push_back(std::move(Reg)); 3223 return MatchOperand_Success; 3224 } 3225 3226 return MatchOperand_ParseFail; 3227 3228 } 3229 3230 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3231 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3232 3233 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3234 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3235 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3236 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3237 return Match_InvalidOperand; 3238 3239 if ((TSFlags & SIInstrFlags::VOP3) && 3240 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3241 getForcedEncodingSize() != 64) 3242 return Match_PreferE32; 3243 3244 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3245 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3246 // v_mac_f32/16 allow only dst_sel == DWORD; 3247 auto OpNum = 3248 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3249 const auto &Op = Inst.getOperand(OpNum); 3250 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3251 return Match_InvalidOperand; 3252 } 3253 } 3254 3255 return Match_Success; 3256 } 3257 3258 static ArrayRef<unsigned> getAllVariants() { 3259 static const unsigned Variants[] = { 3260 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3261 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, 3262 AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP 3263 }; 3264 3265 return makeArrayRef(Variants); 3266 } 3267 3268 // What asm variants we should check 3269 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3270 if (isForcedDPP() && isForcedVOP3()) { 3271 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP}; 3272 return makeArrayRef(Variants); 3273 } 3274 if (getForcedEncodingSize() == 32) { 3275 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3276 return makeArrayRef(Variants); 3277 } 3278 3279 if (isForcedVOP3()) { 3280 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3281 return makeArrayRef(Variants); 3282 } 3283 3284 if (isForcedSDWA()) { 3285 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3286 AMDGPUAsmVariants::SDWA9}; 3287 return makeArrayRef(Variants); 3288 } 3289 3290 if (isForcedDPP()) { 3291 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3292 return makeArrayRef(Variants); 3293 } 3294 3295 return getAllVariants(); 3296 } 3297 3298 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3299 if (isForcedDPP() && isForcedVOP3()) 3300 return "e64_dpp"; 3301 3302 if (getForcedEncodingSize() == 32) 3303 return "e32"; 3304 3305 if (isForcedVOP3()) 3306 return "e64"; 3307 3308 if (isForcedSDWA()) 3309 return "sdwa"; 3310 3311 if (isForcedDPP()) 3312 return "dpp"; 3313 3314 return ""; 3315 } 3316 3317 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3318 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3319 const unsigned Num = Desc.getNumImplicitUses(); 3320 for (unsigned i = 0; i < Num; ++i) { 3321 unsigned Reg = Desc.ImplicitUses[i]; 3322 switch (Reg) { 3323 case AMDGPU::FLAT_SCR: 3324 case AMDGPU::VCC: 3325 case AMDGPU::VCC_LO: 3326 case AMDGPU::VCC_HI: 3327 case AMDGPU::M0: 3328 return Reg; 3329 default: 3330 break; 3331 } 3332 } 3333 return AMDGPU::NoRegister; 3334 } 3335 3336 // NB: This code is correct only when used to check constant 3337 // bus limitations because GFX7 support no f16 inline constants. 3338 // Note that there are no cases when a GFX7 opcode violates 3339 // constant bus limitations due to the use of an f16 constant. 
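// isInlineConstant() dispatches on the expected operand size (8, 4 or 2
// bytes) and delegates to the AMDGPU::isInlinableLiteral* helpers; packed
// 16-bit operands use the V216 variants.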
3340 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3341 unsigned OpIdx) const { 3342 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3343 3344 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3345 return false; 3346 } 3347 3348 const MCOperand &MO = Inst.getOperand(OpIdx); 3349 3350 int64_t Val = MO.getImm(); 3351 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3352 3353 switch (OpSize) { // expected operand size 3354 case 8: 3355 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3356 case 4: 3357 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3358 case 2: { 3359 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3360 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3361 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3362 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3363 return AMDGPU::isInlinableIntLiteral(Val); 3364 3365 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3366 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3367 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3368 return AMDGPU::isInlinableIntLiteralV216(Val); 3369 3370 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3371 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3372 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3373 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3374 3375 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3376 } 3377 default: 3378 llvm_unreachable("invalid operand size"); 3379 } 3380 } 3381 3382 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3383 if (!isGFX10Plus()) 3384 return 1; 3385 3386 switch (Opcode) { 3387 // 64-bit shift instructions can use only one scalar value input 3388 case AMDGPU::V_LSHLREV_B64_e64: 3389 case AMDGPU::V_LSHLREV_B64_gfx10: 3390 case AMDGPU::V_LSHLREV_B64_e64_gfx11: 3391 case AMDGPU::V_LSHRREV_B64_e64: 3392 case AMDGPU::V_LSHRREV_B64_gfx10: 3393 case AMDGPU::V_LSHRREV_B64_e64_gfx11: 3394 case AMDGPU::V_ASHRREV_I64_e64: 3395 case AMDGPU::V_ASHRREV_I64_gfx10: 3396 case AMDGPU::V_ASHRREV_I64_e64_gfx11: 3397 case AMDGPU::V_LSHL_B64_e64: 3398 case AMDGPU::V_LSHR_B64_e64: 3399 case AMDGPU::V_ASHR_I64_e64: 3400 return 1; 3401 default: 3402 return 2; 3403 } 3404 } 3405 3406 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3407 const MCOperand &MO = Inst.getOperand(OpIdx); 3408 if (MO.isImm()) { 3409 return !isInlineConstant(Inst, OpIdx); 3410 } else if (MO.isReg()) { 3411 auto Reg = MO.getReg(); 3412 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3413 auto PReg = mc2PseudoReg(Reg); 3414 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3415 } else { 3416 return true; 3417 } 3418 } 3419 3420 bool 3421 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3422 const OperandVector &Operands) { 3423 const unsigned Opcode = Inst.getOpcode(); 3424 const MCInstrDesc &Desc = MII.get(Opcode); 3425 unsigned LastSGPR = AMDGPU::NoRegister; 3426 unsigned ConstantBusUseCount = 0; 3427 unsigned NumLiterals = 0; 3428 unsigned LiteralSize; 3429 3430 if (Desc.TSFlags & 3431 (SIInstrFlags::VOPC | 3432 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3433 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3434 SIInstrFlags::SDWA)) { 3435 // Check special imm operands (used by madmk, etc) 3436 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3437 ++NumLiterals; 3438 LiteralSize = 4; 3439 } 3440 3441 SmallDenseSet<unsigned> SGPRsUsed; 3442 unsigned SGPRUsed = 
findImplicitSGPRReadInVOP(Inst); 3443 if (SGPRUsed != AMDGPU::NoRegister) { 3444 SGPRsUsed.insert(SGPRUsed); 3445 ++ConstantBusUseCount; 3446 } 3447 3448 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3449 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3450 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3451 3452 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3453 3454 for (int OpIdx : OpIndices) { 3455 if (OpIdx == -1) break; 3456 3457 const MCOperand &MO = Inst.getOperand(OpIdx); 3458 if (usesConstantBus(Inst, OpIdx)) { 3459 if (MO.isReg()) { 3460 LastSGPR = mc2PseudoReg(MO.getReg()); 3461 // Pairs of registers with a partial intersections like these 3462 // s0, s[0:1] 3463 // flat_scratch_lo, flat_scratch 3464 // flat_scratch_lo, flat_scratch_hi 3465 // are theoretically valid but they are disabled anyway. 3466 // Note that this code mimics SIInstrInfo::verifyInstruction 3467 if (SGPRsUsed.insert(LastSGPR).second) { 3468 ++ConstantBusUseCount; 3469 } 3470 } else { // Expression or a literal 3471 3472 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3473 continue; // special operand like VINTERP attr_chan 3474 3475 // An instruction may use only one literal. 3476 // This has been validated on the previous step. 3477 // See validateVOPLiteral. 3478 // This literal may be used as more than one operand. 3479 // If all these operands are of the same size, 3480 // this literal counts as one scalar value. 3481 // Otherwise it counts as 2 scalar values. 3482 // See "GFX10 Shader Programming", section 3.6.2.3. 3483 3484 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3485 if (Size < 4) Size = 4; 3486 3487 if (NumLiterals == 0) { 3488 NumLiterals = 1; 3489 LiteralSize = Size; 3490 } else if (LiteralSize != Size) { 3491 NumLiterals = 2; 3492 } 3493 } 3494 } 3495 } 3496 } 3497 ConstantBusUseCount += NumLiterals; 3498 3499 if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) 3500 return true; 3501 3502 SMLoc LitLoc = getLitLoc(Operands); 3503 SMLoc RegLoc = getRegLoc(LastSGPR, Operands); 3504 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? 
RegLoc : LitLoc; 3505 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3506 return false; 3507 } 3508 3509 bool 3510 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3511 const OperandVector &Operands) { 3512 const unsigned Opcode = Inst.getOpcode(); 3513 const MCInstrDesc &Desc = MII.get(Opcode); 3514 3515 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3516 if (DstIdx == -1 || 3517 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3518 return true; 3519 } 3520 3521 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3522 3523 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3524 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3525 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3526 3527 assert(DstIdx != -1); 3528 const MCOperand &Dst = Inst.getOperand(DstIdx); 3529 assert(Dst.isReg()); 3530 3531 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3532 3533 for (int SrcIdx : SrcIndices) { 3534 if (SrcIdx == -1) break; 3535 const MCOperand &Src = Inst.getOperand(SrcIdx); 3536 if (Src.isReg()) { 3537 if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) { 3538 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3539 Error(getRegLoc(SrcReg, Operands), 3540 "destination must be different than all sources"); 3541 return false; 3542 } 3543 } 3544 } 3545 3546 return true; 3547 } 3548 3549 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3550 3551 const unsigned Opc = Inst.getOpcode(); 3552 const MCInstrDesc &Desc = MII.get(Opc); 3553 3554 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3555 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3556 assert(ClampIdx != -1); 3557 return Inst.getOperand(ClampIdx).getImm() == 0; 3558 } 3559 3560 return true; 3561 } 3562 3563 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3564 3565 const unsigned Opc = Inst.getOpcode(); 3566 const MCInstrDesc &Desc = MII.get(Opc); 3567 3568 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3569 return None; 3570 3571 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3572 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3573 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3574 3575 assert(VDataIdx != -1); 3576 3577 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3578 return None; 3579 3580 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3581 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3582 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3583 if (DMask == 0) 3584 DMask = 1; 3585 3586 bool isPackedD16 = false; 3587 unsigned DataSize = 3588 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 3589 if (hasPackedD16()) { 3590 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3591 isPackedD16 = D16Idx >= 0; 3592 if (isPackedD16 && Inst.getOperand(D16Idx).getImm()) 3593 DataSize = (DataSize + 1) / 2; 3594 } 3595 3596 if ((VDataSize / 4) == DataSize + TFESize) 3597 return None; 3598 3599 return StringRef(isPackedD16 3600 ? 
"image data size does not match dmask, d16 and tfe" 3601 : "image data size does not match dmask and tfe"); 3602 } 3603 3604 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3605 const unsigned Opc = Inst.getOpcode(); 3606 const MCInstrDesc &Desc = MII.get(Opc); 3607 3608 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3609 return true; 3610 3611 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3612 3613 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3614 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3615 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3616 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3617 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3618 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3619 3620 assert(VAddr0Idx != -1); 3621 assert(SrsrcIdx != -1); 3622 assert(SrsrcIdx > VAddr0Idx); 3623 3624 if (DimIdx == -1) 3625 return true; // intersect_ray 3626 3627 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3628 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3629 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3630 unsigned ActualAddrSize = 3631 IsNSA ? SrsrcIdx - VAddr0Idx 3632 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3633 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3634 3635 unsigned ExpectedAddrSize = 3636 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3637 3638 if (!IsNSA) { 3639 if (ExpectedAddrSize > 8) 3640 ExpectedAddrSize = 16; 3641 3642 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3643 // This provides backward compatibility for assembly created 3644 // before 160b/192b/224b types were directly supported. 3645 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3646 return true; 3647 } 3648 3649 return ActualAddrSize == ExpectedAddrSize; 3650 } 3651 3652 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3653 3654 const unsigned Opc = Inst.getOpcode(); 3655 const MCInstrDesc &Desc = MII.get(Opc); 3656 3657 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3658 return true; 3659 if (!Desc.mayLoad() || !Desc.mayStore()) 3660 return true; // Not atomic 3661 3662 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3663 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3664 3665 // This is an incomplete check because image_atomic_cmpswap 3666 // may only use 0x3 and 0xf while other atomic operations 3667 // may use 0x1 and 0x3. However these limitations are 3668 // verified when we check that dmask matches dst size. 3669 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3670 } 3671 3672 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3673 3674 const unsigned Opc = Inst.getOpcode(); 3675 const MCInstrDesc &Desc = MII.get(Opc); 3676 3677 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3678 return true; 3679 3680 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3681 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3682 3683 // GATHER4 instructions use dmask in a different fashion compared to 3684 // other MIMG instructions. The only useful DMASK values are 3685 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3686 // (red,red,red,red) etc.) The ISA document doesn't mention 3687 // this. 
3688 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3689 } 3690 3691 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3692 const unsigned Opc = Inst.getOpcode(); 3693 const MCInstrDesc &Desc = MII.get(Opc); 3694 3695 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3696 return true; 3697 3698 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3699 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3700 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3701 3702 if (!BaseOpcode->MSAA) 3703 return true; 3704 3705 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3706 assert(DimIdx != -1); 3707 3708 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3709 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3710 3711 return DimInfo->MSAA; 3712 } 3713 3714 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3715 { 3716 switch (Opcode) { 3717 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3718 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3719 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3720 return true; 3721 default: 3722 return false; 3723 } 3724 } 3725 3726 // movrels* opcodes should only allow VGPRS as src0. 3727 // This is specified in .td description for vop1/vop3, 3728 // but sdwa is handled differently. See isSDWAOperand. 3729 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3730 const OperandVector &Operands) { 3731 3732 const unsigned Opc = Inst.getOpcode(); 3733 const MCInstrDesc &Desc = MII.get(Opc); 3734 3735 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3736 return true; 3737 3738 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3739 assert(Src0Idx != -1); 3740 3741 SMLoc ErrLoc; 3742 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3743 if (Src0.isReg()) { 3744 auto Reg = mc2PseudoReg(Src0.getReg()); 3745 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3746 if (!isSGPR(Reg, TRI)) 3747 return true; 3748 ErrLoc = getRegLoc(Reg, Operands); 3749 } else { 3750 ErrLoc = getConstLoc(Operands); 3751 } 3752 3753 Error(ErrLoc, "source operand must be a VGPR"); 3754 return false; 3755 } 3756 3757 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3758 const OperandVector &Operands) { 3759 3760 const unsigned Opc = Inst.getOpcode(); 3761 3762 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3763 return true; 3764 3765 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3766 assert(Src0Idx != -1); 3767 3768 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3769 if (!Src0.isReg()) 3770 return true; 3771 3772 auto Reg = mc2PseudoReg(Src0.getReg()); 3773 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3774 if (!isGFX90A() && isSGPR(Reg, TRI)) { 3775 Error(getRegLoc(Reg, Operands), 3776 "source operand must be either a VGPR or an inline constant"); 3777 return false; 3778 } 3779 3780 return true; 3781 } 3782 3783 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 3784 const OperandVector &Operands) { 3785 const unsigned Opc = Inst.getOpcode(); 3786 const MCInstrDesc &Desc = MII.get(Opc); 3787 3788 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 3789 return true; 3790 3791 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 3792 if (Src2Idx == -1) 3793 return true; 3794 3795 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 3796 if (!Src2.isReg()) 3797 return true; 3798 3799 MCRegister Src2Reg = Src2.getReg(); 3800 MCRegister DstReg = Inst.getOperand(0).getReg(); 3801 if (Src2Reg == DstReg) 3802 return 
true; 3803 3804 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3805 if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128) 3806 return true; 3807 3808 if (TRI->regsOverlap(Src2Reg, DstReg)) { 3809 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands), 3810 "source 2 operand must not partially overlap with dst"); 3811 return false; 3812 } 3813 3814 return true; 3815 } 3816 3817 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3818 switch (Inst.getOpcode()) { 3819 default: 3820 return true; 3821 case V_DIV_SCALE_F32_gfx6_gfx7: 3822 case V_DIV_SCALE_F32_vi: 3823 case V_DIV_SCALE_F32_gfx10: 3824 case V_DIV_SCALE_F64_gfx6_gfx7: 3825 case V_DIV_SCALE_F64_vi: 3826 case V_DIV_SCALE_F64_gfx10: 3827 break; 3828 } 3829 3830 // TODO: Check that src0 = src1 or src2. 3831 3832 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3833 AMDGPU::OpName::src2_modifiers, 3834 AMDGPU::OpName::src2_modifiers}) { 3835 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3836 .getImm() & 3837 SISrcMods::ABS) { 3838 return false; 3839 } 3840 } 3841 3842 return true; 3843 } 3844 3845 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3846 3847 const unsigned Opc = Inst.getOpcode(); 3848 const MCInstrDesc &Desc = MII.get(Opc); 3849 3850 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3851 return true; 3852 3853 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3854 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3855 if (isCI() || isSI()) 3856 return false; 3857 } 3858 3859 return true; 3860 } 3861 3862 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3863 const unsigned Opc = Inst.getOpcode(); 3864 const MCInstrDesc &Desc = MII.get(Opc); 3865 3866 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3867 return true; 3868 3869 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3870 if (DimIdx < 0) 3871 return true; 3872 3873 long Imm = Inst.getOperand(DimIdx).getImm(); 3874 if (Imm < 0 || Imm >= 8) 3875 return false; 3876 3877 return true; 3878 } 3879 3880 static bool IsRevOpcode(const unsigned Opcode) 3881 { 3882 switch (Opcode) { 3883 case AMDGPU::V_SUBREV_F32_e32: 3884 case AMDGPU::V_SUBREV_F32_e64: 3885 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3886 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3887 case AMDGPU::V_SUBREV_F32_e32_vi: 3888 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3889 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3890 case AMDGPU::V_SUBREV_F32_e64_vi: 3891 3892 case AMDGPU::V_SUBREV_CO_U32_e32: 3893 case AMDGPU::V_SUBREV_CO_U32_e64: 3894 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3895 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3896 3897 case AMDGPU::V_SUBBREV_U32_e32: 3898 case AMDGPU::V_SUBBREV_U32_e64: 3899 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3900 case AMDGPU::V_SUBBREV_U32_e32_vi: 3901 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3902 case AMDGPU::V_SUBBREV_U32_e64_vi: 3903 3904 case AMDGPU::V_SUBREV_U32_e32: 3905 case AMDGPU::V_SUBREV_U32_e64: 3906 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3907 case AMDGPU::V_SUBREV_U32_e32_vi: 3908 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3909 case AMDGPU::V_SUBREV_U32_e64_vi: 3910 3911 case AMDGPU::V_SUBREV_F16_e32: 3912 case AMDGPU::V_SUBREV_F16_e64: 3913 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3914 case AMDGPU::V_SUBREV_F16_e32_vi: 3915 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3916 case AMDGPU::V_SUBREV_F16_e64_vi: 3917 3918 case AMDGPU::V_SUBREV_U16_e32: 3919 case AMDGPU::V_SUBREV_U16_e64: 3920 case AMDGPU::V_SUBREV_U16_e32_vi: 3921 case AMDGPU::V_SUBREV_U16_e64_vi: 3922 3923 case 
AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3924 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3925 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3926 3927 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3928 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3929 3930 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3931 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3932 3933 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3934 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3935 3936 case AMDGPU::V_LSHRREV_B32_e32: 3937 case AMDGPU::V_LSHRREV_B32_e64: 3938 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3939 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3940 case AMDGPU::V_LSHRREV_B32_e32_vi: 3941 case AMDGPU::V_LSHRREV_B32_e64_vi: 3942 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3943 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3944 3945 case AMDGPU::V_ASHRREV_I32_e32: 3946 case AMDGPU::V_ASHRREV_I32_e64: 3947 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3948 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3949 case AMDGPU::V_ASHRREV_I32_e32_vi: 3950 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3951 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3952 case AMDGPU::V_ASHRREV_I32_e64_vi: 3953 3954 case AMDGPU::V_LSHLREV_B32_e32: 3955 case AMDGPU::V_LSHLREV_B32_e64: 3956 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3957 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3958 case AMDGPU::V_LSHLREV_B32_e32_vi: 3959 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3960 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3961 case AMDGPU::V_LSHLREV_B32_e64_vi: 3962 3963 case AMDGPU::V_LSHLREV_B16_e32: 3964 case AMDGPU::V_LSHLREV_B16_e64: 3965 case AMDGPU::V_LSHLREV_B16_e32_vi: 3966 case AMDGPU::V_LSHLREV_B16_e64_vi: 3967 case AMDGPU::V_LSHLREV_B16_gfx10: 3968 3969 case AMDGPU::V_LSHRREV_B16_e32: 3970 case AMDGPU::V_LSHRREV_B16_e64: 3971 case AMDGPU::V_LSHRREV_B16_e32_vi: 3972 case AMDGPU::V_LSHRREV_B16_e64_vi: 3973 case AMDGPU::V_LSHRREV_B16_gfx10: 3974 3975 case AMDGPU::V_ASHRREV_I16_e32: 3976 case AMDGPU::V_ASHRREV_I16_e64: 3977 case AMDGPU::V_ASHRREV_I16_e32_vi: 3978 case AMDGPU::V_ASHRREV_I16_e64_vi: 3979 case AMDGPU::V_ASHRREV_I16_gfx10: 3980 3981 case AMDGPU::V_LSHLREV_B64_e64: 3982 case AMDGPU::V_LSHLREV_B64_gfx10: 3983 case AMDGPU::V_LSHLREV_B64_vi: 3984 3985 case AMDGPU::V_LSHRREV_B64_e64: 3986 case AMDGPU::V_LSHRREV_B64_gfx10: 3987 case AMDGPU::V_LSHRREV_B64_vi: 3988 3989 case AMDGPU::V_ASHRREV_I64_e64: 3990 case AMDGPU::V_ASHRREV_I64_gfx10: 3991 case AMDGPU::V_ASHRREV_I64_vi: 3992 3993 case AMDGPU::V_PK_LSHLREV_B16: 3994 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3995 case AMDGPU::V_PK_LSHLREV_B16_vi: 3996 3997 case AMDGPU::V_PK_LSHRREV_B16: 3998 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3999 case AMDGPU::V_PK_LSHRREV_B16_vi: 4000 case AMDGPU::V_PK_ASHRREV_I16: 4001 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 4002 case AMDGPU::V_PK_ASHRREV_I16_vi: 4003 return true; 4004 default: 4005 return false; 4006 } 4007 } 4008 4009 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 4010 4011 using namespace SIInstrFlags; 4012 const unsigned Opcode = Inst.getOpcode(); 4013 const MCInstrDesc &Desc = MII.get(Opcode); 4014 4015 // lds_direct register is defined so that it can be used 4016 // with 9-bit operands only. Ignore encodings which do not accept these. 
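  // For example (illustrative), lds_direct may appear as src0 of v_mov_b32 on
  // targets that allow it, while the checks below reject it when used as src1,
  // with SDWA, or with *rev opcodes.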
4017 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 4018 if ((Desc.TSFlags & Enc) == 0) 4019 return None; 4020 4021 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 4022 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 4023 if (SrcIdx == -1) 4024 break; 4025 const auto &Src = Inst.getOperand(SrcIdx); 4026 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 4027 4028 if (isGFX90A() || isGFX11Plus()) 4029 return StringRef("lds_direct is not supported on this GPU"); 4030 4031 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 4032 return StringRef("lds_direct cannot be used with this instruction"); 4033 4034 if (SrcName != OpName::src0) 4035 return StringRef("lds_direct may be used as src0 only"); 4036 } 4037 } 4038 4039 return None; 4040 } 4041 4042 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 4043 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4044 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4045 if (Op.isFlatOffset()) 4046 return Op.getStartLoc(); 4047 } 4048 return getLoc(); 4049 } 4050 4051 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 4052 const OperandVector &Operands) { 4053 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4054 if ((TSFlags & SIInstrFlags::FLAT) == 0) 4055 return true; 4056 4057 auto Opcode = Inst.getOpcode(); 4058 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4059 assert(OpNum != -1); 4060 4061 const auto &Op = Inst.getOperand(OpNum); 4062 if (!hasFlatOffsets() && Op.getImm() != 0) { 4063 Error(getFlatOffsetLoc(Operands), 4064 "flat offset modifier is not supported on this GPU"); 4065 return false; 4066 } 4067 4068 // For FLAT segment the offset must be positive; 4069 // MSB is ignored and forced to zero. 4070 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 4071 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 4072 if (!isIntN(OffsetSize, Op.getImm())) { 4073 Error(getFlatOffsetLoc(Operands), 4074 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 4075 return false; 4076 } 4077 } else { 4078 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 4079 if (!isUIntN(OffsetSize, Op.getImm())) { 4080 Error(getFlatOffsetLoc(Operands), 4081 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 4082 return false; 4083 } 4084 } 4085 4086 return true; 4087 } 4088 4089 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 4090 // Start with second operand because SMEM Offset cannot be dst or src0. 
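  // For example (illustrative), 's_load_dword s4, s[0:1], 0x10' is parsed as
  // [mnemonic, s4, s[0:1], 0x10], so scanning may safely begin at index 2.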
4091 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 4092 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4093 if (Op.isSMEMOffset()) 4094 return Op.getStartLoc(); 4095 } 4096 return getLoc(); 4097 } 4098 4099 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 4100 const OperandVector &Operands) { 4101 if (isCI() || isSI()) 4102 return true; 4103 4104 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4105 if ((TSFlags & SIInstrFlags::SMRD) == 0) 4106 return true; 4107 4108 auto Opcode = Inst.getOpcode(); 4109 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4110 if (OpNum == -1) 4111 return true; 4112 4113 const auto &Op = Inst.getOperand(OpNum); 4114 if (!Op.isImm()) 4115 return true; 4116 4117 uint64_t Offset = Op.getImm(); 4118 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 4119 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 4120 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 4121 return true; 4122 4123 Error(getSMEMOffsetLoc(Operands), 4124 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 4125 "expected a 21-bit signed offset"); 4126 4127 return false; 4128 } 4129 4130 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 4131 unsigned Opcode = Inst.getOpcode(); 4132 const MCInstrDesc &Desc = MII.get(Opcode); 4133 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 4134 return true; 4135 4136 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4137 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4138 4139 const int OpIndices[] = { Src0Idx, Src1Idx }; 4140 4141 unsigned NumExprs = 0; 4142 unsigned NumLiterals = 0; 4143 uint32_t LiteralValue; 4144 4145 for (int OpIdx : OpIndices) { 4146 if (OpIdx == -1) break; 4147 4148 const MCOperand &MO = Inst.getOperand(OpIdx); 4149 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 4150 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 4151 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4152 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4153 if (NumLiterals == 0 || LiteralValue != Value) { 4154 LiteralValue = Value; 4155 ++NumLiterals; 4156 } 4157 } else if (MO.isExpr()) { 4158 ++NumExprs; 4159 } 4160 } 4161 } 4162 4163 return NumLiterals + NumExprs <= 1; 4164 } 4165 4166 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4167 const unsigned Opc = Inst.getOpcode(); 4168 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 4169 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 4170 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4171 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4172 4173 if (OpSel & ~3) 4174 return false; 4175 } 4176 4177 if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) { 4178 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4179 if (OpSelIdx != -1) { 4180 if (Inst.getOperand(OpSelIdx).getImm() != 0) 4181 return false; 4182 } 4183 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 4184 if (OpSelHiIdx != -1) { 4185 if (Inst.getOperand(OpSelHiIdx).getImm() != -1) 4186 return false; 4187 } 4188 } 4189 4190 return true; 4191 } 4192 4193 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4194 const OperandVector &Operands) { 4195 const unsigned Opc = Inst.getOpcode(); 4196 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4197 if (DppCtrlIdx < 0) 4198 return true; 4199 unsigned DppCtrl = 
Inst.getOperand(DppCtrlIdx).getImm(); 4200 4201 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 4202 // DPP64 is supported for row_newbcast only. 4203 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4204 if (Src0Idx >= 0 && 4205 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 4206 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4207 Error(S, "64 bit dpp only supports row_newbcast"); 4208 return false; 4209 } 4210 } 4211 4212 return true; 4213 } 4214 4215 // Check if VCC register matches wavefront size 4216 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4217 auto FB = getFeatureBits(); 4218 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4219 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4220 } 4221 4222 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4223 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4224 const OperandVector &Operands) { 4225 unsigned Opcode = Inst.getOpcode(); 4226 const MCInstrDesc &Desc = MII.get(Opcode); 4227 const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); 4228 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4229 ImmIdx == -1) 4230 return true; 4231 4232 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4233 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4234 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 4235 4236 const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx}; 4237 4238 unsigned NumExprs = 0; 4239 unsigned NumLiterals = 0; 4240 uint32_t LiteralValue; 4241 4242 for (int OpIdx : OpIndices) { 4243 if (OpIdx == -1) 4244 continue; 4245 4246 const MCOperand &MO = Inst.getOperand(OpIdx); 4247 if (!MO.isImm() && !MO.isExpr()) 4248 continue; 4249 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 4250 continue; 4251 4252 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 4253 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 4254 Error(getConstLoc(Operands), 4255 "inline constants are not allowed for this operand"); 4256 return false; 4257 } 4258 4259 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4260 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4261 if (NumLiterals == 0 || LiteralValue != Value) { 4262 LiteralValue = Value; 4263 ++NumLiterals; 4264 } 4265 } else if (MO.isExpr()) { 4266 ++NumExprs; 4267 } 4268 } 4269 NumLiterals += NumExprs; 4270 4271 if (!NumLiterals) 4272 return true; 4273 4274 if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4275 Error(getLitLoc(Operands), "literal operands are not supported"); 4276 return false; 4277 } 4278 4279 if (NumLiterals > 1) { 4280 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4281 return false; 4282 } 4283 4284 return true; 4285 } 4286 4287 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4288 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4289 const MCRegisterInfo *MRI) { 4290 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4291 if (OpIdx < 0) 4292 return -1; 4293 4294 const MCOperand &Op = Inst.getOperand(OpIdx); 4295 if (!Op.isReg()) 4296 return -1; 4297 4298 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4299 auto Reg = Sub ? Sub : Op.getReg(); 4300 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4301 return AGPR32.contains(Reg) ? 
1 : 0; 4302 } 4303 4304 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4305 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4306 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4307 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4308 SIInstrFlags::DS)) == 0) 4309 return true; 4310 4311 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4312 : AMDGPU::OpName::vdata; 4313 4314 const MCRegisterInfo *MRI = getMRI(); 4315 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4316 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4317 4318 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4319 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4320 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4321 return false; 4322 } 4323 4324 auto FB = getFeatureBits(); 4325 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4326 if (DataAreg < 0 || DstAreg < 0) 4327 return true; 4328 return DstAreg == DataAreg; 4329 } 4330 4331 return DstAreg < 1 && DataAreg < 1; 4332 } 4333 4334 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4335 auto FB = getFeatureBits(); 4336 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4337 return true; 4338 4339 const MCRegisterInfo *MRI = getMRI(); 4340 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4341 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4342 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4343 const MCOperand &Op = Inst.getOperand(I); 4344 if (!Op.isReg()) 4345 continue; 4346 4347 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4348 if (!Sub) 4349 continue; 4350 4351 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4352 return false; 4353 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4354 return false; 4355 } 4356 4357 return true; 4358 } 4359 4360 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const { 4361 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4362 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4363 if (Op.isBLGP()) 4364 return Op.getStartLoc(); 4365 } 4366 return SMLoc(); 4367 } 4368 4369 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst, 4370 const OperandVector &Operands) { 4371 unsigned Opc = Inst.getOpcode(); 4372 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp); 4373 if (BlgpIdx == -1) 4374 return true; 4375 SMLoc BLGPLoc = getBLGPLoc(Operands); 4376 if (!BLGPLoc.isValid()) 4377 return true; 4378 bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:"); 4379 auto FB = getFeatureBits(); 4380 bool UsesNeg = false; 4381 if (FB[AMDGPU::FeatureGFX940Insts]) { 4382 switch (Opc) { 4383 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: 4384 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: 4385 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: 4386 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: 4387 UsesNeg = true; 4388 } 4389 } 4390 4391 if (IsNeg == UsesNeg) 4392 return true; 4393 4394 Error(BLGPLoc, 4395 UsesNeg ? "invalid modifier: blgp is not supported" 4396 : "invalid modifier: neg is not supported"); 4397 4398 return false; 4399 } 4400 4401 // gfx90a has an undocumented limitation: 4402 // DS_GWS opcodes must use even aligned registers. 
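// For example (illustrative), 'ds_gws_init v1 gds' is rejected because v1 is
// odd-numbered; an even-numbered register such as v0 or v2 is required.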
4403 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4404 const OperandVector &Operands) { 4405 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4406 return true; 4407 4408 int Opc = Inst.getOpcode(); 4409 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4410 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4411 return true; 4412 4413 const MCRegisterInfo *MRI = getMRI(); 4414 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4415 int Data0Pos = 4416 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4417 assert(Data0Pos != -1); 4418 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4419 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0); 4420 if (RegIdx & 1) { 4421 SMLoc RegLoc = getRegLoc(Reg, Operands); 4422 Error(RegLoc, "vgpr must be even aligned"); 4423 return false; 4424 } 4425 4426 return true; 4427 } 4428 4429 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4430 const OperandVector &Operands, 4431 const SMLoc &IDLoc) { 4432 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4433 AMDGPU::OpName::cpol); 4434 if (CPolPos == -1) 4435 return true; 4436 4437 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4438 4439 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4440 if (TSFlags & SIInstrFlags::SMRD) { 4441 if (CPol && (isSI() || isCI())) { 4442 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4443 Error(S, "cache policy is not supported for SMRD instructions"); 4444 return false; 4445 } 4446 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) { 4447 Error(IDLoc, "invalid cache policy for SMEM instruction"); 4448 return false; 4449 } 4450 } 4451 4452 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { 4453 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4454 StringRef CStr(S.getPointer()); 4455 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4456 Error(S, "scc is not supported on this GPU"); 4457 return false; 4458 } 4459 4460 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4461 return true; 4462 4463 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4464 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4465 Error(IDLoc, isGFX940() ? "instruction must use sc0" 4466 : "instruction must use glc"); 4467 return false; 4468 } 4469 } else { 4470 if (CPol & CPol::GLC) { 4471 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4472 StringRef CStr(S.getPointer()); 4473 S = SMLoc::getFromPointer( 4474 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]); 4475 Error(S, isGFX940() ? "instruction must not use sc0" 4476 : "instruction must not use glc"); 4477 return false; 4478 } 4479 } 4480 4481 return true; 4482 } 4483 4484 bool AMDGPUAsmParser::validateFlatLdsDMA(const MCInst &Inst, 4485 const OperandVector &Operands, 4486 const SMLoc &IDLoc) { 4487 if (isGFX940()) 4488 return true; 4489 4490 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4491 if ((TSFlags & (SIInstrFlags::VALU | SIInstrFlags::FLAT)) != 4492 (SIInstrFlags::VALU | SIInstrFlags::FLAT)) 4493 return true; 4494 // This is FLAT LDS DMA. 4495 4496 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyLDS, Operands); 4497 StringRef CStr(S.getPointer()); 4498 if (!CStr.startswith("lds")) { 4499 // This is an incorrectly selected LDS DMA version of a FLAT load opcode. 4500 // The LDS version should have an 'lds' modifier, but it follows optional 4501 // operands so its absence is ignored by the matcher.
4502 Error(IDLoc, "invalid operands for instruction"); 4503 return false; 4504 } 4505 4506 return true; 4507 } 4508 4509 bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) { 4510 if (!isGFX11Plus()) 4511 return true; 4512 for (auto &Operand : Operands) { 4513 if (!Operand->isReg()) 4514 continue; 4515 unsigned Reg = Operand->getReg(); 4516 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) { 4517 Error(getRegLoc(Reg, Operands), 4518 "execz and vccz are not supported on this GPU"); 4519 return false; 4520 } 4521 } 4522 return true; 4523 } 4524 4525 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4526 const SMLoc &IDLoc, 4527 const OperandVector &Operands) { 4528 if (auto ErrMsg = validateLdsDirect(Inst)) { 4529 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4530 return false; 4531 } 4532 if (!validateSOPLiteral(Inst)) { 4533 Error(getLitLoc(Operands), 4534 "only one literal operand is allowed"); 4535 return false; 4536 } 4537 if (!validateVOPLiteral(Inst, Operands)) { 4538 return false; 4539 } 4540 if (!validateConstantBusLimitations(Inst, Operands)) { 4541 return false; 4542 } 4543 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4544 return false; 4545 } 4546 if (!validateIntClampSupported(Inst)) { 4547 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4548 "integer clamping is not supported on this GPU"); 4549 return false; 4550 } 4551 if (!validateOpSel(Inst)) { 4552 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4553 "invalid op_sel operand"); 4554 return false; 4555 } 4556 if (!validateDPP(Inst, Operands)) { 4557 return false; 4558 } 4559 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 4560 if (!validateMIMGD16(Inst)) { 4561 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4562 "d16 modifier is not supported on this GPU"); 4563 return false; 4564 } 4565 if (!validateMIMGDim(Inst)) { 4566 Error(IDLoc, "dim modifier is required on this GPU"); 4567 return false; 4568 } 4569 if (!validateMIMGMSAA(Inst)) { 4570 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4571 "invalid dim; must be MSAA type"); 4572 return false; 4573 } 4574 if (auto ErrMsg = validateMIMGDataSize(Inst)) { 4575 Error(IDLoc, *ErrMsg); 4576 return false; 4577 } 4578 if (!validateMIMGAddrSize(Inst)) { 4579 Error(IDLoc, 4580 "image address size does not match dim and a16"); 4581 return false; 4582 } 4583 if (!validateMIMGAtomicDMask(Inst)) { 4584 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4585 "invalid atomic image dmask"); 4586 return false; 4587 } 4588 if (!validateMIMGGatherDMask(Inst)) { 4589 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4590 "invalid image_gather dmask: only one bit must be set"); 4591 return false; 4592 } 4593 if (!validateMovrels(Inst, Operands)) { 4594 return false; 4595 } 4596 if (!validateFlatOffset(Inst, Operands)) { 4597 return false; 4598 } 4599 if (!validateSMEMOffset(Inst, Operands)) { 4600 return false; 4601 } 4602 if (!validateMAIAccWrite(Inst, Operands)) { 4603 return false; 4604 } 4605 if (!validateMFMA(Inst, Operands)) { 4606 return false; 4607 } 4608 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4609 return false; 4610 } 4611 4612 if (!validateAGPRLdSt(Inst)) { 4613 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4614 ? 
"invalid register class: data and dst should be all VGPR or AGPR" 4615 : "invalid register class: agpr loads and stores not supported on this GPU" 4616 ); 4617 return false; 4618 } 4619 if (!validateVGPRAlign(Inst)) { 4620 Error(IDLoc, 4621 "invalid register class: vgpr tuples must be 64 bit aligned"); 4622 return false; 4623 } 4624 if (!validateGWS(Inst, Operands)) { 4625 return false; 4626 } 4627 4628 if (!validateBLGP(Inst, Operands)) { 4629 return false; 4630 } 4631 4632 if (!validateDivScale(Inst)) { 4633 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4634 return false; 4635 } 4636 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4637 return false; 4638 } 4639 if (!validateExeczVcczOperands(Operands)) { 4640 return false; 4641 } 4642 4643 if (!validateFlatLdsDMA(Inst, Operands, IDLoc)) { 4644 return false; 4645 } 4646 4647 return true; 4648 } 4649 4650 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4651 const FeatureBitset &FBS, 4652 unsigned VariantID = 0); 4653 4654 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4655 const FeatureBitset &AvailableFeatures, 4656 unsigned VariantID); 4657 4658 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4659 const FeatureBitset &FBS) { 4660 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4661 } 4662 4663 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4664 const FeatureBitset &FBS, 4665 ArrayRef<unsigned> Variants) { 4666 for (auto Variant : Variants) { 4667 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4668 return true; 4669 } 4670 4671 return false; 4672 } 4673 4674 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4675 const SMLoc &IDLoc) { 4676 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4677 4678 // Check if requested instruction variant is supported. 4679 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4680 return false; 4681 4682 // This instruction is not supported. 4683 // Clear any other pending errors because they are no longer relevant. 4684 getParser().clearPendingErrors(); 4685 4686 // Requested instruction variant is not supported. 4687 // Check if any other variants are supported. 4688 StringRef VariantName = getMatchedVariantName(); 4689 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4690 return Error(IDLoc, 4691 Twine(VariantName, 4692 " variant of this instruction is not supported")); 4693 } 4694 4695 // Finally check if this instruction is supported on any other GPU. 4696 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4697 return Error(IDLoc, "instruction not supported on this GPU"); 4698 } 4699 4700 // Instruction not supported on any GPU. Probably a typo. 4701 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4702 return Error(IDLoc, "invalid instruction" + Suggestion); 4703 } 4704 4705 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4706 OperandVector &Operands, 4707 MCStreamer &Out, 4708 uint64_t &ErrorInfo, 4709 bool MatchingInlineAsm) { 4710 MCInst Inst; 4711 unsigned Result = Match_Success; 4712 for (auto Variant : getMatchedVariants()) { 4713 uint64_t EI; 4714 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4715 Variant); 4716 // We order match statuses from least to most specific. 
We use most specific 4717 // status as resulting 4718 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4719 if ((R == Match_Success) || 4720 (R == Match_PreferE32) || 4721 (R == Match_MissingFeature && Result != Match_PreferE32) || 4722 (R == Match_InvalidOperand && Result != Match_MissingFeature 4723 && Result != Match_PreferE32) || 4724 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4725 && Result != Match_MissingFeature 4726 && Result != Match_PreferE32)) { 4727 Result = R; 4728 ErrorInfo = EI; 4729 } 4730 if (R == Match_Success) 4731 break; 4732 } 4733 4734 if (Result == Match_Success) { 4735 if (!validateInstruction(Inst, IDLoc, Operands)) { 4736 return true; 4737 } 4738 Inst.setLoc(IDLoc); 4739 Out.emitInstruction(Inst, getSTI()); 4740 return false; 4741 } 4742 4743 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4744 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4745 return true; 4746 } 4747 4748 switch (Result) { 4749 default: break; 4750 case Match_MissingFeature: 4751 // It has been verified that the specified instruction 4752 // mnemonic is valid. A match was found but it requires 4753 // features which are not supported on this GPU. 4754 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4755 4756 case Match_InvalidOperand: { 4757 SMLoc ErrorLoc = IDLoc; 4758 if (ErrorInfo != ~0ULL) { 4759 if (ErrorInfo >= Operands.size()) { 4760 return Error(IDLoc, "too few operands for instruction"); 4761 } 4762 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4763 if (ErrorLoc == SMLoc()) 4764 ErrorLoc = IDLoc; 4765 } 4766 return Error(ErrorLoc, "invalid operand for instruction"); 4767 } 4768 4769 case Match_PreferE32: 4770 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4771 "should be encoded as e32"); 4772 case Match_MnemonicFail: 4773 llvm_unreachable("Invalid instructions should have been handled already"); 4774 } 4775 llvm_unreachable("Implement any new match types added!"); 4776 } 4777 4778 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4779 int64_t Tmp = -1; 4780 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4781 return true; 4782 } 4783 if (getParser().parseAbsoluteExpression(Tmp)) { 4784 return true; 4785 } 4786 Ret = static_cast<uint32_t>(Tmp); 4787 return false; 4788 } 4789 4790 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4791 uint32_t &Minor) { 4792 if (ParseAsAbsoluteExpression(Major)) 4793 return TokError("invalid major version"); 4794 4795 if (!trySkipToken(AsmToken::Comma)) 4796 return TokError("minor version number required, comma expected"); 4797 4798 if (ParseAsAbsoluteExpression(Minor)) 4799 return TokError("invalid minor version"); 4800 4801 return false; 4802 } 4803 4804 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4805 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4806 return TokError("directive only supported for amdgcn architecture"); 4807 4808 std::string TargetIDDirective; 4809 SMLoc TargetStart = getTok().getLoc(); 4810 if (getParser().parseEscapedString(TargetIDDirective)) 4811 return true; 4812 4813 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4814 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4815 return getParser().Error(TargetRange.Start, 4816 (Twine(".amdgcn_target directive's target id ") + 4817 Twine(TargetIDDirective) + 4818 Twine(" does not match the specified target id ") + 4819 
Twine(getTargetStreamer().getTargetID()->toString())).str()); 4820 4821 return false; 4822 } 4823 4824 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4825 return Error(Range.Start, "value out of range", Range); 4826 } 4827 4828 bool AMDGPUAsmParser::calculateGPRBlocks( 4829 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4830 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4831 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4832 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4833 // TODO(scott.linder): These calculations are duplicated from 4834 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 4835 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4836 4837 unsigned NumVGPRs = NextFreeVGPR; 4838 unsigned NumSGPRs = NextFreeSGPR; 4839 4840 if (Version.Major >= 10) 4841 NumSGPRs = 0; 4842 else { 4843 unsigned MaxAddressableNumSGPRs = 4844 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4845 4846 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4847 NumSGPRs > MaxAddressableNumSGPRs) 4848 return OutOfRangeError(SGPRRange); 4849 4850 NumSGPRs += 4851 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4852 4853 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4854 NumSGPRs > MaxAddressableNumSGPRs) 4855 return OutOfRangeError(SGPRRange); 4856 4857 if (Features.test(FeatureSGPRInitBug)) 4858 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4859 } 4860 4861 VGPRBlocks = 4862 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4863 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4864 4865 return false; 4866 } 4867 4868 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4869 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4870 return TokError("directive only supported for amdgcn architecture"); 4871 4872 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4873 return TokError("directive only supported for amdhsa OS"); 4874 4875 StringRef KernelName; 4876 if (getParser().parseIdentifier(KernelName)) 4877 return true; 4878 4879 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4880 4881 StringSet<> Seen; 4882 4883 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4884 4885 SMRange VGPRRange; 4886 uint64_t NextFreeVGPR = 0; 4887 uint64_t AccumOffset = 0; 4888 uint64_t SharedVGPRCount = 0; 4889 SMRange SGPRRange; 4890 uint64_t NextFreeSGPR = 0; 4891 4892 // Count the number of user SGPRs implied from the enabled feature bits. 4893 unsigned ImpliedUserSGPRCount = 0; 4894 4895 // Track if the asm explicitly contains the directive for the user SGPR 4896 // count. 
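  // For example (illustrative), an explicit '.amdhsa_user_sgpr_count 6' must
  // not be smaller than the count implied by the enabled user SGPR directives;
  // this is checked after the parsing loop below.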
4897 Optional<unsigned> ExplicitUserSGPRCount; 4898 bool ReserveVCC = true; 4899 bool ReserveFlatScr = true; 4900 Optional<bool> EnableWavefrontSize32; 4901 4902 while (true) { 4903 while (trySkipToken(AsmToken::EndOfStatement)); 4904 4905 StringRef ID; 4906 SMRange IDRange = getTok().getLocRange(); 4907 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4908 return true; 4909 4910 if (ID == ".end_amdhsa_kernel") 4911 break; 4912 4913 if (Seen.find(ID) != Seen.end()) 4914 return TokError(".amdhsa_ directives cannot be repeated"); 4915 Seen.insert(ID); 4916 4917 SMLoc ValStart = getLoc(); 4918 int64_t IVal; 4919 if (getParser().parseAbsoluteExpression(IVal)) 4920 return true; 4921 SMLoc ValEnd = getLoc(); 4922 SMRange ValRange = SMRange(ValStart, ValEnd); 4923 4924 if (IVal < 0) 4925 return OutOfRangeError(ValRange); 4926 4927 uint64_t Val = IVal; 4928 4929 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4930 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4931 return OutOfRangeError(RANGE); \ 4932 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4933 4934 if (ID == ".amdhsa_group_segment_fixed_size") { 4935 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4936 return OutOfRangeError(ValRange); 4937 KD.group_segment_fixed_size = Val; 4938 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4939 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4940 return OutOfRangeError(ValRange); 4941 KD.private_segment_fixed_size = Val; 4942 } else if (ID == ".amdhsa_kernarg_size") { 4943 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4944 return OutOfRangeError(ValRange); 4945 KD.kernarg_size = Val; 4946 } else if (ID == ".amdhsa_user_sgpr_count") { 4947 ExplicitUserSGPRCount = Val; 4948 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4949 if (hasArchitectedFlatScratch()) 4950 return Error(IDRange.Start, 4951 "directive is not supported with architected flat scratch", 4952 IDRange); 4953 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4954 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4955 Val, ValRange); 4956 if (Val) 4957 ImpliedUserSGPRCount += 4; 4958 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4959 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4960 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4961 ValRange); 4962 if (Val) 4963 ImpliedUserSGPRCount += 2; 4964 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4965 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4966 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4967 ValRange); 4968 if (Val) 4969 ImpliedUserSGPRCount += 2; 4970 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4971 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4972 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4973 Val, ValRange); 4974 if (Val) 4975 ImpliedUserSGPRCount += 2; 4976 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4977 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4978 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4979 ValRange); 4980 if (Val) 4981 ImpliedUserSGPRCount += 2; 4982 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4983 if (hasArchitectedFlatScratch()) 4984 return Error(IDRange.Start, 4985 "directive is not supported with architected flat scratch", 4986 IDRange); 4987 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4988 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4989 ValRange); 4990 if (Val) 4991 ImpliedUserSGPRCount += 2; 4992 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4993 
PARSE_BITS_ENTRY(KD.kernel_code_properties, 4994 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4995 Val, ValRange); 4996 if (Val) 4997 ImpliedUserSGPRCount += 1; 4998 } else if (ID == ".amdhsa_wavefront_size32") { 4999 if (IVersion.Major < 10) 5000 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5001 EnableWavefrontSize32 = Val; 5002 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5003 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 5004 Val, ValRange); 5005 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 5006 if (hasArchitectedFlatScratch()) 5007 return Error(IDRange.Start, 5008 "directive is not supported with architected flat scratch", 5009 IDRange); 5010 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5011 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 5012 } else if (ID == ".amdhsa_enable_private_segment") { 5013 if (!hasArchitectedFlatScratch()) 5014 return Error( 5015 IDRange.Start, 5016 "directive is not supported without architected flat scratch", 5017 IDRange); 5018 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5019 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 5020 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 5021 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5022 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 5023 ValRange); 5024 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 5025 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5026 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 5027 ValRange); 5028 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 5029 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5030 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 5031 ValRange); 5032 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 5033 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5034 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 5035 ValRange); 5036 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 5037 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5038 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 5039 ValRange); 5040 } else if (ID == ".amdhsa_next_free_vgpr") { 5041 VGPRRange = ValRange; 5042 NextFreeVGPR = Val; 5043 } else if (ID == ".amdhsa_next_free_sgpr") { 5044 SGPRRange = ValRange; 5045 NextFreeSGPR = Val; 5046 } else if (ID == ".amdhsa_accum_offset") { 5047 if (!isGFX90A()) 5048 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5049 AccumOffset = Val; 5050 } else if (ID == ".amdhsa_reserve_vcc") { 5051 if (!isUInt<1>(Val)) 5052 return OutOfRangeError(ValRange); 5053 ReserveVCC = Val; 5054 } else if (ID == ".amdhsa_reserve_flat_scratch") { 5055 if (IVersion.Major < 7) 5056 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 5057 if (hasArchitectedFlatScratch()) 5058 return Error(IDRange.Start, 5059 "directive is not supported with architected flat scratch", 5060 IDRange); 5061 if (!isUInt<1>(Val)) 5062 return OutOfRangeError(ValRange); 5063 ReserveFlatScr = Val; 5064 } else if (ID == ".amdhsa_reserve_xnack_mask") { 5065 if (IVersion.Major < 8) 5066 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 5067 if (!isUInt<1>(Val)) 5068 return OutOfRangeError(ValRange); 5069 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 5070 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 5071 IDRange); 5072 } else if (ID == ".amdhsa_float_round_mode_32") { 5073 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5074 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 5075 } else if (ID == ".amdhsa_float_round_mode_16_64") { 
5076 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5077 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 5078 } else if (ID == ".amdhsa_float_denorm_mode_32") { 5079 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5080 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 5081 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 5082 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5083 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 5084 ValRange); 5085 } else if (ID == ".amdhsa_dx10_clamp") { 5086 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5087 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 5088 } else if (ID == ".amdhsa_ieee_mode") { 5089 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 5090 Val, ValRange); 5091 } else if (ID == ".amdhsa_fp16_overflow") { 5092 if (IVersion.Major < 9) 5093 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 5094 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 5095 ValRange); 5096 } else if (ID == ".amdhsa_tg_split") { 5097 if (!isGFX90A()) 5098 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5099 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 5100 ValRange); 5101 } else if (ID == ".amdhsa_workgroup_processor_mode") { 5102 if (IVersion.Major < 10) 5103 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5104 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 5105 ValRange); 5106 } else if (ID == ".amdhsa_memory_ordered") { 5107 if (IVersion.Major < 10) 5108 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5109 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 5110 ValRange); 5111 } else if (ID == ".amdhsa_forward_progress") { 5112 if (IVersion.Major < 10) 5113 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5114 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 5115 ValRange); 5116 } else if (ID == ".amdhsa_shared_vgpr_count") { 5117 if (IVersion.Major < 10) 5118 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5119 SharedVGPRCount = Val; 5120 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, 5121 COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT, Val, 5122 ValRange); 5123 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 5124 PARSE_BITS_ENTRY( 5125 KD.compute_pgm_rsrc2, 5126 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 5127 ValRange); 5128 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 5129 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5130 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 5131 Val, ValRange); 5132 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 5133 PARSE_BITS_ENTRY( 5134 KD.compute_pgm_rsrc2, 5135 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 5136 ValRange); 5137 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 5138 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5139 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 5140 Val, ValRange); 5141 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 5142 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5143 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 5144 Val, ValRange); 5145 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 5146 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5147 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 5148 Val, ValRange); 5149 } else if (ID == ".amdhsa_exception_int_div_zero") { 5150 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5151 
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 5152 Val, ValRange); 5153 } else { 5154 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 5155 } 5156 5157 #undef PARSE_BITS_ENTRY 5158 } 5159 5160 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 5161 return TokError(".amdhsa_next_free_vgpr directive is required"); 5162 5163 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 5164 return TokError(".amdhsa_next_free_sgpr directive is required"); 5165 5166 unsigned VGPRBlocks; 5167 unsigned SGPRBlocks; 5168 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 5169 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 5170 EnableWavefrontSize32, NextFreeVGPR, 5171 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 5172 SGPRBlocks)) 5173 return true; 5174 5175 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 5176 VGPRBlocks)) 5177 return OutOfRangeError(VGPRRange); 5178 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5179 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 5180 5181 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 5182 SGPRBlocks)) 5183 return OutOfRangeError(SGPRRange); 5184 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5185 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 5186 SGPRBlocks); 5187 5188 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) 5189 return TokError(".amdhsa_user_sgpr_count smaller than implied by " 5190 "enabled user SGPRs"); 5191 5192 unsigned UserSGPRCount = 5193 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount; 5194 5195 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 5196 return TokError("too many user SGPRs enabled"); 5197 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 5198 UserSGPRCount); 5199 5200 if (isGFX90A()) { 5201 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 5202 return TokError(".amdhsa_accum_offset directive is required"); 5203 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 5204 return TokError("accum_offset should be in range [4..256] in " 5205 "increments of 4"); 5206 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 5207 return TokError("accum_offset exceeds total VGPR allocation"); 5208 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 5209 (AccumOffset / 4 - 1)); 5210 } 5211 5212 if (IVersion.Major == 10) { 5213 // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY 5214 if (SharedVGPRCount && EnableWavefrontSize32) { 5215 return TokError("shared_vgpr_count directive not valid on " 5216 "wavefront size 32"); 5217 } 5218 if (SharedVGPRCount * 2 + VGPRBlocks > 63) { 5219 return TokError("shared_vgpr_count*2 + " 5220 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot " 5221 "exceed 63\n"); 5222 } 5223 } 5224 5225 getTargetStreamer().EmitAmdhsaKernelDescriptor( 5226 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 5227 ReserveFlatScr); 5228 return false; 5229 } 5230 5231 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 5232 uint32_t Major; 5233 uint32_t Minor; 5234 5235 if (ParseDirectiveMajorMinor(Major, Minor)) 5236 return true; 5237 5238 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 5239 return false; 5240 } 5241 5242 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 5243 uint32_t Major; 5244 uint32_t Minor; 5245 uint32_t Stepping; 5246 StringRef VendorName; 5247 StringRef ArchName; 5248 5249 // If this directive has no
arguments, then use the ISA version for the 5250 // targeted GPU. 5251 if (isToken(AsmToken::EndOfStatement)) { 5252 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5253 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 5254 ISA.Stepping, 5255 "AMD", "AMDGPU"); 5256 return false; 5257 } 5258 5259 if (ParseDirectiveMajorMinor(Major, Minor)) 5260 return true; 5261 5262 if (!trySkipToken(AsmToken::Comma)) 5263 return TokError("stepping version number required, comma expected"); 5264 5265 if (ParseAsAbsoluteExpression(Stepping)) 5266 return TokError("invalid stepping version"); 5267 5268 if (!trySkipToken(AsmToken::Comma)) 5269 return TokError("vendor name required, comma expected"); 5270 5271 if (!parseString(VendorName, "invalid vendor name")) 5272 return true; 5273 5274 if (!trySkipToken(AsmToken::Comma)) 5275 return TokError("arch name required, comma expected"); 5276 5277 if (!parseString(ArchName, "invalid arch name")) 5278 return true; 5279 5280 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 5281 VendorName, ArchName); 5282 return false; 5283 } 5284 5285 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 5286 amd_kernel_code_t &Header) { 5287 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 5288 // assembly for backwards compatibility. 5289 if (ID == "max_scratch_backing_memory_byte_size") { 5290 Parser.eatToEndOfStatement(); 5291 return false; 5292 } 5293 5294 SmallString<40> ErrStr; 5295 raw_svector_ostream Err(ErrStr); 5296 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 5297 return TokError(Err.str()); 5298 } 5299 Lex(); 5300 5301 if (ID == "enable_wavefront_size32") { 5302 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 5303 if (!isGFX10Plus()) 5304 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 5305 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5306 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 5307 } else { 5308 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5309 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 5310 } 5311 } 5312 5313 if (ID == "wavefront_size") { 5314 if (Header.wavefront_size == 5) { 5315 if (!isGFX10Plus()) 5316 return TokError("wavefront_size=5 is only allowed on GFX10+"); 5317 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5318 return TokError("wavefront_size=5 requires +WavefrontSize32"); 5319 } else if (Header.wavefront_size == 6) { 5320 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5321 return TokError("wavefront_size=6 requires +WavefrontSize64"); 5322 } 5323 } 5324 5325 if (ID == "enable_wgp_mode") { 5326 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 5327 !isGFX10Plus()) 5328 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 5329 } 5330 5331 if (ID == "enable_mem_ordered") { 5332 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 5333 !isGFX10Plus()) 5334 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 5335 } 5336 5337 if (ID == "enable_fwd_progress") { 5338 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 5339 !isGFX10Plus()) 5340 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 5341 } 5342 5343 return false; 5344 } 5345 5346 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5347 amd_kernel_code_t Header; 5348 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 5349 5350 while (true) { 
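// Illustrative input consumed by this loop (field names and values are
// examples only; any amd_kernel_code_t field understood by
// parseAmdKernelCodeField may appear, one per statement):
//   .amd_kernel_code_t
//     wavefront_size = 6
//     enable_wgp_mode = 0
//   .end_amd_kernel_code_t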
5351 // Lex EndOfStatement. This is in a while loop, because lexing a comment 5352 // will set the current token to EndOfStatement. 5353 while(trySkipToken(AsmToken::EndOfStatement)); 5354 5355 StringRef ID; 5356 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5357 return true; 5358 5359 if (ID == ".end_amd_kernel_code_t") 5360 break; 5361 5362 if (ParseAMDKernelCodeTValue(ID, Header)) 5363 return true; 5364 } 5365 5366 getTargetStreamer().EmitAMDKernelCodeT(Header); 5367 5368 return false; 5369 } 5370 5371 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5372 StringRef KernelName; 5373 if (!parseId(KernelName, "expected symbol name")) 5374 return true; 5375 5376 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5377 ELF::STT_AMDGPU_HSA_KERNEL); 5378 5379 KernelScope.initialize(getContext()); 5380 return false; 5381 } 5382 5383 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5384 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5385 return Error(getLoc(), 5386 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5387 "architectures"); 5388 } 5389 5390 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5391 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5392 return Error(getParser().getTok().getLoc(), "target id must match options"); 5393 5394 getTargetStreamer().EmitISAVersion(); 5395 Lex(); 5396 5397 return false; 5398 } 5399 5400 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5401 const char *AssemblerDirectiveBegin; 5402 const char *AssemblerDirectiveEnd; 5403 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5404 isHsaAbiVersion3AndAbove(&getSTI()) 5405 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 5406 HSAMD::V3::AssemblerDirectiveEnd) 5407 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 5408 HSAMD::AssemblerDirectiveEnd); 5409 5410 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5411 return Error(getLoc(), 5412 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5413 "not available on non-amdhsa OSes")).str()); 5414 } 5415 5416 std::string HSAMetadataString; 5417 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5418 HSAMetadataString)) 5419 return true; 5420 5421 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5422 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5423 return Error(getLoc(), "invalid HSA metadata"); 5424 } else { 5425 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5426 return Error(getLoc(), "invalid HSA metadata"); 5427 } 5428 5429 return false; 5430 } 5431 5432 /// Common code to parse out a block of text (typically YAML) between start and 5433 /// end directives. 
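/// For example (illustrative; the actual begin/end strings are supplied by
/// the caller, e.g. the V3 metadata directives):
///   .amdgpu_metadata
///     <YAML text, collected verbatim into CollectString>
///   .end_amdgpu_metadata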
5434 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5435 const char *AssemblerDirectiveEnd, 5436 std::string &CollectString) { 5437 5438 raw_string_ostream CollectStream(CollectString); 5439 5440 getLexer().setSkipSpace(false); 5441 5442 bool FoundEnd = false; 5443 while (!isToken(AsmToken::Eof)) { 5444 while (isToken(AsmToken::Space)) { 5445 CollectStream << getTokenStr(); 5446 Lex(); 5447 } 5448 5449 if (trySkipId(AssemblerDirectiveEnd)) { 5450 FoundEnd = true; 5451 break; 5452 } 5453 5454 CollectStream << Parser.parseStringToEndOfStatement() 5455 << getContext().getAsmInfo()->getSeparatorString(); 5456 5457 Parser.eatToEndOfStatement(); 5458 } 5459 5460 getLexer().setSkipSpace(true); 5461 5462 if (isToken(AsmToken::Eof) && !FoundEnd) { 5463 return TokError(Twine("expected directive ") + 5464 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5465 } 5466 5467 CollectStream.flush(); 5468 return false; 5469 } 5470 5471 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5472 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5473 std::string String; 5474 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5475 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5476 return true; 5477 5478 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5479 if (!PALMetadata->setFromString(String)) 5480 return Error(getLoc(), "invalid PAL metadata"); 5481 return false; 5482 } 5483 5484 /// Parse the assembler directive for old linear-format PAL metadata. 5485 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5486 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5487 return Error(getLoc(), 5488 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5489 "not available on non-amdpal OSes")).str()); 5490 } 5491 5492 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5493 PALMetadata->setLegacy(); 5494 for (;;) { 5495 uint32_t Key, Value; 5496 if (ParseAsAbsoluteExpression(Key)) { 5497 return TokError(Twine("invalid value in ") + 5498 Twine(PALMD::AssemblerDirective)); 5499 } 5500 if (!trySkipToken(AsmToken::Comma)) { 5501 return TokError(Twine("expected an even number of values in ") + 5502 Twine(PALMD::AssemblerDirective)); 5503 } 5504 if (ParseAsAbsoluteExpression(Value)) { 5505 return TokError(Twine("invalid value in ") + 5506 Twine(PALMD::AssemblerDirective)); 5507 } 5508 PALMetadata->setRegister(Key, Value); 5509 if (!trySkipToken(AsmToken::Comma)) 5510 break; 5511 } 5512 return false; 5513 } 5514 5515 /// ParseDirectiveAMDGPULDS 5516 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5517 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5518 if (getParser().checkForValidSection()) 5519 return true; 5520 5521 StringRef Name; 5522 SMLoc NameLoc = getLoc(); 5523 if (getParser().parseIdentifier(Name)) 5524 return TokError("expected identifier in directive"); 5525 5526 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5527 if (parseToken(AsmToken::Comma, "expected ','")) 5528 return true; 5529 5530 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5531 5532 int64_t Size; 5533 SMLoc SizeLoc = getLoc(); 5534 if (getParser().parseAbsoluteExpression(Size)) 5535 return true; 5536 if (Size < 0) 5537 return Error(SizeLoc, "size must be non-negative"); 5538 if (Size > LocalMemorySize) 5539 return Error(SizeLoc, "size is too large"); 5540 5541 int64_t Alignment = 4; 5542 if (trySkipToken(AsmToken::Comma)) { 5543 SMLoc AlignLoc = getLoc(); 5544 if 
(getParser().parseAbsoluteExpression(Alignment))
5545 return true;
5546 if (Alignment < 0 || !isPowerOf2_64(Alignment))
5547 return Error(AlignLoc, "alignment must be a power of two");
5548
5549 // Alignment larger than the size of LDS is possible in theory, as long
5550 // as the linker manages to place the symbol at address 0, but we do want
5551 // to make sure the alignment fits nicely into a 32-bit integer.
5552 if (Alignment >= 1u << 31)
5553 return Error(AlignLoc, "alignment is too large");
5554 }
5555
5556 if (parseEOL())
5557 return true;
5558
5559 Symbol->redefineIfPossible();
5560 if (!Symbol->isUndefined())
5561 return Error(NameLoc, "invalid symbol redefinition");
5562
5563 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5564 return false;
5565 }
5566
5567 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5568 StringRef IDVal = DirectiveID.getString();
5569
5570 if (isHsaAbiVersion3AndAbove(&getSTI())) {
5571 if (IDVal == ".amdhsa_kernel")
5572 return ParseDirectiveAMDHSAKernel();
5573
5574 // TODO: Restructure/combine with PAL metadata directive.
5575 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5576 return ParseDirectiveHSAMetadata();
5577 } else {
5578 if (IDVal == ".hsa_code_object_version")
5579 return ParseDirectiveHSACodeObjectVersion();
5580
5581 if (IDVal == ".hsa_code_object_isa")
5582 return ParseDirectiveHSACodeObjectISA();
5583
5584 if (IDVal == ".amd_kernel_code_t")
5585 return ParseDirectiveAMDKernelCodeT();
5586
5587 if (IDVal == ".amdgpu_hsa_kernel")
5588 return ParseDirectiveAMDGPUHsaKernel();
5589
5590 if (IDVal == ".amd_amdgpu_isa")
5591 return ParseDirectiveISAVersion();
5592
5593 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5594 return ParseDirectiveHSAMetadata();
5595 }
5596
5597 if (IDVal == ".amdgcn_target")
5598 return ParseDirectiveAMDGCNTarget();
5599
5600 if (IDVal == ".amdgpu_lds")
5601 return ParseDirectiveAMDGPULDS();
5602
5603 if (IDVal == PALMD::AssemblerDirectiveBegin)
5604 return ParseDirectivePALMetadataBegin();
5605
5606 if (IDVal == PALMD::AssemblerDirective)
5607 return ParseDirectivePALMetadata();
5608
5609 return true;
5610 }
5611
5612 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5613 unsigned RegNo) {
5614
5615 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
5616 return isGFX9Plus();
5617
5618 // GFX10+ has 2 more SGPRs 104 and 105.
5619 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
5620 return hasSGPR104_SGPR105();
5621
5622 switch (RegNo) {
5623 case AMDGPU::SRC_SHARED_BASE:
5624 case AMDGPU::SRC_SHARED_LIMIT:
5625 case AMDGPU::SRC_PRIVATE_BASE:
5626 case AMDGPU::SRC_PRIVATE_LIMIT:
5627 return isGFX9Plus();
5628 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5629 return isGFX9Plus() && !isGFX11Plus();
5630 case AMDGPU::TBA:
5631 case AMDGPU::TBA_LO:
5632 case AMDGPU::TBA_HI:
5633 case AMDGPU::TMA:
5634 case AMDGPU::TMA_LO:
5635 case AMDGPU::TMA_HI:
5636 return !isGFX9Plus();
5637 case AMDGPU::XNACK_MASK:
5638 case AMDGPU::XNACK_MASK_LO:
5639 case AMDGPU::XNACK_MASK_HI:
5640 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5641 case AMDGPU::SGPR_NULL:
5642 return isGFX10Plus();
5643 default:
5644 break;
5645 }
5646
5647 if (isCI())
5648 return true;
5649
5650 if (isSI() || isGFX10Plus()) {
5651 // No flat_scr on SI.
5652 // On GFX10Plus flat scratch is not a valid register operand and can only be
5653 // accessed with s_setreg/s_getreg.
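// For example (illustrative; the exact hwreg names are defined in
// AMDGPUAsmUtils and documented in AMDGPUUsage), GFX10+ code reads the low
// half with something like
//   s_getreg_b32 s0, hwreg(HW_REG_FLAT_SCR_LO)
// rather than naming flat_scratch_lo as a register operand.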
5654 switch (RegNo) {
5655 case AMDGPU::FLAT_SCR:
5656 case AMDGPU::FLAT_SCR_LO:
5657 case AMDGPU::FLAT_SCR_HI:
5658 return false;
5659 default:
5660 return true;
5661 }
5662 }
5663
5664 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5665 // SI/CI have.
5666 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
5667 return hasSGPR102_SGPR103();
5668
5669 return true;
5670 }
5671
5672 OperandMatchResultTy
5673 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5674 OperandMode Mode) {
5675 // Try to parse with a custom parser
5676 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5677
5678 // If we successfully parsed the operand or if there was an error parsing,
5679 // we are done.
5680 //
5681 // If we are parsing after we reach EndOfStatement then this means we
5682 // are appending default values to the Operands list. This is only done
5683 // by custom parser, so we shouldn't continue on to the generic parsing.
5684 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5685 isToken(AsmToken::EndOfStatement))
5686 return ResTy;
5687
5688 SMLoc RBraceLoc;
5689 SMLoc LBraceLoc = getLoc();
5690 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5691 unsigned Prefix = Operands.size();
5692
5693 for (;;) {
5694 auto Loc = getLoc();
5695 ResTy = parseReg(Operands);
5696 if (ResTy == MatchOperand_NoMatch)
5697 Error(Loc, "expected a register");
5698 if (ResTy != MatchOperand_Success)
5699 return MatchOperand_ParseFail;
5700
5701 RBraceLoc = getLoc();
5702 if (trySkipToken(AsmToken::RBrac))
5703 break;
5704
5705 if (!skipToken(AsmToken::Comma,
5706 "expected a comma or a closing square bracket")) {
5707 return MatchOperand_ParseFail;
5708 }
5709 }
5710
5711 if (Operands.size() - Prefix > 1) {
5712 Operands.insert(Operands.begin() + Prefix,
5713 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5714 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5715 }
5716
5717 return MatchOperand_Success;
5718 }
5719
5720 return parseRegOrImm(Operands);
5721 }
5722
5723 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5724 // Clear any forced encodings from the previous instruction.
5725 setForcedEncodingSize(0);
5726 setForcedDPP(false);
5727 setForcedSDWA(false);
5728
5729 if (Name.endswith("_e64_dpp")) {
5730 setForcedDPP(true);
5731 setForcedEncodingSize(64);
5732 return Name.substr(0, Name.size() - 8);
5733 } else if (Name.endswith("_e64")) {
5734 setForcedEncodingSize(64);
5735 return Name.substr(0, Name.size() - 4);
5736 } else if (Name.endswith("_e32")) {
5737 setForcedEncodingSize(32);
5738 return Name.substr(0, Name.size() - 4);
5739 } else if (Name.endswith("_dpp")) {
5740 setForcedDPP(true);
5741 return Name.substr(0, Name.size() - 4);
5742 } else if (Name.endswith("_sdwa")) {
5743 setForcedSDWA(true);
5744 return Name.substr(0, Name.size() - 5);
5745 }
5746 return Name;
5747 }
5748
5749 static void applyMnemonicAliases(StringRef &Mnemonic,
5750 const FeatureBitset &Features,
5751 unsigned VariantID);
5752
5753 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5754 StringRef Name,
5755 SMLoc NameLoc, OperandVector &Operands) {
5756 // Add the instruction mnemonic
5757 Name = parseMnemonicSuffix(Name);
5758
5759 // If the target architecture uses MnemonicAlias, call it here to parse
5760 // operands correctly.
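// For example (illustrative): for "v_add_f32_e64", parseMnemonicSuffix()
// above has already stripped "_e64" and recorded the forced 64-bit (VOP3)
// encoding, so alias expansion below runs on the bare "v_add_f32" name.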
5761 applyMnemonicAliases(Name, getAvailableFeatures(), 0); 5762 5763 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5764 5765 bool IsMIMG = Name.startswith("image_"); 5766 5767 while (!trySkipToken(AsmToken::EndOfStatement)) { 5768 OperandMode Mode = OperandMode_Default; 5769 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5770 Mode = OperandMode_NSA; 5771 CPolSeen = 0; 5772 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5773 5774 if (Res != MatchOperand_Success) { 5775 checkUnsupportedInstruction(Name, NameLoc); 5776 if (!Parser.hasPendingError()) { 5777 // FIXME: use real operand location rather than the current location. 5778 StringRef Msg = 5779 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5780 "not a valid operand."; 5781 Error(getLoc(), Msg); 5782 } 5783 while (!trySkipToken(AsmToken::EndOfStatement)) { 5784 lex(); 5785 } 5786 return true; 5787 } 5788 5789 // Eat the comma or space if there is one. 5790 trySkipToken(AsmToken::Comma); 5791 } 5792 5793 return false; 5794 } 5795 5796 //===----------------------------------------------------------------------===// 5797 // Utility functions 5798 //===----------------------------------------------------------------------===// 5799 5800 OperandMatchResultTy 5801 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5802 5803 if (!trySkipId(Prefix, AsmToken::Colon)) 5804 return MatchOperand_NoMatch; 5805 5806 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5807 } 5808 5809 OperandMatchResultTy 5810 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5811 AMDGPUOperand::ImmTy ImmTy, 5812 bool (*ConvertResult)(int64_t&)) { 5813 SMLoc S = getLoc(); 5814 int64_t Value = 0; 5815 5816 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5817 if (Res != MatchOperand_Success) 5818 return Res; 5819 5820 if (ConvertResult && !ConvertResult(Value)) { 5821 Error(S, "invalid " + StringRef(Prefix) + " value."); 5822 } 5823 5824 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5825 return MatchOperand_Success; 5826 } 5827 5828 OperandMatchResultTy 5829 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5830 OperandVector &Operands, 5831 AMDGPUOperand::ImmTy ImmTy, 5832 bool (*ConvertResult)(int64_t&)) { 5833 SMLoc S = getLoc(); 5834 if (!trySkipId(Prefix, AsmToken::Colon)) 5835 return MatchOperand_NoMatch; 5836 5837 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5838 return MatchOperand_ParseFail; 5839 5840 unsigned Val = 0; 5841 const unsigned MaxSize = 4; 5842 5843 // FIXME: How to verify the number of elements matches the number of src 5844 // operands? 
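// For example (illustrative), op_sel:[0,1] or neg_lo:[1,0,0] is parsed by
// this loop: each element must be 0 or 1, and element I is packed into bit I
// of Val.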
5845 for (int I = 0; ; ++I) { 5846 int64_t Op; 5847 SMLoc Loc = getLoc(); 5848 if (!parseExpr(Op)) 5849 return MatchOperand_ParseFail; 5850 5851 if (Op != 0 && Op != 1) { 5852 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5853 return MatchOperand_ParseFail; 5854 } 5855 5856 Val |= (Op << I); 5857 5858 if (trySkipToken(AsmToken::RBrac)) 5859 break; 5860 5861 if (I + 1 == MaxSize) { 5862 Error(getLoc(), "expected a closing square bracket"); 5863 return MatchOperand_ParseFail; 5864 } 5865 5866 if (!skipToken(AsmToken::Comma, "expected a comma")) 5867 return MatchOperand_ParseFail; 5868 } 5869 5870 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5871 return MatchOperand_Success; 5872 } 5873 5874 OperandMatchResultTy 5875 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5876 AMDGPUOperand::ImmTy ImmTy) { 5877 int64_t Bit; 5878 SMLoc S = getLoc(); 5879 5880 if (trySkipId(Name)) { 5881 Bit = 1; 5882 } else if (trySkipId("no", Name)) { 5883 Bit = 0; 5884 } else { 5885 return MatchOperand_NoMatch; 5886 } 5887 5888 if (Name == "r128" && !hasMIMG_R128()) { 5889 Error(S, "r128 modifier is not supported on this GPU"); 5890 return MatchOperand_ParseFail; 5891 } 5892 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5893 Error(S, "a16 modifier is not supported on this GPU"); 5894 return MatchOperand_ParseFail; 5895 } 5896 5897 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5898 ImmTy = AMDGPUOperand::ImmTyR128A16; 5899 5900 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5901 return MatchOperand_Success; 5902 } 5903 5904 OperandMatchResultTy 5905 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5906 unsigned CPolOn = 0; 5907 unsigned CPolOff = 0; 5908 SMLoc S = getLoc(); 5909 5910 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 5911 if (isGFX940() && !Mnemo.startswith("s_")) { 5912 if (trySkipId("sc0")) 5913 CPolOn = AMDGPU::CPol::SC0; 5914 else if (trySkipId("nosc0")) 5915 CPolOff = AMDGPU::CPol::SC0; 5916 else if (trySkipId("nt")) 5917 CPolOn = AMDGPU::CPol::NT; 5918 else if (trySkipId("nont")) 5919 CPolOff = AMDGPU::CPol::NT; 5920 else if (trySkipId("sc1")) 5921 CPolOn = AMDGPU::CPol::SC1; 5922 else if (trySkipId("nosc1")) 5923 CPolOff = AMDGPU::CPol::SC1; 5924 else 5925 return MatchOperand_NoMatch; 5926 } 5927 else if (trySkipId("glc")) 5928 CPolOn = AMDGPU::CPol::GLC; 5929 else if (trySkipId("noglc")) 5930 CPolOff = AMDGPU::CPol::GLC; 5931 else if (trySkipId("slc")) 5932 CPolOn = AMDGPU::CPol::SLC; 5933 else if (trySkipId("noslc")) 5934 CPolOff = AMDGPU::CPol::SLC; 5935 else if (trySkipId("dlc")) 5936 CPolOn = AMDGPU::CPol::DLC; 5937 else if (trySkipId("nodlc")) 5938 CPolOff = AMDGPU::CPol::DLC; 5939 else if (trySkipId("scc")) 5940 CPolOn = AMDGPU::CPol::SCC; 5941 else if (trySkipId("noscc")) 5942 CPolOff = AMDGPU::CPol::SCC; 5943 else 5944 return MatchOperand_NoMatch; 5945 5946 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5947 Error(S, "dlc modifier is not supported on this GPU"); 5948 return MatchOperand_ParseFail; 5949 } 5950 5951 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5952 Error(S, "scc modifier is not supported on this GPU"); 5953 return MatchOperand_ParseFail; 5954 } 5955 5956 if (CPolSeen & (CPolOn | CPolOff)) { 5957 Error(S, "duplicate cache policy modifier"); 5958 return MatchOperand_ParseFail; 5959 } 5960 5961 CPolSeen |= (CPolOn | CPolOff); 5962 5963 for (unsigned I = 1; I != Operands.size(); ++I) { 5964 AMDGPUOperand &Op = ((AMDGPUOperand 
&)*Operands[I]); 5965 if (Op.isCPol()) { 5966 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5967 return MatchOperand_Success; 5968 } 5969 } 5970 5971 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5972 AMDGPUOperand::ImmTyCPol)); 5973 5974 return MatchOperand_Success; 5975 } 5976 5977 static void addOptionalImmOperand( 5978 MCInst& Inst, const OperandVector& Operands, 5979 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5980 AMDGPUOperand::ImmTy ImmT, 5981 int64_t Default = 0) { 5982 auto i = OptionalIdx.find(ImmT); 5983 if (i != OptionalIdx.end()) { 5984 unsigned Idx = i->second; 5985 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5986 } else { 5987 Inst.addOperand(MCOperand::createImm(Default)); 5988 } 5989 } 5990 5991 OperandMatchResultTy 5992 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5993 StringRef &Value, 5994 SMLoc &StringLoc) { 5995 if (!trySkipId(Prefix, AsmToken::Colon)) 5996 return MatchOperand_NoMatch; 5997 5998 StringLoc = getLoc(); 5999 return parseId(Value, "expected an identifier") ? MatchOperand_Success 6000 : MatchOperand_ParseFail; 6001 } 6002 6003 //===----------------------------------------------------------------------===// 6004 // MTBUF format 6005 //===----------------------------------------------------------------------===// 6006 6007 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 6008 int64_t MaxVal, 6009 int64_t &Fmt) { 6010 int64_t Val; 6011 SMLoc Loc = getLoc(); 6012 6013 auto Res = parseIntWithPrefix(Pref, Val); 6014 if (Res == MatchOperand_ParseFail) 6015 return false; 6016 if (Res == MatchOperand_NoMatch) 6017 return true; 6018 6019 if (Val < 0 || Val > MaxVal) { 6020 Error(Loc, Twine("out of range ", StringRef(Pref))); 6021 return false; 6022 } 6023 6024 Fmt = Val; 6025 return true; 6026 } 6027 6028 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 6029 // values to live in a joint format operand in the MCInst encoding. 6030 OperandMatchResultTy 6031 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 6032 using namespace llvm::AMDGPU::MTBUFFormat; 6033 6034 int64_t Dfmt = DFMT_UNDEF; 6035 int64_t Nfmt = NFMT_UNDEF; 6036 6037 // dfmt and nfmt can appear in either order, and each is optional. 6038 for (int I = 0; I < 2; ++I) { 6039 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 6040 return MatchOperand_ParseFail; 6041 6042 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 6043 return MatchOperand_ParseFail; 6044 } 6045 // Skip optional comma between dfmt/nfmt 6046 // but guard against 2 commas following each other. 6047 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 6048 !peekToken().is(AsmToken::Comma)) { 6049 trySkipToken(AsmToken::Comma); 6050 } 6051 } 6052 6053 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 6054 return MatchOperand_NoMatch; 6055 6056 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6057 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 6058 6059 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6060 return MatchOperand_Success; 6061 } 6062 6063 OperandMatchResultTy 6064 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 6065 using namespace llvm::AMDGPU::MTBUFFormat; 6066 6067 int64_t Fmt = UFMT_UNDEF; 6068 6069 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 6070 return MatchOperand_ParseFail; 6071 6072 if (Fmt == UFMT_UNDEF) 6073 return MatchOperand_NoMatch; 6074 6075 Format = Fmt; 6076 return MatchOperand_Success; 6077 } 6078 6079 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 6080 int64_t &Nfmt, 6081 StringRef FormatStr, 6082 SMLoc Loc) { 6083 using namespace llvm::AMDGPU::MTBUFFormat; 6084 int64_t Format; 6085 6086 Format = getDfmt(FormatStr); 6087 if (Format != DFMT_UNDEF) { 6088 Dfmt = Format; 6089 return true; 6090 } 6091 6092 Format = getNfmt(FormatStr, getSTI()); 6093 if (Format != NFMT_UNDEF) { 6094 Nfmt = Format; 6095 return true; 6096 } 6097 6098 Error(Loc, "unsupported format"); 6099 return false; 6100 } 6101 6102 OperandMatchResultTy 6103 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 6104 SMLoc FormatLoc, 6105 int64_t &Format) { 6106 using namespace llvm::AMDGPU::MTBUFFormat; 6107 6108 int64_t Dfmt = DFMT_UNDEF; 6109 int64_t Nfmt = NFMT_UNDEF; 6110 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 6111 return MatchOperand_ParseFail; 6112 6113 if (trySkipToken(AsmToken::Comma)) { 6114 StringRef Str; 6115 SMLoc Loc = getLoc(); 6116 if (!parseId(Str, "expected a format string") || 6117 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 6118 return MatchOperand_ParseFail; 6119 } 6120 if (Dfmt == DFMT_UNDEF) { 6121 Error(Loc, "duplicate numeric format"); 6122 return MatchOperand_ParseFail; 6123 } else if (Nfmt == NFMT_UNDEF) { 6124 Error(Loc, "duplicate data format"); 6125 return MatchOperand_ParseFail; 6126 } 6127 } 6128 6129 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6130 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 6131 6132 if (isGFX10Plus()) { 6133 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI()); 6134 if (Ufmt == UFMT_UNDEF) { 6135 Error(FormatLoc, "unsupported format"); 6136 return MatchOperand_ParseFail; 6137 } 6138 Format = Ufmt; 6139 } else { 6140 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6141 } 6142 6143 return MatchOperand_Success; 6144 } 6145 6146 OperandMatchResultTy 6147 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 6148 SMLoc Loc, 6149 int64_t &Format) { 6150 using namespace llvm::AMDGPU::MTBUFFormat; 6151 6152 auto Id = getUnifiedFormat(FormatStr, getSTI()); 6153 if (Id == UFMT_UNDEF) 6154 return MatchOperand_NoMatch; 6155 6156 if (!isGFX10Plus()) { 6157 Error(Loc, "unified format is not supported on this GPU"); 6158 return MatchOperand_ParseFail; 6159 } 6160 6161 Format = Id; 6162 return MatchOperand_Success; 6163 } 6164 6165 OperandMatchResultTy 6166 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 6167 using namespace llvm::AMDGPU::MTBUFFormat; 6168 SMLoc Loc = getLoc(); 6169 6170 if (!parseExpr(Format)) 6171 return MatchOperand_ParseFail; 6172 if (!isValidFormatEncoding(Format, getSTI())) { 6173 Error(Loc, "out of range format"); 6174 return MatchOperand_ParseFail; 6175 } 6176 6177 return MatchOperand_Success; 6178 } 6179 6180 OperandMatchResultTy 6181 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 6182 using namespace llvm::AMDGPU::MTBUFFormat; 6183 6184 if (!trySkipId("format", AsmToken::Colon)) 6185 return MatchOperand_NoMatch; 6186 6187 if (trySkipToken(AsmToken::LBrac)) { 6188 StringRef FormatStr; 6189 SMLoc Loc = getLoc(); 6190 if (!parseId(FormatStr, "expected a format string")) 6191 return MatchOperand_ParseFail; 6192 6193 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 6194 if (Res == MatchOperand_NoMatch) 6195 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 6196 if (Res != MatchOperand_Success) 6197 return Res; 6198 6199 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 6200 return MatchOperand_ParseFail; 6201 6202 return MatchOperand_Success; 6203 } 6204 6205 return parseNumericFormat(Format); 6206 } 6207 6208 OperandMatchResultTy 6209 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 6210 using namespace llvm::AMDGPU::MTBUFFormat; 6211 6212 int64_t Format = getDefaultFormatEncoding(getSTI()); 6213 OperandMatchResultTy Res; 6214 SMLoc Loc = getLoc(); 6215 6216 // Parse legacy format syntax. 6217 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 6218 if (Res == MatchOperand_ParseFail) 6219 return Res; 6220 6221 bool FormatFound = (Res == MatchOperand_Success); 6222 6223 Operands.push_back( 6224 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 6225 6226 if (FormatFound) 6227 trySkipToken(AsmToken::Comma); 6228 6229 if (isToken(AsmToken::EndOfStatement)) { 6230 // We are expecting an soffset operand, 6231 // but let matcher handle the error. 6232 return MatchOperand_Success; 6233 } 6234 6235 // Parse soffset. 
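// For example (illustrative syntax; symbolic format names are those listed
// in AMDGPUUsage), the format operand may appear on either side of soffset:
//   tbuffer_load_format_x v0, off, s[0:3], format:[BUF_FMT_32_FLOAT], 0
//   tbuffer_load_format_x v0, off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]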
6236 Res = parseRegOrImm(Operands); 6237 if (Res != MatchOperand_Success) 6238 return Res; 6239 6240 trySkipToken(AsmToken::Comma); 6241 6242 if (!FormatFound) { 6243 Res = parseSymbolicOrNumericFormat(Format); 6244 if (Res == MatchOperand_ParseFail) 6245 return Res; 6246 if (Res == MatchOperand_Success) { 6247 auto Size = Operands.size(); 6248 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 6249 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 6250 Op.setImm(Format); 6251 } 6252 return MatchOperand_Success; 6253 } 6254 6255 if (isId("format") && peekToken().is(AsmToken::Colon)) { 6256 Error(getLoc(), "duplicate format"); 6257 return MatchOperand_ParseFail; 6258 } 6259 return MatchOperand_Success; 6260 } 6261 6262 //===----------------------------------------------------------------------===// 6263 // ds 6264 //===----------------------------------------------------------------------===// 6265 6266 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 6267 const OperandVector &Operands) { 6268 OptionalImmIndexMap OptionalIdx; 6269 6270 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6271 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6272 6273 // Add the register arguments 6274 if (Op.isReg()) { 6275 Op.addRegOperands(Inst, 1); 6276 continue; 6277 } 6278 6279 // Handle optional arguments 6280 OptionalIdx[Op.getImmTy()] = i; 6281 } 6282 6283 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 6284 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 6285 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6286 6287 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6288 } 6289 6290 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 6291 bool IsGdsHardcoded) { 6292 OptionalImmIndexMap OptionalIdx; 6293 AMDGPUOperand::ImmTy OffsetType = AMDGPUOperand::ImmTyOffset; 6294 6295 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6296 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6297 6298 // Add the register arguments 6299 if (Op.isReg()) { 6300 Op.addRegOperands(Inst, 1); 6301 continue; 6302 } 6303 6304 if (Op.isToken() && Op.getToken() == "gds") { 6305 IsGdsHardcoded = true; 6306 continue; 6307 } 6308 6309 // Handle optional arguments 6310 OptionalIdx[Op.getImmTy()] = i; 6311 6312 if (Op.getImmTy() == AMDGPUOperand::ImmTySwizzle) 6313 OffsetType = AMDGPUOperand::ImmTySwizzle; 6314 } 6315 6316 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 6317 6318 if (!IsGdsHardcoded) { 6319 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6320 } 6321 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6322 } 6323 6324 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 6325 OptionalImmIndexMap OptionalIdx; 6326 6327 unsigned OperandIdx[4]; 6328 unsigned EnMask = 0; 6329 int SrcIdx = 0; 6330 6331 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6332 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6333 6334 // Add the register arguments 6335 if (Op.isReg()) { 6336 assert(SrcIdx < 4); 6337 OperandIdx[SrcIdx] = Inst.size(); 6338 Op.addRegOperands(Inst, 1); 6339 ++SrcIdx; 6340 continue; 6341 } 6342 6343 if (Op.isOff()) { 6344 assert(SrcIdx < 4); 6345 OperandIdx[SrcIdx] = Inst.size(); 6346 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 6347 ++SrcIdx; 6348 continue; 6349 } 6350 6351 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) 
{ 6352 Op.addImmOperands(Inst, 1); 6353 continue; 6354 } 6355 6356 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en")) 6357 continue; 6358 6359 // Handle optional arguments 6360 OptionalIdx[Op.getImmTy()] = i; 6361 } 6362 6363 assert(SrcIdx == 4); 6364 6365 bool Compr = false; 6366 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 6367 Compr = true; 6368 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 6369 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 6370 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 6371 } 6372 6373 for (auto i = 0; i < SrcIdx; ++i) { 6374 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 6375 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i); 6376 } 6377 } 6378 6379 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6380 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6381 6382 Inst.addOperand(MCOperand::createImm(EnMask)); 6383 } 6384 6385 //===----------------------------------------------------------------------===// 6386 // s_waitcnt 6387 //===----------------------------------------------------------------------===// 6388 6389 static bool 6390 encodeCnt( 6391 const AMDGPU::IsaVersion ISA, 6392 int64_t &IntVal, 6393 int64_t CntVal, 6394 bool Saturate, 6395 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6396 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6397 { 6398 bool Failed = false; 6399 6400 IntVal = encode(ISA, IntVal, CntVal); 6401 if (CntVal != decode(ISA, IntVal)) { 6402 if (Saturate) { 6403 IntVal = encode(ISA, IntVal, -1); 6404 } else { 6405 Failed = true; 6406 } 6407 } 6408 return Failed; 6409 } 6410 6411 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6412 6413 SMLoc CntLoc = getLoc(); 6414 StringRef CntName = getTokenStr(); 6415 6416 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6417 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6418 return false; 6419 6420 int64_t CntVal; 6421 SMLoc ValLoc = getLoc(); 6422 if (!parseExpr(CntVal)) 6423 return false; 6424 6425 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6426 6427 bool Failed = true; 6428 bool Sat = CntName.endswith("_sat"); 6429 6430 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6431 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6432 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6433 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6434 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6435 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6436 } else { 6437 Error(CntLoc, "invalid counter name " + CntName); 6438 return false; 6439 } 6440 6441 if (Failed) { 6442 Error(ValLoc, "too large value for " + CntName); 6443 return false; 6444 } 6445 6446 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6447 return false; 6448 6449 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6450 if (isToken(AsmToken::EndOfStatement)) { 6451 Error(getLoc(), "expected a counter name"); 6452 return false; 6453 } 6454 } 6455 6456 return true; 6457 } 6458 6459 OperandMatchResultTy 6460 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 6461 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6462 int64_t Waitcnt = getWaitcntBitMask(ISA); 6463 SMLoc S = getLoc(); 6464 6465 if (isToken(AsmToken::Identifier) 
&& peekToken().is(AsmToken::LParen)) { 6466 while (!isToken(AsmToken::EndOfStatement)) { 6467 if (!parseCnt(Waitcnt)) 6468 return MatchOperand_ParseFail; 6469 } 6470 } else { 6471 if (!parseExpr(Waitcnt)) 6472 return MatchOperand_ParseFail; 6473 } 6474 6475 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6476 return MatchOperand_Success; 6477 } 6478 6479 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) { 6480 SMLoc FieldLoc = getLoc(); 6481 StringRef FieldName = getTokenStr(); 6482 if (!skipToken(AsmToken::Identifier, "expected a field name") || 6483 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6484 return false; 6485 6486 SMLoc ValueLoc = getLoc(); 6487 StringRef ValueName = getTokenStr(); 6488 if (!skipToken(AsmToken::Identifier, "expected a value name") || 6489 !skipToken(AsmToken::RParen, "expected a right parenthesis")) 6490 return false; 6491 6492 unsigned Shift; 6493 if (FieldName == "instid0") { 6494 Shift = 0; 6495 } else if (FieldName == "instskip") { 6496 Shift = 4; 6497 } else if (FieldName == "instid1") { 6498 Shift = 7; 6499 } else { 6500 Error(FieldLoc, "invalid field name " + FieldName); 6501 return false; 6502 } 6503 6504 int Value; 6505 if (Shift == 4) { 6506 // Parse values for instskip. 6507 Value = StringSwitch<int>(ValueName) 6508 .Case("SAME", 0) 6509 .Case("NEXT", 1) 6510 .Case("SKIP_1", 2) 6511 .Case("SKIP_2", 3) 6512 .Case("SKIP_3", 4) 6513 .Case("SKIP_4", 5) 6514 .Default(-1); 6515 } else { 6516 // Parse values for instid0 and instid1. 6517 Value = StringSwitch<int>(ValueName) 6518 .Case("NO_DEP", 0) 6519 .Case("VALU_DEP_1", 1) 6520 .Case("VALU_DEP_2", 2) 6521 .Case("VALU_DEP_3", 3) 6522 .Case("VALU_DEP_4", 4) 6523 .Case("TRANS32_DEP_1", 5) 6524 .Case("TRANS32_DEP_2", 6) 6525 .Case("TRANS32_DEP_3", 7) 6526 .Case("FMA_ACCUM_CYCLE_1", 8) 6527 .Case("SALU_CYCLE_1", 9) 6528 .Case("SALU_CYCLE_2", 10) 6529 .Case("SALU_CYCLE_3", 11) 6530 .Default(-1); 6531 } 6532 if (Value < 0) { 6533 Error(ValueLoc, "invalid value name " + ValueName); 6534 return false; 6535 } 6536 6537 Delay |= Value << Shift; 6538 return true; 6539 } 6540 6541 OperandMatchResultTy 6542 AMDGPUAsmParser::parseSDelayAluOps(OperandVector &Operands) { 6543 int64_t Delay = 0; 6544 SMLoc S = getLoc(); 6545 6546 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6547 do { 6548 if (!parseDelay(Delay)) 6549 return MatchOperand_ParseFail; 6550 } while (trySkipToken(AsmToken::Pipe)); 6551 } else { 6552 if (!parseExpr(Delay)) 6553 return MatchOperand_ParseFail; 6554 } 6555 6556 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S)); 6557 return MatchOperand_Success; 6558 } 6559 6560 bool 6561 AMDGPUOperand::isSWaitCnt() const { 6562 return isImm(); 6563 } 6564 6565 bool AMDGPUOperand::isSDelayAlu() const { return isImm(); } 6566 6567 //===----------------------------------------------------------------------===// 6568 // DepCtr 6569 //===----------------------------------------------------------------------===// 6570 6571 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId, 6572 StringRef DepCtrName) { 6573 switch (ErrorId) { 6574 case OPR_ID_UNKNOWN: 6575 Error(Loc, Twine("invalid counter name ", DepCtrName)); 6576 return; 6577 case OPR_ID_UNSUPPORTED: 6578 Error(Loc, Twine(DepCtrName, " is not supported on this GPU")); 6579 return; 6580 case OPR_ID_DUPLICATE: 6581 Error(Loc, Twine("duplicate counter name ", DepCtrName)); 6582 return; 6583 case OPR_VAL_INVALID: 6584 Error(Loc, Twine("invalid value for ", DepCtrName)); 6585 return; 6586 default: 
6587 assert(false); 6588 } 6589 } 6590 6591 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) { 6592 6593 using namespace llvm::AMDGPU::DepCtr; 6594 6595 SMLoc DepCtrLoc = getLoc(); 6596 StringRef DepCtrName = getTokenStr(); 6597 6598 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6599 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6600 return false; 6601 6602 int64_t ExprVal; 6603 if (!parseExpr(ExprVal)) 6604 return false; 6605 6606 unsigned PrevOprMask = UsedOprMask; 6607 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI()); 6608 6609 if (CntVal < 0) { 6610 depCtrError(DepCtrLoc, CntVal, DepCtrName); 6611 return false; 6612 } 6613 6614 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6615 return false; 6616 6617 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6618 if (isToken(AsmToken::EndOfStatement)) { 6619 Error(getLoc(), "expected a counter name"); 6620 return false; 6621 } 6622 } 6623 6624 unsigned CntValMask = PrevOprMask ^ UsedOprMask; 6625 DepCtr = (DepCtr & ~CntValMask) | CntVal; 6626 return true; 6627 } 6628 6629 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) { 6630 using namespace llvm::AMDGPU::DepCtr; 6631 6632 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI()); 6633 SMLoc Loc = getLoc(); 6634 6635 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6636 unsigned UsedOprMask = 0; 6637 while (!isToken(AsmToken::EndOfStatement)) { 6638 if (!parseDepCtr(DepCtr, UsedOprMask)) 6639 return MatchOperand_ParseFail; 6640 } 6641 } else { 6642 if (!parseExpr(DepCtr)) 6643 return MatchOperand_ParseFail; 6644 } 6645 6646 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc)); 6647 return MatchOperand_Success; 6648 } 6649 6650 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); } 6651 6652 //===----------------------------------------------------------------------===// 6653 // hwreg 6654 //===----------------------------------------------------------------------===// 6655 6656 bool 6657 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6658 OperandInfoTy &Offset, 6659 OperandInfoTy &Width) { 6660 using namespace llvm::AMDGPU::Hwreg; 6661 6662 // The register may be specified by name or using a numeric code 6663 HwReg.Loc = getLoc(); 6664 if (isToken(AsmToken::Identifier) && 6665 (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6666 HwReg.IsSymbolic = true; 6667 lex(); // skip register name 6668 } else if (!parseExpr(HwReg.Id, "a register name")) { 6669 return false; 6670 } 6671 6672 if (trySkipToken(AsmToken::RParen)) 6673 return true; 6674 6675 // parse optional params 6676 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6677 return false; 6678 6679 Offset.Loc = getLoc(); 6680 if (!parseExpr(Offset.Id)) 6681 return false; 6682 6683 if (!skipToken(AsmToken::Comma, "expected a comma")) 6684 return false; 6685 6686 Width.Loc = getLoc(); 6687 return parseExpr(Width.Id) && 6688 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6689 } 6690 6691 bool 6692 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6693 const OperandInfoTy &Offset, 6694 const OperandInfoTy &Width) { 6695 6696 using namespace llvm::AMDGPU::Hwreg; 6697 6698 if (HwReg.IsSymbolic) { 6699 if (HwReg.Id == OPR_ID_UNSUPPORTED) { 6700 Error(HwReg.Loc, 6701 "specified hardware register is not supported on this GPU"); 6702 return false; 6703 } 6704 } else { 6705 if 
(!isValidHwreg(HwReg.Id)) { 6706 Error(HwReg.Loc, 6707 "invalid code of hardware register: only 6-bit values are legal"); 6708 return false; 6709 } 6710 } 6711 if (!isValidHwregOffset(Offset.Id)) { 6712 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6713 return false; 6714 } 6715 if (!isValidHwregWidth(Width.Id)) { 6716 Error(Width.Loc, 6717 "invalid bitfield width: only values from 1 to 32 are legal"); 6718 return false; 6719 } 6720 return true; 6721 } 6722 6723 OperandMatchResultTy 6724 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6725 using namespace llvm::AMDGPU::Hwreg; 6726 6727 int64_t ImmVal = 0; 6728 SMLoc Loc = getLoc(); 6729 6730 if (trySkipId("hwreg", AsmToken::LParen)) { 6731 OperandInfoTy HwReg(OPR_ID_UNKNOWN); 6732 OperandInfoTy Offset(OFFSET_DEFAULT_); 6733 OperandInfoTy Width(WIDTH_DEFAULT_); 6734 if (parseHwregBody(HwReg, Offset, Width) && 6735 validateHwreg(HwReg, Offset, Width)) { 6736 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6737 } else { 6738 return MatchOperand_ParseFail; 6739 } 6740 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6741 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6742 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6743 return MatchOperand_ParseFail; 6744 } 6745 } else { 6746 return MatchOperand_ParseFail; 6747 } 6748 6749 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6750 return MatchOperand_Success; 6751 } 6752 6753 bool AMDGPUOperand::isHwreg() const { 6754 return isImmTy(ImmTyHwreg); 6755 } 6756 6757 //===----------------------------------------------------------------------===// 6758 // sendmsg 6759 //===----------------------------------------------------------------------===// 6760 6761 bool 6762 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6763 OperandInfoTy &Op, 6764 OperandInfoTy &Stream) { 6765 using namespace llvm::AMDGPU::SendMsg; 6766 6767 Msg.Loc = getLoc(); 6768 if (isToken(AsmToken::Identifier) && 6769 (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6770 Msg.IsSymbolic = true; 6771 lex(); // skip message name 6772 } else if (!parseExpr(Msg.Id, "a message name")) { 6773 return false; 6774 } 6775 6776 if (trySkipToken(AsmToken::Comma)) { 6777 Op.IsDefined = true; 6778 Op.Loc = getLoc(); 6779 if (isToken(AsmToken::Identifier) && 6780 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6781 lex(); // skip operation name 6782 } else if (!parseExpr(Op.Id, "an operation name")) { 6783 return false; 6784 } 6785 6786 if (trySkipToken(AsmToken::Comma)) { 6787 Stream.IsDefined = true; 6788 Stream.Loc = getLoc(); 6789 if (!parseExpr(Stream.Id)) 6790 return false; 6791 } 6792 } 6793 6794 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6795 } 6796 6797 bool 6798 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6799 const OperandInfoTy &Op, 6800 const OperandInfoTy &Stream) { 6801 using namespace llvm::AMDGPU::SendMsg; 6802 6803 // Validation strictness depends on whether message is specified 6804 // in a symbolic or in a numeric form. In the latter case 6805 // only encoding possibility is checked. 
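// For example (illustrative), sendmsg(MSG_GS_DONE, GS_OP_NOP) is checked
// field by field, while a raw immediate such as s_sendmsg 0x3 only has to
// fit the 16-bit encoding.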
6806 bool Strict = Msg.IsSymbolic; 6807 6808 if (Strict) { 6809 if (Msg.Id == OPR_ID_UNSUPPORTED) { 6810 Error(Msg.Loc, "specified message id is not supported on this GPU"); 6811 return false; 6812 } 6813 } else { 6814 if (!isValidMsgId(Msg.Id, getSTI())) { 6815 Error(Msg.Loc, "invalid message id"); 6816 return false; 6817 } 6818 } 6819 if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) { 6820 if (Op.IsDefined) { 6821 Error(Op.Loc, "message does not support operations"); 6822 } else { 6823 Error(Msg.Loc, "missing message operation"); 6824 } 6825 return false; 6826 } 6827 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6828 Error(Op.Loc, "invalid operation id"); 6829 return false; 6830 } 6831 if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) && 6832 Stream.IsDefined) { 6833 Error(Stream.Loc, "message operation does not support streams"); 6834 return false; 6835 } 6836 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6837 Error(Stream.Loc, "invalid message stream id"); 6838 return false; 6839 } 6840 return true; 6841 } 6842 6843 OperandMatchResultTy 6844 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6845 using namespace llvm::AMDGPU::SendMsg; 6846 6847 int64_t ImmVal = 0; 6848 SMLoc Loc = getLoc(); 6849 6850 if (trySkipId("sendmsg", AsmToken::LParen)) { 6851 OperandInfoTy Msg(OPR_ID_UNKNOWN); 6852 OperandInfoTy Op(OP_NONE_); 6853 OperandInfoTy Stream(STREAM_ID_NONE_); 6854 if (parseSendMsgBody(Msg, Op, Stream) && 6855 validateSendMsg(Msg, Op, Stream)) { 6856 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6857 } else { 6858 return MatchOperand_ParseFail; 6859 } 6860 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6861 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6862 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6863 return MatchOperand_ParseFail; 6864 } 6865 } else { 6866 return MatchOperand_ParseFail; 6867 } 6868 6869 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6870 return MatchOperand_Success; 6871 } 6872 6873 bool AMDGPUOperand::isSendMsg() const { 6874 return isImmTy(ImmTySendMsg); 6875 } 6876 6877 //===----------------------------------------------------------------------===// 6878 // v_interp 6879 //===----------------------------------------------------------------------===// 6880 6881 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6882 StringRef Str; 6883 SMLoc S = getLoc(); 6884 6885 if (!parseId(Str)) 6886 return MatchOperand_NoMatch; 6887 6888 int Slot = StringSwitch<int>(Str) 6889 .Case("p10", 0) 6890 .Case("p20", 1) 6891 .Case("p0", 2) 6892 .Default(-1); 6893 6894 if (Slot == -1) { 6895 Error(S, "invalid interpolation slot"); 6896 return MatchOperand_ParseFail; 6897 } 6898 6899 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6900 AMDGPUOperand::ImmTyInterpSlot)); 6901 return MatchOperand_Success; 6902 } 6903 6904 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6905 StringRef Str; 6906 SMLoc S = getLoc(); 6907 6908 if (!parseId(Str)) 6909 return MatchOperand_NoMatch; 6910 6911 if (!Str.startswith("attr")) { 6912 Error(S, "invalid interpolation attribute"); 6913 return MatchOperand_ParseFail; 6914 } 6915 6916 StringRef Chan = Str.take_back(2); 6917 int AttrChan = StringSwitch<int>(Chan) 6918 .Case(".x", 0) 6919 .Case(".y", 1) 6920 .Case(".z", 2) 6921 .Case(".w", 3) 6922 .Default(-1); 6923 if (AttrChan == -1) { 6924 Error(S, "invalid or missing interpolation attribute channel"); 
6925 return MatchOperand_ParseFail; 6926 } 6927 6928 Str = Str.drop_back(2).drop_front(4); 6929 6930 uint8_t Attr; 6931 if (Str.getAsInteger(10, Attr)) { 6932 Error(S, "invalid or missing interpolation attribute number"); 6933 return MatchOperand_ParseFail; 6934 } 6935 6936 if (Attr > 63) { 6937 Error(S, "out of bounds interpolation attribute number"); 6938 return MatchOperand_ParseFail; 6939 } 6940 6941 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6942 6943 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6944 AMDGPUOperand::ImmTyInterpAttr)); 6945 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6946 AMDGPUOperand::ImmTyAttrChan)); 6947 return MatchOperand_Success; 6948 } 6949 6950 //===----------------------------------------------------------------------===// 6951 // exp 6952 //===----------------------------------------------------------------------===// 6953 6954 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6955 using namespace llvm::AMDGPU::Exp; 6956 6957 StringRef Str; 6958 SMLoc S = getLoc(); 6959 6960 if (!parseId(Str)) 6961 return MatchOperand_NoMatch; 6962 6963 unsigned Id = getTgtId(Str); 6964 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6965 Error(S, (Id == ET_INVALID) ? 6966 "invalid exp target" : 6967 "exp target is not supported on this GPU"); 6968 return MatchOperand_ParseFail; 6969 } 6970 6971 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6972 AMDGPUOperand::ImmTyExpTgt)); 6973 return MatchOperand_Success; 6974 } 6975 6976 //===----------------------------------------------------------------------===// 6977 // parser helpers 6978 //===----------------------------------------------------------------------===// 6979 6980 bool 6981 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6982 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6983 } 6984 6985 bool 6986 AMDGPUAsmParser::isId(const StringRef Id) const { 6987 return isId(getToken(), Id); 6988 } 6989 6990 bool 6991 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6992 return getTokenKind() == Kind; 6993 } 6994 6995 bool 6996 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6997 if (isId(Id)) { 6998 lex(); 6999 return true; 7000 } 7001 return false; 7002 } 7003 7004 bool 7005 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 7006 if (isToken(AsmToken::Identifier)) { 7007 StringRef Tok = getTokenStr(); 7008 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 7009 lex(); 7010 return true; 7011 } 7012 } 7013 return false; 7014 } 7015 7016 bool 7017 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 7018 if (isId(Id) && peekToken().is(Kind)) { 7019 lex(); 7020 lex(); 7021 return true; 7022 } 7023 return false; 7024 } 7025 7026 bool 7027 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 7028 if (isToken(Kind)) { 7029 lex(); 7030 return true; 7031 } 7032 return false; 7033 } 7034 7035 bool 7036 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 7037 const StringRef ErrMsg) { 7038 if (!trySkipToken(Kind)) { 7039 Error(getLoc(), ErrMsg); 7040 return false; 7041 } 7042 return true; 7043 } 7044 7045 bool 7046 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 7047 SMLoc S = getLoc(); 7048 7049 const MCExpr *Expr; 7050 if (Parser.parseExpression(Expr)) 7051 return false; 7052 7053 if (Expr->evaluateAsAbsolute(Imm)) 7054 return true; 7055 7056 if (Expected.empty()) { 7057 Error(S, "expected 
absolute expression"); 7058 } else { 7059 Error(S, Twine("expected ", Expected) + 7060 Twine(" or an absolute expression")); 7061 } 7062 return false; 7063 } 7064 7065 bool 7066 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 7067 SMLoc S = getLoc(); 7068 7069 const MCExpr *Expr; 7070 if (Parser.parseExpression(Expr)) 7071 return false; 7072 7073 int64_t IntVal; 7074 if (Expr->evaluateAsAbsolute(IntVal)) { 7075 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 7076 } else { 7077 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 7078 } 7079 return true; 7080 } 7081 7082 bool 7083 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 7084 if (isToken(AsmToken::String)) { 7085 Val = getToken().getStringContents(); 7086 lex(); 7087 return true; 7088 } else { 7089 Error(getLoc(), ErrMsg); 7090 return false; 7091 } 7092 } 7093 7094 bool 7095 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 7096 if (isToken(AsmToken::Identifier)) { 7097 Val = getTokenStr(); 7098 lex(); 7099 return true; 7100 } else { 7101 if (!ErrMsg.empty()) 7102 Error(getLoc(), ErrMsg); 7103 return false; 7104 } 7105 } 7106 7107 AsmToken 7108 AMDGPUAsmParser::getToken() const { 7109 return Parser.getTok(); 7110 } 7111 7112 AsmToken 7113 AMDGPUAsmParser::peekToken() { 7114 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 7115 } 7116 7117 void 7118 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 7119 auto TokCount = getLexer().peekTokens(Tokens); 7120 7121 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 7122 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 7123 } 7124 7125 AsmToken::TokenKind 7126 AMDGPUAsmParser::getTokenKind() const { 7127 return getLexer().getKind(); 7128 } 7129 7130 SMLoc 7131 AMDGPUAsmParser::getLoc() const { 7132 return getToken().getLoc(); 7133 } 7134 7135 StringRef 7136 AMDGPUAsmParser::getTokenStr() const { 7137 return getToken().getString(); 7138 } 7139 7140 void 7141 AMDGPUAsmParser::lex() { 7142 Parser.Lex(); 7143 } 7144 7145 SMLoc 7146 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 7147 const OperandVector &Operands) const { 7148 for (unsigned i = Operands.size() - 1; i > 0; --i) { 7149 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7150 if (Test(Op)) 7151 return Op.getStartLoc(); 7152 } 7153 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 7154 } 7155 7156 SMLoc 7157 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 7158 const OperandVector &Operands) const { 7159 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 7160 return getOperandLoc(Test, Operands); 7161 } 7162 7163 SMLoc 7164 AMDGPUAsmParser::getRegLoc(unsigned Reg, 7165 const OperandVector &Operands) const { 7166 auto Test = [=](const AMDGPUOperand& Op) { 7167 return Op.isRegKind() && Op.getReg() == Reg; 7168 }; 7169 return getOperandLoc(Test, Operands); 7170 } 7171 7172 SMLoc 7173 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 7174 auto Test = [](const AMDGPUOperand& Op) { 7175 return Op.IsImmKindLiteral() || Op.isExpr(); 7176 }; 7177 return getOperandLoc(Test, Operands); 7178 } 7179 7180 SMLoc 7181 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 7182 auto Test = [](const AMDGPUOperand& Op) { 7183 return Op.isImmKindConst(); 7184 }; 7185 return getOperandLoc(Test, Operands); 7186 } 7187 7188 //===----------------------------------------------------------------------===// 7189 // swizzle 7190 
//===----------------------------------------------------------------------===// 7191 7192 LLVM_READNONE 7193 static unsigned 7194 encodeBitmaskPerm(const unsigned AndMask, 7195 const unsigned OrMask, 7196 const unsigned XorMask) { 7197 using namespace llvm::AMDGPU::Swizzle; 7198 7199 return BITMASK_PERM_ENC | 7200 (AndMask << BITMASK_AND_SHIFT) | 7201 (OrMask << BITMASK_OR_SHIFT) | 7202 (XorMask << BITMASK_XOR_SHIFT); 7203 } 7204 7205 bool 7206 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 7207 const unsigned MinVal, 7208 const unsigned MaxVal, 7209 const StringRef ErrMsg, 7210 SMLoc &Loc) { 7211 if (!skipToken(AsmToken::Comma, "expected a comma")) { 7212 return false; 7213 } 7214 Loc = getLoc(); 7215 if (!parseExpr(Op)) { 7216 return false; 7217 } 7218 if (Op < MinVal || Op > MaxVal) { 7219 Error(Loc, ErrMsg); 7220 return false; 7221 } 7222 7223 return true; 7224 } 7225 7226 bool 7227 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 7228 const unsigned MinVal, 7229 const unsigned MaxVal, 7230 const StringRef ErrMsg) { 7231 SMLoc Loc; 7232 for (unsigned i = 0; i < OpNum; ++i) { 7233 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 7234 return false; 7235 } 7236 7237 return true; 7238 } 7239 7240 bool 7241 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 7242 using namespace llvm::AMDGPU::Swizzle; 7243 7244 int64_t Lane[LANE_NUM]; 7245 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 7246 "expected a 2-bit lane id")) { 7247 Imm = QUAD_PERM_ENC; 7248 for (unsigned I = 0; I < LANE_NUM; ++I) { 7249 Imm |= Lane[I] << (LANE_SHIFT * I); 7250 } 7251 return true; 7252 } 7253 return false; 7254 } 7255 7256 bool 7257 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 7258 using namespace llvm::AMDGPU::Swizzle; 7259 7260 SMLoc Loc; 7261 int64_t GroupSize; 7262 int64_t LaneIdx; 7263 7264 if (!parseSwizzleOperand(GroupSize, 7265 2, 32, 7266 "group size must be in the interval [2,32]", 7267 Loc)) { 7268 return false; 7269 } 7270 if (!isPowerOf2_64(GroupSize)) { 7271 Error(Loc, "group size must be a power of two"); 7272 return false; 7273 } 7274 if (parseSwizzleOperand(LaneIdx, 7275 0, GroupSize - 1, 7276 "lane id must be in the interval [0,group size - 1]", 7277 Loc)) { 7278 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 7279 return true; 7280 } 7281 return false; 7282 } 7283 7284 bool 7285 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 7286 using namespace llvm::AMDGPU::Swizzle; 7287 7288 SMLoc Loc; 7289 int64_t GroupSize; 7290 7291 if (!parseSwizzleOperand(GroupSize, 7292 2, 32, 7293 "group size must be in the interval [2,32]", 7294 Loc)) { 7295 return false; 7296 } 7297 if (!isPowerOf2_64(GroupSize)) { 7298 Error(Loc, "group size must be a power of two"); 7299 return false; 7300 } 7301 7302 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 7303 return true; 7304 } 7305 7306 bool 7307 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 7308 using namespace llvm::AMDGPU::Swizzle; 7309 7310 SMLoc Loc; 7311 int64_t GroupSize; 7312 7313 if (!parseSwizzleOperand(GroupSize, 7314 1, 16, 7315 "group size must be in the interval [1,16]", 7316 Loc)) { 7317 return false; 7318 } 7319 if (!isPowerOf2_64(GroupSize)) { 7320 Error(Loc, "group size must be a power of two"); 7321 return false; 7322 } 7323 7324 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 7325 return true; 7326 } 7327 7328 bool 7329 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 7330 using namespace llvm::AMDGPU::Swizzle; 7331 7332 if 
(!skipToken(AsmToken::Comma, "expected a comma")) { 7333 return false; 7334 } 7335 7336 StringRef Ctl; 7337 SMLoc StrLoc = getLoc(); 7338 if (!parseString(Ctl)) { 7339 return false; 7340 } 7341 if (Ctl.size() != BITMASK_WIDTH) { 7342 Error(StrLoc, "expected a 5-character mask"); 7343 return false; 7344 } 7345 7346 unsigned AndMask = 0; 7347 unsigned OrMask = 0; 7348 unsigned XorMask = 0; 7349 7350 for (size_t i = 0; i < Ctl.size(); ++i) { 7351 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 7352 switch(Ctl[i]) { 7353 default: 7354 Error(StrLoc, "invalid mask"); 7355 return false; 7356 case '0': 7357 break; 7358 case '1': 7359 OrMask |= Mask; 7360 break; 7361 case 'p': 7362 AndMask |= Mask; 7363 break; 7364 case 'i': 7365 AndMask |= Mask; 7366 XorMask |= Mask; 7367 break; 7368 } 7369 } 7370 7371 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 7372 return true; 7373 } 7374 7375 bool 7376 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 7377 7378 SMLoc OffsetLoc = getLoc(); 7379 7380 if (!parseExpr(Imm, "a swizzle macro")) { 7381 return false; 7382 } 7383 if (!isUInt<16>(Imm)) { 7384 Error(OffsetLoc, "expected a 16-bit offset"); 7385 return false; 7386 } 7387 return true; 7388 } 7389 7390 bool 7391 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 7392 using namespace llvm::AMDGPU::Swizzle; 7393 7394 if (skipToken(AsmToken::LParen, "expected a left parenthesis")) { 7395 7396 SMLoc ModeLoc = getLoc(); 7397 bool Ok = false; 7398 7399 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 7400 Ok = parseSwizzleQuadPerm(Imm); 7401 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 7402 Ok = parseSwizzleBitmaskPerm(Imm); 7403 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 7404 Ok = parseSwizzleBroadcast(Imm); 7405 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 7406 Ok = parseSwizzleSwap(Imm); 7407 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 7408 Ok = parseSwizzleReverse(Imm); 7409 } else { 7410 Error(ModeLoc, "expected a swizzle mode"); 7411 } 7412 7413 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis"); 7414 } 7415 7416 return false; 7417 } 7418 7419 OperandMatchResultTy 7420 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 7421 SMLoc S = getLoc(); 7422 int64_t Imm = 0; 7423 7424 if (trySkipId("offset")) { 7425 7426 bool Ok = false; 7427 if (skipToken(AsmToken::Colon, "expected a colon")) { 7428 if (trySkipId("swizzle")) { 7429 Ok = parseSwizzleMacro(Imm); 7430 } else { 7431 Ok = parseSwizzleOffset(Imm); 7432 } 7433 } 7434 7435 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 7436 7437 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 7438 } else { 7439 // Swizzle "offset" operand is optional. 7440 // If it is omitted, try parsing other optional operands.
7441 return parseOptionalOpr(Operands); 7442 } 7443 } 7444 7445 bool 7446 AMDGPUOperand::isSwizzle() const { 7447 return isImmTy(ImmTySwizzle); 7448 } 7449 7450 //===----------------------------------------------------------------------===// 7451 // VGPR Index Mode 7452 //===----------------------------------------------------------------------===// 7453 7454 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 7455 7456 using namespace llvm::AMDGPU::VGPRIndexMode; 7457 7458 if (trySkipToken(AsmToken::RParen)) { 7459 return OFF; 7460 } 7461 7462 int64_t Imm = 0; 7463 7464 while (true) { 7465 unsigned Mode = 0; 7466 SMLoc S = getLoc(); 7467 7468 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 7469 if (trySkipId(IdSymbolic[ModeId])) { 7470 Mode = 1 << ModeId; 7471 break; 7472 } 7473 } 7474 7475 if (Mode == 0) { 7476 Error(S, (Imm == 0)? 7477 "expected a VGPR index mode or a closing parenthesis" : 7478 "expected a VGPR index mode"); 7479 return UNDEF; 7480 } 7481 7482 if (Imm & Mode) { 7483 Error(S, "duplicate VGPR index mode"); 7484 return UNDEF; 7485 } 7486 Imm |= Mode; 7487 7488 if (trySkipToken(AsmToken::RParen)) 7489 break; 7490 if (!skipToken(AsmToken::Comma, 7491 "expected a comma or a closing parenthesis")) 7492 return UNDEF; 7493 } 7494 7495 return Imm; 7496 } 7497 7498 OperandMatchResultTy 7499 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 7500 7501 using namespace llvm::AMDGPU::VGPRIndexMode; 7502 7503 int64_t Imm = 0; 7504 SMLoc S = getLoc(); 7505 7506 if (trySkipId("gpr_idx", AsmToken::LParen)) { 7507 Imm = parseGPRIdxMacro(); 7508 if (Imm == UNDEF) 7509 return MatchOperand_ParseFail; 7510 } else { 7511 if (getParser().parseAbsoluteExpression(Imm)) 7512 return MatchOperand_ParseFail; 7513 if (Imm < 0 || !isUInt<4>(Imm)) { 7514 Error(S, "invalid immediate: only 4-bit values are legal"); 7515 return MatchOperand_ParseFail; 7516 } 7517 } 7518 7519 Operands.push_back( 7520 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 7521 return MatchOperand_Success; 7522 } 7523 7524 bool AMDGPUOperand::isGPRIdxMode() const { 7525 return isImmTy(ImmTyGprIdxMode); 7526 } 7527 7528 //===----------------------------------------------------------------------===// 7529 // sopp branch targets 7530 //===----------------------------------------------------------------------===// 7531 7532 OperandMatchResultTy 7533 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 7534 7535 // Make sure we are not parsing something 7536 // that looks like a label or an expression but is not. 7537 // This will improve error messages. 7538 if (isRegister() || isModifier()) 7539 return MatchOperand_NoMatch; 7540 7541 if (!parseExpr(Operands)) 7542 return MatchOperand_ParseFail; 7543 7544 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 7545 assert(Opr.isImm() || Opr.isExpr()); 7546 SMLoc Loc = Opr.getStartLoc(); 7547 7548 // Currently we do not support arbitrary expressions as branch targets. 7549 // Only labels and absolute expressions are accepted. 
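// For example (with "loop_start" as an arbitrary illustrative label name),
// "s_branch loop_start" and "s_branch 4" are accepted, while an expression
// that does not fold to a constant, such as "s_branch loop_start+4", is
// rejected by the checks below.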
7550 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 7551 Error(Loc, "expected an absolute expression or a label"); 7552 } else if (Opr.isImm() && !Opr.isS16Imm()) { 7553 Error(Loc, "expected a 16-bit signed jump offset"); 7554 } 7555 7556 return MatchOperand_Success; 7557 } 7558 7559 //===----------------------------------------------------------------------===// 7560 // Boolean holding registers 7561 //===----------------------------------------------------------------------===// 7562 7563 OperandMatchResultTy 7564 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 7565 return parseReg(Operands); 7566 } 7567 7568 //===----------------------------------------------------------------------===// 7569 // mubuf 7570 //===----------------------------------------------------------------------===// 7571 7572 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 7573 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 7574 } 7575 7576 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 7577 const OperandVector &Operands, 7578 bool IsAtomic, 7579 bool IsLds) { 7580 OptionalImmIndexMap OptionalIdx; 7581 unsigned FirstOperandIdx = 1; 7582 bool IsAtomicReturn = false; 7583 7584 if (IsAtomic) { 7585 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7586 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7587 if (!Op.isCPol()) 7588 continue; 7589 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7590 break; 7591 } 7592 7593 if (!IsAtomicReturn) { 7594 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7595 if (NewOpc != -1) 7596 Inst.setOpcode(NewOpc); 7597 } 7598 7599 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7600 SIInstrFlags::IsAtomicRet; 7601 } 7602 7603 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7604 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7605 7606 // Add the register arguments 7607 if (Op.isReg()) { 7608 Op.addRegOperands(Inst, 1); 7609 // Insert a tied src for atomic return dst. 7610 // This cannot be postponed as subsequent calls to 7611 // addImmOperands rely on correct number of MC operands. 7612 if (IsAtomicReturn && i == FirstOperandIdx) 7613 Op.addRegOperands(Inst, 1); 7614 continue; 7615 } 7616 7617 // Handle the case where soffset is an immediate 7618 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7619 Op.addImmOperands(Inst, 1); 7620 continue; 7621 } 7622 7623 // Handle tokens like 'offen' which are sometimes hard-coded into the 7624 // asm string. There are no MCInst operands for these. 
7625 if (Op.isToken()) { 7626 continue; 7627 } 7628 assert(Op.isImm()); 7629 7630 // Handle optional arguments 7631 OptionalIdx[Op.getImmTy()] = i; 7632 } 7633 7634 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7635 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7636 7637 if (!IsLds) { // tfe is not legal with lds opcodes 7638 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7639 } 7640 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7641 } 7642 7643 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7644 OptionalImmIndexMap OptionalIdx; 7645 7646 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7647 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7648 7649 // Add the register arguments 7650 if (Op.isReg()) { 7651 Op.addRegOperands(Inst, 1); 7652 continue; 7653 } 7654 7655 // Handle the case where soffset is an immediate 7656 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7657 Op.addImmOperands(Inst, 1); 7658 continue; 7659 } 7660 7661 // Handle tokens like 'offen' which are sometimes hard-coded into the 7662 // asm string. There are no MCInst operands for these. 7663 if (Op.isToken()) { 7664 continue; 7665 } 7666 assert(Op.isImm()); 7667 7668 // Handle optional arguments 7669 OptionalIdx[Op.getImmTy()] = i; 7670 } 7671 7672 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7673 AMDGPUOperand::ImmTyOffset); 7674 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7675 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7676 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7677 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7678 } 7679 7680 //===----------------------------------------------------------------------===// 7681 // mimg 7682 //===----------------------------------------------------------------------===// 7683 7684 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7685 bool IsAtomic) { 7686 unsigned I = 1; 7687 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7688 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7689 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7690 } 7691 7692 if (IsAtomic) { 7693 // Add src, same as dst 7694 assert(Desc.getNumDefs() == 1); 7695 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7696 } 7697 7698 OptionalImmIndexMap OptionalIdx; 7699 7700 for (unsigned E = Operands.size(); I != E; ++I) { 7701 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7702 7703 // Add the register arguments 7704 if (Op.isReg()) { 7705 Op.addRegOperands(Inst, 1); 7706 } else if (Op.isImmModifier()) { 7707 OptionalIdx[Op.getImmTy()] = I; 7708 } else if (!Op.isToken()) { 7709 llvm_unreachable("unexpected operand type"); 7710 } 7711 } 7712 7713 bool IsGFX10Plus = isGFX10Plus(); 7714 7715 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7716 if (IsGFX10Plus) 7717 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7718 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7719 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7720 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7721 if (IsGFX10Plus) 7722 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7723 
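// tfe is not defined for every MIMG variant, so only add it when the selected
// opcode actually declares the operand.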
if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 7724 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7725 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7726 if (!IsGFX10Plus) 7727 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7728 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7729 } 7730 7731 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7732 cvtMIMG(Inst, Operands, true); 7733 } 7734 7735 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7736 OptionalImmIndexMap OptionalIdx; 7737 bool IsAtomicReturn = false; 7738 7739 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7740 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7741 if (!Op.isCPol()) 7742 continue; 7743 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7744 break; 7745 } 7746 7747 if (!IsAtomicReturn) { 7748 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7749 if (NewOpc != -1) 7750 Inst.setOpcode(NewOpc); 7751 } 7752 7753 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7754 SIInstrFlags::IsAtomicRet; 7755 7756 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7757 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7758 7759 // Add the register arguments 7760 if (Op.isReg()) { 7761 Op.addRegOperands(Inst, 1); 7762 if (IsAtomicReturn && i == 1) 7763 Op.addRegOperands(Inst, 1); 7764 continue; 7765 } 7766 7767 // Handle the case where soffset is an immediate 7768 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7769 Op.addImmOperands(Inst, 1); 7770 continue; 7771 } 7772 7773 // Handle tokens like 'offen' which are sometimes hard-coded into the 7774 // asm string. There are no MCInst operands for these. 7775 if (Op.isToken()) { 7776 continue; 7777 } 7778 assert(Op.isImm()); 7779 7780 // Handle optional arguments 7781 OptionalIdx[Op.getImmTy()] = i; 7782 } 7783 7784 if ((int)Inst.getNumOperands() <= 7785 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7786 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7787 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7788 } 7789 7790 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7791 const OperandVector &Operands) { 7792 for (unsigned I = 1; I < Operands.size(); ++I) { 7793 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7794 if (Operand.isReg()) 7795 Operand.addRegOperands(Inst, 1); 7796 } 7797 7798 Inst.addOperand(MCOperand::createImm(1)); // a16 7799 } 7800 7801 //===----------------------------------------------------------------------===// 7802 // smrd 7803 //===----------------------------------------------------------------------===// 7804 7805 bool AMDGPUOperand::isSMRDOffset8() const { 7806 return isImm() && isUInt<8>(getImm()); 7807 } 7808 7809 bool AMDGPUOperand::isSMEMOffset() const { 7810 return isImmTy(ImmTyNone) || 7811 isImmTy(ImmTyOffset); // Offset range is checked later by validator. 7812 } 7813 7814 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7815 // 32-bit literals are only supported on CI and we only want to use them 7816 // when the offset is > 8-bits. 
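// That is, accept only offsets that need more than 8 bits but still fit in an
// unsigned 32-bit literal.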
7817 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7818 } 7819 7820 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7821 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7822 } 7823 7824 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7825 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7826 } 7827 7828 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7829 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7830 } 7831 7832 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7833 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7834 } 7835 7836 //===----------------------------------------------------------------------===// 7837 // vop3 7838 //===----------------------------------------------------------------------===// 7839 7840 static bool ConvertOmodMul(int64_t &Mul) { 7841 if (Mul != 1 && Mul != 2 && Mul != 4) 7842 return false; 7843 7844 Mul >>= 1; 7845 return true; 7846 } 7847 7848 static bool ConvertOmodDiv(int64_t &Div) { 7849 if (Div == 1) { 7850 Div = 0; 7851 return true; 7852 } 7853 7854 if (Div == 2) { 7855 Div = 3; 7856 return true; 7857 } 7858 7859 return false; 7860 } 7861 7862 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7863 // This is intentional and ensures compatibility with sp3. 7864 // See bug 35397 for details. 7865 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7866 if (BoundCtrl == 0 || BoundCtrl == 1) { 7867 BoundCtrl = 1; 7868 return true; 7869 } 7870 return false; 7871 } 7872 7873 // Note: the order in this table matches the order of operands in AsmString. 7874 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7875 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7876 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7877 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7878 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7879 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7880 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7881 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7882 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7883 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7884 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7885 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7886 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7887 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7888 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7889 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7890 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7891 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7892 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7893 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7894 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7895 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7896 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7897 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7898 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7899 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7900 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 7901 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 7902 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 7903 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 7904 {"vm", AMDGPUOperand::ImmTyExpVM, 
true, nullptr}, 7905 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 7906 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 7907 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 7908 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 7909 {"dpp8", AMDGPUOperand::ImmTyDPP8, false, nullptr}, 7910 {"dpp_ctrl", AMDGPUOperand::ImmTyDppCtrl, false, nullptr}, 7911 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7912 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7913 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7914 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7915 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 7916 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 7917 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}, 7918 {"wait_vdst", AMDGPUOperand::ImmTyWaitVDST, false, nullptr}, 7919 {"wait_exp", AMDGPUOperand::ImmTyWaitEXP, false, nullptr} 7920 }; 7921 7922 void AMDGPUAsmParser::onBeginOfFile() { 7923 if (!getParser().getStreamer().getTargetStreamer() || 7924 getSTI().getTargetTriple().getArch() == Triple::r600) 7925 return; 7926 7927 if (!getTargetStreamer().getTargetID()) 7928 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString()); 7929 7930 if (isHsaAbiVersion3AndAbove(&getSTI())) 7931 getTargetStreamer().EmitDirectiveAMDGCNTarget(); 7932 } 7933 7934 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 7935 7936 OperandMatchResultTy res = parseOptionalOpr(Operands); 7937 7938 // This is a hack to enable hardcoded mandatory operands which follow 7939 // optional operands. 7940 // 7941 // Current design assumes that all operands after the first optional operand 7942 // are also optional. However implementation of some instructions violates 7943 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 7944 // 7945 // To alleviate this problem, we have to (implicitly) parse extra operands 7946 // to make sure autogenerated parser of custom operands never hit hardcoded 7947 // mandatory operands. 
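// Keep consuming comma-separated optional operands (up to MAX_OPR_LOOKAHEAD of
// them) while parsing succeeds and the end of the statement has not been
// reached.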
7948 7949 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7950 if (res != MatchOperand_Success || 7951 isToken(AsmToken::EndOfStatement)) 7952 break; 7953 7954 trySkipToken(AsmToken::Comma); 7955 res = parseOptionalOpr(Operands); 7956 } 7957 7958 return res; 7959 } 7960 7961 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7962 OperandMatchResultTy res; 7963 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7964 // try to parse any optional operand here 7965 if (Op.IsBit) { 7966 res = parseNamedBit(Op.Name, Operands, Op.Type); 7967 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7968 res = parseOModOperand(Operands); 7969 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7970 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7971 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7972 res = parseSDWASel(Operands, Op.Name, Op.Type); 7973 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7974 res = parseSDWADstUnused(Operands); 7975 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7976 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7977 Op.Type == AMDGPUOperand::ImmTyNegLo || 7978 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7979 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7980 Op.ConvertResult); 7981 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7982 res = parseDim(Operands); 7983 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7984 res = parseCPol(Operands); 7985 } else if (Op.Type == AMDGPUOperand::ImmTyDPP8) { 7986 res = parseDPP8(Operands); 7987 } else if (Op.Type == AMDGPUOperand::ImmTyDppCtrl) { 7988 res = parseDPPCtrl(Operands); 7989 } else { 7990 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7991 if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) { 7992 res = parseOperandArrayWithPrefix("neg", Operands, 7993 AMDGPUOperand::ImmTyBLGP, 7994 nullptr); 7995 } 7996 } 7997 if (res != MatchOperand_NoMatch) { 7998 return res; 7999 } 8000 } 8001 return MatchOperand_NoMatch; 8002 } 8003 8004 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 8005 StringRef Name = getTokenStr(); 8006 if (Name == "mul") { 8007 return parseIntWithPrefix("mul", Operands, 8008 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 8009 } 8010 8011 if (Name == "div") { 8012 return parseIntWithPrefix("div", Operands, 8013 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 8014 } 8015 8016 return MatchOperand_NoMatch; 8017 } 8018 8019 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 8020 cvtVOP3P(Inst, Operands); 8021 8022 int Opc = Inst.getOpcode(); 8023 8024 int SrcNum; 8025 const int Ops[] = { AMDGPU::OpName::src0, 8026 AMDGPU::OpName::src1, 8027 AMDGPU::OpName::src2 }; 8028 for (SrcNum = 0; 8029 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 8030 ++SrcNum); 8031 assert(SrcNum > 0); 8032 8033 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8034 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 8035 8036 if ((OpSel & (1 << SrcNum)) != 0) { 8037 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 8038 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 8039 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 8040 } 8041 } 8042 8043 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 8044 // 1. This operand is input modifiers 8045 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 8046 // 2. 
This is not last operand 8047 && Desc.NumOperands > (OpNum + 1) 8048 // 3. Next operand is register class 8049 && Desc.OpInfo[OpNum + 1].RegClass != -1 8050 // 4. Next register is not tied to any other operand 8051 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 8052 } 8053 8054 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 8055 { 8056 OptionalImmIndexMap OptionalIdx; 8057 unsigned Opc = Inst.getOpcode(); 8058 8059 unsigned I = 1; 8060 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8061 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8062 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8063 } 8064 8065 for (unsigned E = Operands.size(); I != E; ++I) { 8066 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8067 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8068 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8069 } else if (Op.isInterpSlot() || 8070 Op.isInterpAttr() || 8071 Op.isAttrChan()) { 8072 Inst.addOperand(MCOperand::createImm(Op.getImm())); 8073 } else if (Op.isImmModifier()) { 8074 OptionalIdx[Op.getImmTy()] = I; 8075 } else { 8076 llvm_unreachable("unhandled operand type"); 8077 } 8078 } 8079 8080 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 8081 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 8082 } 8083 8084 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8085 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8086 } 8087 8088 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8089 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8090 } 8091 } 8092 8093 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands) 8094 { 8095 OptionalImmIndexMap OptionalIdx; 8096 unsigned Opc = Inst.getOpcode(); 8097 8098 unsigned I = 1; 8099 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8100 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8101 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8102 } 8103 8104 for (unsigned E = Operands.size(); I != E; ++I) { 8105 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8106 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8107 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8108 } else if (Op.isImmModifier()) { 8109 OptionalIdx[Op.getImmTy()] = I; 8110 } else { 8111 llvm_unreachable("unhandled operand type"); 8112 } 8113 } 8114 8115 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8116 8117 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8118 if (OpSelIdx != -1) 8119 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); 8120 8121 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP); 8122 8123 if (OpSelIdx == -1) 8124 return; 8125 8126 const int Ops[] = { AMDGPU::OpName::src0, 8127 AMDGPU::OpName::src1, 8128 AMDGPU::OpName::src2 }; 8129 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8130 AMDGPU::OpName::src1_modifiers, 8131 AMDGPU::OpName::src2_modifiers }; 8132 8133 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 8134 8135 for (int J = 0; J < 3; ++J) { 8136 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8137 if (OpIdx == -1) 8138 break; 8139 8140 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8141 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 8142 8143 if ((OpSel & (1 << J)) != 0) 8144 ModVal |= 
SISrcMods::OP_SEL_0; 8145 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && 8146 (OpSel & (1 << 3)) != 0) 8147 ModVal |= SISrcMods::DST_OP_SEL; 8148 8149 Inst.getOperand(ModIdx).setImm(ModVal); 8150 } 8151 } 8152 8153 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 8154 OptionalImmIndexMap &OptionalIdx) { 8155 unsigned Opc = Inst.getOpcode(); 8156 8157 unsigned I = 1; 8158 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8159 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8160 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8161 } 8162 8163 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 8164 // This instruction has src modifiers 8165 for (unsigned E = Operands.size(); I != E; ++I) { 8166 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8167 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8168 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8169 } else if (Op.isImmModifier()) { 8170 OptionalIdx[Op.getImmTy()] = I; 8171 } else if (Op.isRegOrImm()) { 8172 Op.addRegOrImmOperands(Inst, 1); 8173 } else { 8174 llvm_unreachable("unhandled operand type"); 8175 } 8176 } 8177 } else { 8178 // No src modifiers 8179 for (unsigned E = Operands.size(); I != E; ++I) { 8180 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8181 if (Op.isMod()) { 8182 OptionalIdx[Op.getImmTy()] = I; 8183 } else { 8184 Op.addRegOrImmOperands(Inst, 1); 8185 } 8186 } 8187 } 8188 8189 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8190 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8191 } 8192 8193 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8194 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8195 } 8196 8197 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 8198 // it has src2 register operand that is tied to dst operand 8199 // we don't allow modifiers for this operand in assembler so src2_modifiers 8200 // should be 0. 8201 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 8202 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 8203 Opc == AMDGPU::V_MAC_F32_e64_vi || 8204 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 8205 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 8206 Opc == AMDGPU::V_MAC_F16_e64_vi || 8207 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 8208 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 8209 Opc == AMDGPU::V_FMAC_F32_e64_gfx11 || 8210 Opc == AMDGPU::V_FMAC_F32_e64_vi || 8211 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 8212 Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 || 8213 Opc == AMDGPU::V_FMAC_F16_e64_gfx10 || 8214 Opc == AMDGPU::V_FMAC_F16_e64_gfx11) { 8215 auto it = Inst.begin(); 8216 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 8217 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 8218 ++it; 8219 // Copy the operand to ensure it's not invalidated when Inst grows. 
8220 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 8221 } 8222 } 8223 8224 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 8225 OptionalImmIndexMap OptionalIdx; 8226 cvtVOP3(Inst, Operands, OptionalIdx); 8227 } 8228 8229 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 8230 OptionalImmIndexMap &OptIdx) { 8231 const int Opc = Inst.getOpcode(); 8232 const MCInstrDesc &Desc = MII.get(Opc); 8233 8234 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 8235 8236 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 8237 assert(!IsPacked); 8238 Inst.addOperand(Inst.getOperand(0)); 8239 } 8240 8241 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 8242 // instruction, and then figure out where to actually put the modifiers 8243 8244 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8245 if (OpSelIdx != -1) { 8246 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 8247 } 8248 8249 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 8250 if (OpSelHiIdx != -1) { 8251 int DefaultVal = IsPacked ? -1 : 0; 8252 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 8253 DefaultVal); 8254 } 8255 8256 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 8257 if (NegLoIdx != -1) { 8258 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 8259 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 8260 } 8261 8262 const int Ops[] = { AMDGPU::OpName::src0, 8263 AMDGPU::OpName::src1, 8264 AMDGPU::OpName::src2 }; 8265 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8266 AMDGPU::OpName::src1_modifiers, 8267 AMDGPU::OpName::src2_modifiers }; 8268 8269 unsigned OpSel = 0; 8270 unsigned OpSelHi = 0; 8271 unsigned NegLo = 0; 8272 unsigned NegHi = 0; 8273 8274 if (OpSelIdx != -1) 8275 OpSel = Inst.getOperand(OpSelIdx).getImm(); 8276 8277 if (OpSelHiIdx != -1) 8278 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 8279 8280 if (NegLoIdx != -1) { 8281 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 8282 NegLo = Inst.getOperand(NegLoIdx).getImm(); 8283 NegHi = Inst.getOperand(NegHiIdx).getImm(); 8284 } 8285 8286 for (int J = 0; J < 3; ++J) { 8287 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8288 if (OpIdx == -1) 8289 break; 8290 8291 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8292 8293 if (ModIdx == -1) 8294 continue; 8295 8296 uint32_t ModVal = 0; 8297 8298 if ((OpSel & (1 << J)) != 0) 8299 ModVal |= SISrcMods::OP_SEL_0; 8300 8301 if ((OpSelHi & (1 << J)) != 0) 8302 ModVal |= SISrcMods::OP_SEL_1; 8303 8304 if ((NegLo & (1 << J)) != 0) 8305 ModVal |= SISrcMods::NEG; 8306 8307 if ((NegHi & (1 << J)) != 0) 8308 ModVal |= SISrcMods::NEG_HI; 8309 8310 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 8311 } 8312 } 8313 8314 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 8315 OptionalImmIndexMap OptIdx; 8316 cvtVOP3(Inst, Operands, OptIdx); 8317 cvtVOP3P(Inst, Operands, OptIdx); 8318 } 8319 8320 //===----------------------------------------------------------------------===// 8321 // dpp 8322 //===----------------------------------------------------------------------===// 8323 8324 bool AMDGPUOperand::isDPP8() const { 8325 return isImmTy(ImmTyDPP8); 8326 } 8327 8328 bool AMDGPUOperand::isDPPCtrl() const { 8329 using namespace AMDGPU::DPP; 8330 8331 
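// dpp_ctrl is a 9-bit field, but only the named control encodings enumerated
// below are valid; other 9-bit values are rejected.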
bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 8332 if (result) { 8333 int64_t Imm = getImm(); 8334 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 8335 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 8336 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 8337 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 8338 (Imm == DppCtrl::WAVE_SHL1) || 8339 (Imm == DppCtrl::WAVE_ROL1) || 8340 (Imm == DppCtrl::WAVE_SHR1) || 8341 (Imm == DppCtrl::WAVE_ROR1) || 8342 (Imm == DppCtrl::ROW_MIRROR) || 8343 (Imm == DppCtrl::ROW_HALF_MIRROR) || 8344 (Imm == DppCtrl::BCAST15) || 8345 (Imm == DppCtrl::BCAST31) || 8346 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 8347 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 8348 } 8349 return false; 8350 } 8351 8352 //===----------------------------------------------------------------------===// 8353 // mAI 8354 //===----------------------------------------------------------------------===// 8355 8356 bool AMDGPUOperand::isBLGP() const { 8357 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 8358 } 8359 8360 bool AMDGPUOperand::isCBSZ() const { 8361 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 8362 } 8363 8364 bool AMDGPUOperand::isABID() const { 8365 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 8366 } 8367 8368 bool AMDGPUOperand::isS16Imm() const { 8369 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 8370 } 8371 8372 bool AMDGPUOperand::isU16Imm() const { 8373 return isImm() && isUInt<16>(getImm()); 8374 } 8375 8376 //===----------------------------------------------------------------------===// 8377 // dim 8378 //===----------------------------------------------------------------------===// 8379 8380 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 8381 // We want to allow "dim:1D" etc., 8382 // but the initial 1 is tokenized as an integer. 
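// Consume the leading integer (if any), require the identifier that follows
// to start exactly where the integer ended (no intervening whitespace), and
// join the two pieces, e.g. "1" + "D" -> "1D", before the dim suffix lookup.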
8383 std::string Token; 8384 if (isToken(AsmToken::Integer)) { 8385 SMLoc Loc = getToken().getEndLoc(); 8386 Token = std::string(getTokenStr()); 8387 lex(); 8388 if (getLoc() != Loc) 8389 return false; 8390 } 8391 8392 StringRef Suffix; 8393 if (!parseId(Suffix)) 8394 return false; 8395 Token += Suffix; 8396 8397 StringRef DimId = Token; 8398 if (DimId.startswith("SQ_RSRC_IMG_")) 8399 DimId = DimId.drop_front(12); 8400 8401 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 8402 if (!DimInfo) 8403 return false; 8404 8405 Encoding = DimInfo->Encoding; 8406 return true; 8407 } 8408 8409 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 8410 if (!isGFX10Plus()) 8411 return MatchOperand_NoMatch; 8412 8413 SMLoc S = getLoc(); 8414 8415 if (!trySkipId("dim", AsmToken::Colon)) 8416 return MatchOperand_NoMatch; 8417 8418 unsigned Encoding; 8419 SMLoc Loc = getLoc(); 8420 if (!parseDimId(Encoding)) { 8421 Error(Loc, "invalid dim value"); 8422 return MatchOperand_ParseFail; 8423 } 8424 8425 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 8426 AMDGPUOperand::ImmTyDim)); 8427 return MatchOperand_Success; 8428 } 8429 8430 //===----------------------------------------------------------------------===// 8431 // dpp 8432 //===----------------------------------------------------------------------===// 8433 8434 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 8435 SMLoc S = getLoc(); 8436 8437 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 8438 return MatchOperand_NoMatch; 8439 8440 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 8441 8442 int64_t Sels[8]; 8443 8444 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8445 return MatchOperand_ParseFail; 8446 8447 for (size_t i = 0; i < 8; ++i) { 8448 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8449 return MatchOperand_ParseFail; 8450 8451 SMLoc Loc = getLoc(); 8452 if (getParser().parseAbsoluteExpression(Sels[i])) 8453 return MatchOperand_ParseFail; 8454 if (0 > Sels[i] || 7 < Sels[i]) { 8455 Error(Loc, "expected a 3-bit value"); 8456 return MatchOperand_ParseFail; 8457 } 8458 } 8459 8460 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8461 return MatchOperand_ParseFail; 8462 8463 unsigned DPP8 = 0; 8464 for (size_t i = 0; i < 8; ++i) 8465 DPP8 |= (Sels[i] << (i * 3)); 8466 8467 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 8468 return MatchOperand_Success; 8469 } 8470 8471 bool 8472 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 8473 const OperandVector &Operands) { 8474 if (Ctrl == "row_newbcast") 8475 return isGFX90A(); 8476 8477 if (Ctrl == "row_share" || 8478 Ctrl == "row_xmask") 8479 return isGFX10Plus(); 8480 8481 if (Ctrl == "wave_shl" || 8482 Ctrl == "wave_shr" || 8483 Ctrl == "wave_rol" || 8484 Ctrl == "wave_ror" || 8485 Ctrl == "row_bcast") 8486 return isVI() || isGFX9(); 8487 8488 return Ctrl == "row_mirror" || 8489 Ctrl == "row_half_mirror" || 8490 Ctrl == "quad_perm" || 8491 Ctrl == "row_shl" || 8492 Ctrl == "row_shr" || 8493 Ctrl == "row_ror"; 8494 } 8495 8496 int64_t 8497 AMDGPUAsmParser::parseDPPCtrlPerm() { 8498 // quad_perm:[%d,%d,%d,%d] 8499 8500 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8501 return -1; 8502 8503 int64_t Val = 0; 8504 for (int i = 0; i < 4; ++i) { 8505 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8506 return -1; 8507 8508 int64_t Temp; 8509 SMLoc Loc = getLoc(); 8510 if 
(getParser().parseAbsoluteExpression(Temp)) 8511 return -1; 8512 if (Temp < 0 || Temp > 3) { 8513 Error(Loc, "expected a 2-bit value"); 8514 return -1; 8515 } 8516 8517 Val += (Temp << i * 2); 8518 } 8519 8520 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8521 return -1; 8522 8523 return Val; 8524 } 8525 8526 int64_t 8527 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 8528 using namespace AMDGPU::DPP; 8529 8530 // sel:%d 8531 8532 int64_t Val; 8533 SMLoc Loc = getLoc(); 8534 8535 if (getParser().parseAbsoluteExpression(Val)) 8536 return -1; 8537 8538 struct DppCtrlCheck { 8539 int64_t Ctrl; 8540 int Lo; 8541 int Hi; 8542 }; 8543 8544 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 8545 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 8546 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 8547 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 8548 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 8549 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 8550 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 8551 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 8552 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 8553 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 8554 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 8555 .Default({-1, 0, 0}); 8556 8557 bool Valid; 8558 if (Check.Ctrl == -1) { 8559 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 8560 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 8561 } else { 8562 Valid = Check.Lo <= Val && Val <= Check.Hi; 8563 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val); 8564 } 8565 8566 if (!Valid) { 8567 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 8568 return -1; 8569 } 8570 8571 return Val; 8572 } 8573 8574 OperandMatchResultTy 8575 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 8576 using namespace AMDGPU::DPP; 8577 8578 if (!isToken(AsmToken::Identifier) || 8579 !isSupportedDPPCtrl(getTokenStr(), Operands)) 8580 return MatchOperand_NoMatch; 8581 8582 SMLoc S = getLoc(); 8583 int64_t Val = -1; 8584 StringRef Ctrl; 8585 8586 parseId(Ctrl); 8587 8588 if (Ctrl == "row_mirror") { 8589 Val = DppCtrl::ROW_MIRROR; 8590 } else if (Ctrl == "row_half_mirror") { 8591 Val = DppCtrl::ROW_HALF_MIRROR; 8592 } else { 8593 if (skipToken(AsmToken::Colon, "expected a colon")) { 8594 if (Ctrl == "quad_perm") { 8595 Val = parseDPPCtrlPerm(); 8596 } else { 8597 Val = parseDPPCtrlSel(Ctrl); 8598 } 8599 } 8600 } 8601 8602 if (Val == -1) 8603 return MatchOperand_ParseFail; 8604 8605 Operands.push_back( 8606 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 8607 return MatchOperand_Success; 8608 } 8609 8610 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 8611 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 8612 } 8613 8614 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 8615 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 8616 } 8617 8618 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 8619 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 8620 } 8621 8622 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 8623 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 8624 } 8625 8626 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 8627 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 8628 } 8629 8630 // Add dummy $old operand 8631 void 
AMDGPUAsmParser::cvtVOPC64NoDstDPP(MCInst &Inst, 8632 const OperandVector &Operands, 8633 bool IsDPP8) { 8634 Inst.addOperand(MCOperand::createReg(0)); 8635 cvtVOP3DPP(Inst, Operands, IsDPP8); 8636 } 8637 8638 void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8639 OptionalImmIndexMap OptionalIdx; 8640 unsigned Opc = Inst.getOpcode(); 8641 bool HasModifiers = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8642 unsigned I = 1; 8643 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8644 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8645 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8646 } 8647 8648 int Fi = 0; 8649 for (unsigned E = Operands.size(); I != E; ++I) { 8650 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8651 MCOI::TIED_TO); 8652 if (TiedTo != -1) { 8653 assert((unsigned)TiedTo < Inst.getNumOperands()); 8654 // handle tied old or src2 for MAC instructions 8655 Inst.addOperand(Inst.getOperand(TiedTo)); 8656 } 8657 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8658 // Add the register arguments 8659 if (IsDPP8 && Op.isFI()) { 8660 Fi = Op.getImm(); 8661 } else if (HasModifiers && 8662 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8663 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8664 } else if (Op.isReg()) { 8665 Op.addRegOperands(Inst, 1); 8666 } else if (Op.isImm() && 8667 Desc.OpInfo[Inst.getNumOperands()].RegClass != -1) { 8668 assert(!HasModifiers && "Case should be unreachable with modifiers"); 8669 assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP"); 8670 Op.addImmOperands(Inst, 1); 8671 } else if (Op.isImm()) { 8672 OptionalIdx[Op.getImmTy()] = I; 8673 } else { 8674 llvm_unreachable("unhandled operand type"); 8675 } 8676 } 8677 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8678 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8679 } 8680 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8681 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8682 } 8683 if (Desc.TSFlags & SIInstrFlags::VOP3P) 8684 cvtVOP3P(Inst, Operands, OptionalIdx); 8685 else if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) { 8686 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); 8687 } 8688 8689 if (IsDPP8) { 8690 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8); 8691 using namespace llvm::AMDGPU::DPP; 8692 Inst.addOperand(MCOperand::createImm(Fi? 
DPP8_FI_1 : DPP8_FI_0)); 8693 } else { 8694 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4); 8695 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8696 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8697 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8698 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8699 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8700 } 8701 } 8702 } 8703 8704 // Add dummy $old operand 8705 void AMDGPUAsmParser::cvtVOPCNoDstDPP(MCInst &Inst, 8706 const OperandVector &Operands, 8707 bool IsDPP8) { 8708 Inst.addOperand(MCOperand::createReg(0)); 8709 cvtDPP(Inst, Operands, IsDPP8); 8710 } 8711 8712 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8713 OptionalImmIndexMap OptionalIdx; 8714 8715 unsigned Opc = Inst.getOpcode(); 8716 bool HasModifiers = 8717 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8718 unsigned I = 1; 8719 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8720 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8721 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8722 } 8723 8724 int Fi = 0; 8725 for (unsigned E = Operands.size(); I != E; ++I) { 8726 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8727 MCOI::TIED_TO); 8728 if (TiedTo != -1) { 8729 assert((unsigned)TiedTo < Inst.getNumOperands()); 8730 // handle tied old or src2 for MAC instructions 8731 Inst.addOperand(Inst.getOperand(TiedTo)); 8732 } 8733 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8734 // Add the register arguments 8735 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8736 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8737 // Skip it. 8738 continue; 8739 } 8740 8741 if (IsDPP8) { 8742 if (Op.isDPP8()) { 8743 Op.addImmOperands(Inst, 1); 8744 } else if (HasModifiers && 8745 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8746 Op.addRegWithFPInputModsOperands(Inst, 2); 8747 } else if (Op.isFI()) { 8748 Fi = Op.getImm(); 8749 } else if (Op.isReg()) { 8750 Op.addRegOperands(Inst, 1); 8751 } else { 8752 llvm_unreachable("Invalid operand type"); 8753 } 8754 } else { 8755 if (HasModifiers && 8756 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8757 Op.addRegWithFPInputModsOperands(Inst, 2); 8758 } else if (Op.isReg()) { 8759 Op.addRegOperands(Inst, 1); 8760 } else if (Op.isDPPCtrl()) { 8761 Op.addImmOperands(Inst, 1); 8762 } else if (Op.isImm()) { 8763 // Handle optional arguments 8764 OptionalIdx[Op.getImmTy()] = I; 8765 } else { 8766 llvm_unreachable("Invalid operand type"); 8767 } 8768 } 8769 } 8770 8771 if (IsDPP8) { 8772 using namespace llvm::AMDGPU::DPP; 8773 Inst.addOperand(MCOperand::createImm(Fi? 
DPP8_FI_1 : DPP8_FI_0)); 8774 } else { 8775 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8776 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8777 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8778 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8779 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8780 } 8781 } 8782 } 8783 8784 //===----------------------------------------------------------------------===// 8785 // sdwa 8786 //===----------------------------------------------------------------------===// 8787 8788 OperandMatchResultTy 8789 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 8790 AMDGPUOperand::ImmTy Type) { 8791 using namespace llvm::AMDGPU::SDWA; 8792 8793 SMLoc S = getLoc(); 8794 StringRef Value; 8795 OperandMatchResultTy res; 8796 8797 SMLoc StringLoc; 8798 res = parseStringWithPrefix(Prefix, Value, StringLoc); 8799 if (res != MatchOperand_Success) { 8800 return res; 8801 } 8802 8803 int64_t Int; 8804 Int = StringSwitch<int64_t>(Value) 8805 .Case("BYTE_0", SdwaSel::BYTE_0) 8806 .Case("BYTE_1", SdwaSel::BYTE_1) 8807 .Case("BYTE_2", SdwaSel::BYTE_2) 8808 .Case("BYTE_3", SdwaSel::BYTE_3) 8809 .Case("WORD_0", SdwaSel::WORD_0) 8810 .Case("WORD_1", SdwaSel::WORD_1) 8811 .Case("DWORD", SdwaSel::DWORD) 8812 .Default(0xffffffff); 8813 8814 if (Int == 0xffffffff) { 8815 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8816 return MatchOperand_ParseFail; 8817 } 8818 8819 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8820 return MatchOperand_Success; 8821 } 8822 8823 OperandMatchResultTy 8824 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8825 using namespace llvm::AMDGPU::SDWA; 8826 8827 SMLoc S = getLoc(); 8828 StringRef Value; 8829 OperandMatchResultTy res; 8830 8831 SMLoc StringLoc; 8832 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8833 if (res != MatchOperand_Success) { 8834 return res; 8835 } 8836 8837 int64_t Int; 8838 Int = StringSwitch<int64_t>(Value) 8839 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8840 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8841 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8842 .Default(0xffffffff); 8843 8844 if (Int == 0xffffffff) { 8845 Error(StringLoc, "invalid dst_unused value"); 8846 return MatchOperand_ParseFail; 8847 } 8848 8849 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 8850 return MatchOperand_Success; 8851 } 8852 8853 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8854 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8855 } 8856 8857 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8858 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8859 } 8860 8861 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8862 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8863 } 8864 8865 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8866 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8867 } 8868 8869 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8870 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8871 } 8872 8873 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8874 uint64_t BasicInstType, 8875 bool SkipDstVcc, 8876 bool 
SkipSrcVcc) { 8877 using namespace llvm::AMDGPU::SDWA; 8878 8879 OptionalImmIndexMap OptionalIdx; 8880 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 8881 bool SkippedVcc = false; 8882 8883 unsigned I = 1; 8884 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8885 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8886 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8887 } 8888 8889 for (unsigned E = Operands.size(); I != E; ++I) { 8890 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8891 if (SkipVcc && !SkippedVcc && Op.isReg() && 8892 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 8893 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 8894 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 8895 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 8896 // Skip VCC only if we didn't skip it on previous iteration. 8897 // Note that src0 and src1 occupy 2 slots each because of modifiers. 8898 if (BasicInstType == SIInstrFlags::VOP2 && 8899 ((SkipDstVcc && Inst.getNumOperands() == 1) || 8900 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 8901 SkippedVcc = true; 8902 continue; 8903 } else if (BasicInstType == SIInstrFlags::VOPC && 8904 Inst.getNumOperands() == 0) { 8905 SkippedVcc = true; 8906 continue; 8907 } 8908 } 8909 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8910 Op.addRegOrImmWithInputModsOperands(Inst, 2); 8911 } else if (Op.isImm()) { 8912 // Handle optional arguments 8913 OptionalIdx[Op.getImmTy()] = I; 8914 } else { 8915 llvm_unreachable("Invalid operand type"); 8916 } 8917 SkippedVcc = false; 8918 } 8919 8920 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 8921 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 8922 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 8923 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 8924 switch (BasicInstType) { 8925 case SIInstrFlags::VOP1: 8926 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8927 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8928 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8929 } 8930 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8931 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8932 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8933 break; 8934 8935 case SIInstrFlags::VOP2: 8936 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8937 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8938 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8939 } 8940 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8941 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8942 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8943 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8944 break; 8945 8946 case SIInstrFlags::VOPC: 8947 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 8948 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8949 addOptionalImmOperand(Inst, Operands, OptionalIdx, 
//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that the
// MatchClassKind enum is available.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet a token and fails to validate
  // the operand. This method handles the case where we are given an immediate
  // operand but expected the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token when isToken returns true, and when the
    // name of the expression is not a valid token the match fails, so we
    // need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register, but it should also be usable with
    // 64-bit operands. The following code enables it for SReg_64 operands used
    // as source and destination. Remaining source operands are handled in
    // isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}
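// Worked example of the token/immediate mismatch described above (illustrative):
// in "ds_write_b32 v0, v1 gds" the trailing "gds" is parsed as an immediate
// operand of type ImmTyGDS, while the matcher expects the MCK_gds token class;
// the MCK_gds case above accepts the immediate via isGDS(), so matching can
// still succeed.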
//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }

//===----------------------------------------------------------------------===//
// LDSDIR
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitVDST() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitVDST);
}

bool AMDGPUOperand::isWaitVDST() const {
  return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
}

//===----------------------------------------------------------------------===//
// VINTERP
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitEXP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitEXP);
}

bool AMDGPUOperand::isWaitEXP() const {
  return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
}
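// Note: both wait modifiers above are small unsigned immediates; wait_vdst
// accepts a 4-bit value and wait_exp a 3-bit value, matching the
// isUInt<4>/isUInt<3> checks in isWaitVDST()/isWaitEXP() (illustrative forms:
// "wait_vdst:2" and "wait_exp:5").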