//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetParser.h"

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
    ImmTyWaitVDST,
    ImmTyWaitEXP,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrInlineImmWithInt16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrInlineImmWithInt32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isRegOrInlineImmWithFP16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrInlineImmWithFP32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrcF32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrcB32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcB32();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVCSrcV2FP32() const {
    return isVCSrcF64();
  }

  bool isVSrcV2FP32() const {
    return isVSrcF64() || isLiteralImm(MVT::v2f32);
  }

  bool isVCSrcV2INT32() const {
    return isVCSrcB64();
  }

  bool isVSrcV2INT32() const {
    return isVSrcB64() || isLiteralImm(MVT::v2i32);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512B32();
  }

  bool isVISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024B32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isVISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128F16() || isVISrc_128B32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isDepCtr() const;
  bool isSDelayAlu() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;
  bool isWaitVDST() const;
  bool isWaitEXP() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    case ImmTyWaitVDST: OS << "WaitVDST"; break;
    case ImmTyWaitEXP: OS << "WaitEXP"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
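// The scope tracks the highest SGPR/VGPR/AGPR index referenced so far and
// publishes the totals through the .kernel.sgpr_count, .kernel.vgpr_count and
// .kernel.agpr_count symbols (see usesSgprAt/usesVgprAt/usesAgprAt below).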
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;
  MCSubtargetInfo const *MSTI = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

  void usesAgprAt(int i) {
    // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
    if (!hasMAIInsts(*MSTI))
      return;

    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));

        // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
        MCSymbol* const vSym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    MSTI = Ctx->getSubtargetInfo();

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    if (hasMAIInsts(*MSTI)) {
      usesAgprAt(AgprIndexUnusedMin = -1);
    }
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR:
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_AGPR:
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_VGPR:
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    default:
      break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;
  unsigned CPolSeen;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  ///        descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  // TODO: isGFX90A is also true for GFX940. We need to clean it.
  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX940() const {
    return AMDGPU::isGFX940(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX11() const {
    return AMDGPU::isGFX11(getSTI());
  }

  bool isGFX11Plus() const {
    return AMDGPU::isGFX11Plus(getSTI());
  }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseCPol(OperandVector &Operands);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value,
                                             SMLoc &StringLoc);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
  OperandMatchResultTy parseUfmt(int64_t &Format);
  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
  OperandMatchResultTy parseNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);

  bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
  void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
  OperandMatchResultTy parseDepCtrOps(OperandVector &Operands);

  bool parseDelay(int64_t &Delay);
  OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands);

  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  bool parseHwregBody(OperandInfoTy &HwReg,
                      OperandInfoTy &Offset,
                      OperandInfoTy &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const OperandInfoTy &Offset,
                     const OperandInfoTy &Width);

  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
  SMLoc getBLGPLoc(const OperandVector &Operands) const;

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
  SMLoc getLitLoc(const OperandVector &Operands) const;
  SMLoc getConstLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
  Optional<StringRef> validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
  bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
  bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
  bool validateDivScale(const MCInst &Inst);
  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                             const SMLoc &IDLoc);
  bool validateFlatLdsDMA(const MCInst &Inst, const OperandVector &Operands,
                          const SMLoc &IDLoc);
  bool validateExeczVcczOperands(const OperandVector &Operands);
  Optional<StringRef> validateLdsDirect(const MCInst &Inst);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Pref, const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseId(StringRef &Val, const StringRef ErrMsg = "");

  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm, StringRef Expected = "");
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  void onBeginOfFile() override;

  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperand(int64_t &Op,
                           const unsigned MinVal,
                           const unsigned MaxVal,
                           const StringRef ErrMsg,
                           SMLoc &Loc);
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultCPol() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMEMOffset() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                OptionalImmIndexMap &OptionalIdx);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
  void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);

  void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);

  bool parseDimId(unsigned &Encoding);
  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
    cvtDPP(Inst, Operands, true);
  }
  void cvtVOPCNoDstDPP(MCInst &Inst, const OperandVector &Operands,
                       bool IsDPP8 = false);
  void cvtVOPCNoDstDPP8(MCInst &Inst, const OperandVector &Operands) {
    cvtVOPCNoDstDPP(Inst, Operands, true);
  }
  void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
                  bool IsDPP8 = false);
  void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
    cvtVOP3DPP(Inst, Operands, true);
  }
  void cvtVOPC64NoDstDPP(MCInst &Inst, const OperandVector &Operands,
                         bool IsDPP8 = false);
  void cvtVOPC64NoDstDPP8(MCInst &Inst, const OperandVector &Operands) {
    cvtVOPC64NoDstDPP(Inst, Operands, true);
  }

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType,
               bool SkipDstVcc = false,
               bool SkipSrcVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;

  AMDGPUOperand::Ptr defaultWaitVDST() const;
  AMDGPUOperand::Ptr defaultWaitEXP() const;
};

struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with integer type with equivalent bitwidth.
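// \p Size is the operand size in bytes (2, 4, or 8); the MVT overload below
// converts a bit width to bytes before dispatching here.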
1819 static const fltSemantics *getFltSemantics(unsigned Size) { 1820 switch (Size) { 1821 case 4: 1822 return &APFloat::IEEEsingle(); 1823 case 8: 1824 return &APFloat::IEEEdouble(); 1825 case 2: 1826 return &APFloat::IEEEhalf(); 1827 default: 1828 llvm_unreachable("unsupported fp type"); 1829 } 1830 } 1831 1832 static const fltSemantics *getFltSemantics(MVT VT) { 1833 return getFltSemantics(VT.getSizeInBits() / 8); 1834 } 1835 1836 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1837 switch (OperandType) { 1838 case AMDGPU::OPERAND_REG_IMM_INT32: 1839 case AMDGPU::OPERAND_REG_IMM_FP32: 1840 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1841 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1842 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1843 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1844 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1845 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1846 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1847 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1848 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1849 case AMDGPU::OPERAND_KIMM32: 1850 return &APFloat::IEEEsingle(); 1851 case AMDGPU::OPERAND_REG_IMM_INT64: 1852 case AMDGPU::OPERAND_REG_IMM_FP64: 1853 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1854 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1855 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1856 return &APFloat::IEEEdouble(); 1857 case AMDGPU::OPERAND_REG_IMM_INT16: 1858 case AMDGPU::OPERAND_REG_IMM_FP16: 1859 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1860 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1861 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1862 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1863 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1864 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1865 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1866 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1867 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1868 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1869 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1870 case AMDGPU::OPERAND_KIMM16: 1871 return &APFloat::IEEEhalf(); 1872 default: 1873 llvm_unreachable("unsupported fp type"); 1874 } 1875 } 1876 1877 //===----------------------------------------------------------------------===// 1878 // Operand 1879 //===----------------------------------------------------------------------===// 1880 1881 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1882 bool Lost; 1883 1884 // Convert literal to single precision 1885 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1886 APFloat::rmNearestTiesToEven, 1887 &Lost); 1888 // We allow precision lost but not overflow or underflow 1889 if (Status != APFloat::opOK && 1890 Lost && 1891 ((Status & APFloat::opOverflow) != 0 || 1892 (Status & APFloat::opUnderflow) != 0)) { 1893 return false; 1894 } 1895 1896 return true; 1897 } 1898 1899 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1900 return isUIntN(Size, Val) || isIntN(Size, Val); 1901 } 1902 1903 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1904 if (VT.getScalarType() == MVT::i16) { 1905 // FP immediate values are broken. 1906 return isInlinableIntLiteral(Val); 1907 } 1908 1909 // f16/v2f16 operands work correctly for all values. 1910 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1911 } 1912 1913 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1914 1915 // This is a hack to enable named inline values like 1916 // shared_base with both 32-bit and 64-bit operands. 1917 // Note that these values are defined as 1918 // 32-bit operands only. 
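// Named inline values are register operands (see ::isInlineValue below), so
// they are accepted here without any further range checking.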
1919 if (isInlineValue()) { 1920 return true; 1921 } 1922 1923 if (!isImmTy(ImmTyNone)) { 1924 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1925 return false; 1926 } 1927 // TODO: We should avoid using host float here. It would be better to 1928 // check the float bit values which is what a few other places do. 1929 // We've had bot failures before due to weird NaN support on mips hosts. 1930 1931 APInt Literal(64, Imm.Val); 1932 1933 if (Imm.IsFPImm) { // We got fp literal token 1934 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1935 return AMDGPU::isInlinableLiteral64(Imm.Val, 1936 AsmParser->hasInv2PiInlineImm()); 1937 } 1938 1939 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1940 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1941 return false; 1942 1943 if (type.getScalarSizeInBits() == 16) { 1944 return isInlineableLiteralOp16( 1945 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1946 type, AsmParser->hasInv2PiInlineImm()); 1947 } 1948 1949 // Check if single precision literal is inlinable 1950 return AMDGPU::isInlinableLiteral32( 1951 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1952 AsmParser->hasInv2PiInlineImm()); 1953 } 1954 1955 // We got int literal token. 1956 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1957 return AMDGPU::isInlinableLiteral64(Imm.Val, 1958 AsmParser->hasInv2PiInlineImm()); 1959 } 1960 1961 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1962 return false; 1963 } 1964 1965 if (type.getScalarSizeInBits() == 16) { 1966 return isInlineableLiteralOp16( 1967 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1968 type, AsmParser->hasInv2PiInlineImm()); 1969 } 1970 1971 return AMDGPU::isInlinableLiteral32( 1972 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1973 AsmParser->hasInv2PiInlineImm()); 1974 } 1975 1976 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1977 // Check that this immediate can be added as literal 1978 if (!isImmTy(ImmTyNone)) { 1979 return false; 1980 } 1981 1982 if (!Imm.IsFPImm) { 1983 // We got int literal token. 1984 1985 if (type == MVT::f64 && hasFPModifiers()) { 1986 // Cannot apply fp modifiers to int literals preserving the same semantics 1987 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1988 // disable these cases. 1989 return false; 1990 } 1991 1992 unsigned Size = type.getSizeInBits(); 1993 if (Size == 64) 1994 Size = 32; 1995 1996 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1997 // types. 1998 return isSafeTruncation(Imm.Val, Size); 1999 } 2000 2001 // We got fp literal token 2002 if (type == MVT::f64) { // Expected 64-bit fp operand 2003 // We would set low 64-bits of literal to zeroes but we accept this literals 2004 return true; 2005 } 2006 2007 if (type == MVT::i64) { // Expected 64-bit int operand 2008 // We don't allow fp literals in 64-bit integer instructions. It is 2009 // unclear how we should encode them. 2010 return false; 2011 } 2012 2013 // We allow fp literals with f16x2 operands assuming that the specified 2014 // literal goes into the lower half and the upper half is zero. We also 2015 // require that the literal may be losslessly converted to f16. 2016 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 2017 (type == MVT::v2i16)? MVT::i16 : 2018 (type == MVT::v2f32)? 
MVT::f32 : type; 2019 2020 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 2021 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 2022 } 2023 2024 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 2025 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 2026 } 2027 2028 bool AMDGPUOperand::isVRegWithInputMods() const { 2029 return isRegClass(AMDGPU::VGPR_32RegClassID) || 2030 // GFX90A allows DPP on 64-bit operands. 2031 (isRegClass(AMDGPU::VReg_64RegClassID) && 2032 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 2033 } 2034 2035 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 2036 if (AsmParser->isVI()) 2037 return isVReg32(); 2038 else if (AsmParser->isGFX9Plus()) 2039 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 2040 else 2041 return false; 2042 } 2043 2044 bool AMDGPUOperand::isSDWAFP16Operand() const { 2045 return isSDWAOperand(MVT::f16); 2046 } 2047 2048 bool AMDGPUOperand::isSDWAFP32Operand() const { 2049 return isSDWAOperand(MVT::f32); 2050 } 2051 2052 bool AMDGPUOperand::isSDWAInt16Operand() const { 2053 return isSDWAOperand(MVT::i16); 2054 } 2055 2056 bool AMDGPUOperand::isSDWAInt32Operand() const { 2057 return isSDWAOperand(MVT::i32); 2058 } 2059 2060 bool AMDGPUOperand::isBoolReg() const { 2061 auto FB = AsmParser->getFeatureBits(); 2062 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 2063 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 2064 } 2065 2066 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 2067 { 2068 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2069 assert(Size == 2 || Size == 4 || Size == 8); 2070 2071 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 2072 2073 if (Imm.Mods.Abs) { 2074 Val &= ~FpSignMask; 2075 } 2076 if (Imm.Mods.Neg) { 2077 Val ^= FpSignMask; 2078 } 2079 2080 return Val; 2081 } 2082 2083 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 2084 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 2085 Inst.getNumOperands())) { 2086 addLiteralImmOperand(Inst, Imm.Val, 2087 ApplyModifiers & 2088 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2089 } else { 2090 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 2091 Inst.addOperand(MCOperand::createImm(Imm.Val)); 2092 setImmKindNone(); 2093 } 2094 } 2095 2096 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 2097 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 2098 auto OpNum = Inst.getNumOperands(); 2099 // Check that this operand accepts literals 2100 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 2101 2102 if (ApplyModifiers) { 2103 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 2104 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 2105 Val = applyInputFPModifiers(Val, Size); 2106 } 2107 2108 APInt Literal(64, Val); 2109 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 2110 2111 if (Imm.IsFPImm) { // We got fp literal token 2112 switch (OpTy) { 2113 case AMDGPU::OPERAND_REG_IMM_INT64: 2114 case AMDGPU::OPERAND_REG_IMM_FP64: 2115 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2116 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2117 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2118 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2119 AsmParser->hasInv2PiInlineImm())) { 2120 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2121 setImmKindConst(); 2122 return; 2123 } 2124 2125 // Non-inlineable 2126 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2127 // For fp operands we check if low 32 bits are zeros 2128 if (Literal.getLoBits(32) != 0) { 2129 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2130 "Can't encode literal as exact 64-bit floating-point operand. " 2131 "Low 32-bits will be set to zero"); 2132 } 2133 2134 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2135 setImmKindLiteral(); 2136 return; 2137 } 2138 2139 // We don't allow fp literals in 64-bit integer instructions. It is 2140 // unclear how we should encode them. This case should be checked earlier 2141 // in predicate methods (isLiteralImm()) 2142 llvm_unreachable("fp literal in 64-bit integer instruction."); 2143 2144 case AMDGPU::OPERAND_REG_IMM_INT32: 2145 case AMDGPU::OPERAND_REG_IMM_FP32: 2146 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2147 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2148 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2149 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2150 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2151 case AMDGPU::OPERAND_REG_IMM_INT16: 2152 case AMDGPU::OPERAND_REG_IMM_FP16: 2153 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2154 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2155 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2156 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2157 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2158 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2159 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2160 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2161 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2162 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2163 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2164 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2165 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2166 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2167 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2168 case AMDGPU::OPERAND_KIMM32: 2169 case AMDGPU::OPERAND_KIMM16: { 2170 bool lost; 2171 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2172 // Convert literal to single precision 2173 FPLiteral.convert(*getOpFltSemantics(OpTy), 2174 APFloat::rmNearestTiesToEven, &lost); 2175 // We allow precision lost but not overflow or underflow. This should be 2176 // checked earlier in isLiteralImm() 2177 2178 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2179 Inst.addOperand(MCOperand::createImm(ImmVal)); 2180 setImmKindLiteral(); 2181 return; 2182 } 2183 default: 2184 llvm_unreachable("invalid operand size"); 2185 } 2186 2187 return; 2188 } 2189 2190 // We got int literal token. 2191 // Only sign extend inline immediates. 
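// Most cases below first try to keep the value as an inline constant
// (setImmKindConst) and otherwise truncate it to the operand's literal width
// (setImmKindLiteral), e.g. Val & 0xffffffff for 32-bit operands; the KIMM
// cases simply pass through the low bits.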
2192 switch (OpTy) { 2193 case AMDGPU::OPERAND_REG_IMM_INT32: 2194 case AMDGPU::OPERAND_REG_IMM_FP32: 2195 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2196 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2197 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2198 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2199 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2200 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2201 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2202 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2203 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2204 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2205 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2206 if (isSafeTruncation(Val, 32) && 2207 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2208 AsmParser->hasInv2PiInlineImm())) { 2209 Inst.addOperand(MCOperand::createImm(Val)); 2210 setImmKindConst(); 2211 return; 2212 } 2213 2214 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2215 setImmKindLiteral(); 2216 return; 2217 2218 case AMDGPU::OPERAND_REG_IMM_INT64: 2219 case AMDGPU::OPERAND_REG_IMM_FP64: 2220 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2221 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2222 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2223 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2224 Inst.addOperand(MCOperand::createImm(Val)); 2225 setImmKindConst(); 2226 return; 2227 } 2228 2229 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2230 setImmKindLiteral(); 2231 return; 2232 2233 case AMDGPU::OPERAND_REG_IMM_INT16: 2234 case AMDGPU::OPERAND_REG_IMM_FP16: 2235 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2236 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2237 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2238 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2239 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2240 if (isSafeTruncation(Val, 16) && 2241 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2242 AsmParser->hasInv2PiInlineImm())) { 2243 Inst.addOperand(MCOperand::createImm(Val)); 2244 setImmKindConst(); 2245 return; 2246 } 2247 2248 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2249 setImmKindLiteral(); 2250 return; 2251 2252 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2253 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2254 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2255 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2256 assert(isSafeTruncation(Val, 16)); 2257 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2258 AsmParser->hasInv2PiInlineImm())); 2259 2260 Inst.addOperand(MCOperand::createImm(Val)); 2261 return; 2262 } 2263 case AMDGPU::OPERAND_KIMM32: 2264 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2265 setImmKindNone(); 2266 return; 2267 case AMDGPU::OPERAND_KIMM16: 2268 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2269 setImmKindNone(); 2270 return; 2271 default: 2272 llvm_unreachable("invalid operand size"); 2273 } 2274 } 2275 2276 template <unsigned Bitwidth> 2277 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2278 APInt Literal(64, Imm.Val); 2279 setImmKindNone(); 2280 2281 if (!Imm.IsFPImm) { 2282 // We got int literal token. 
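// Pass it through with only the low Bitwidth bits kept; no fp conversion is
// done in this case.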
2283 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2284 return; 2285 } 2286 2287 bool Lost; 2288 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2289 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2290 APFloat::rmNearestTiesToEven, &Lost); 2291 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2292 } 2293 2294 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2295 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2296 } 2297 2298 static bool isInlineValue(unsigned Reg) { 2299 switch (Reg) { 2300 case AMDGPU::SRC_SHARED_BASE: 2301 case AMDGPU::SRC_SHARED_LIMIT: 2302 case AMDGPU::SRC_PRIVATE_BASE: 2303 case AMDGPU::SRC_PRIVATE_LIMIT: 2304 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2305 return true; 2306 case AMDGPU::SRC_VCCZ: 2307 case AMDGPU::SRC_EXECZ: 2308 case AMDGPU::SRC_SCC: 2309 return true; 2310 case AMDGPU::SGPR_NULL: 2311 return true; 2312 default: 2313 return false; 2314 } 2315 } 2316 2317 bool AMDGPUOperand::isInlineValue() const { 2318 return isRegKind() && ::isInlineValue(getReg()); 2319 } 2320 2321 //===----------------------------------------------------------------------===// 2322 // AsmParser 2323 //===----------------------------------------------------------------------===// 2324 2325 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2326 if (Is == IS_VGPR) { 2327 switch (RegWidth) { 2328 default: return -1; 2329 case 32: 2330 return AMDGPU::VGPR_32RegClassID; 2331 case 64: 2332 return AMDGPU::VReg_64RegClassID; 2333 case 96: 2334 return AMDGPU::VReg_96RegClassID; 2335 case 128: 2336 return AMDGPU::VReg_128RegClassID; 2337 case 160: 2338 return AMDGPU::VReg_160RegClassID; 2339 case 192: 2340 return AMDGPU::VReg_192RegClassID; 2341 case 224: 2342 return AMDGPU::VReg_224RegClassID; 2343 case 256: 2344 return AMDGPU::VReg_256RegClassID; 2345 case 512: 2346 return AMDGPU::VReg_512RegClassID; 2347 case 1024: 2348 return AMDGPU::VReg_1024RegClassID; 2349 } 2350 } else if (Is == IS_TTMP) { 2351 switch (RegWidth) { 2352 default: return -1; 2353 case 32: 2354 return AMDGPU::TTMP_32RegClassID; 2355 case 64: 2356 return AMDGPU::TTMP_64RegClassID; 2357 case 128: 2358 return AMDGPU::TTMP_128RegClassID; 2359 case 256: 2360 return AMDGPU::TTMP_256RegClassID; 2361 case 512: 2362 return AMDGPU::TTMP_512RegClassID; 2363 } 2364 } else if (Is == IS_SGPR) { 2365 switch (RegWidth) { 2366 default: return -1; 2367 case 32: 2368 return AMDGPU::SGPR_32RegClassID; 2369 case 64: 2370 return AMDGPU::SGPR_64RegClassID; 2371 case 96: 2372 return AMDGPU::SGPR_96RegClassID; 2373 case 128: 2374 return AMDGPU::SGPR_128RegClassID; 2375 case 160: 2376 return AMDGPU::SGPR_160RegClassID; 2377 case 192: 2378 return AMDGPU::SGPR_192RegClassID; 2379 case 224: 2380 return AMDGPU::SGPR_224RegClassID; 2381 case 256: 2382 return AMDGPU::SGPR_256RegClassID; 2383 case 512: 2384 return AMDGPU::SGPR_512RegClassID; 2385 } 2386 } else if (Is == IS_AGPR) { 2387 switch (RegWidth) { 2388 default: return -1; 2389 case 32: 2390 return AMDGPU::AGPR_32RegClassID; 2391 case 64: 2392 return AMDGPU::AReg_64RegClassID; 2393 case 96: 2394 return AMDGPU::AReg_96RegClassID; 2395 case 128: 2396 return AMDGPU::AReg_128RegClassID; 2397 case 160: 2398 return AMDGPU::AReg_160RegClassID; 2399 case 192: 2400 return AMDGPU::AReg_192RegClassID; 2401 case 224: 2402 return AMDGPU::AReg_224RegClassID; 2403 case 256: 2404 return AMDGPU::AReg_256RegClassID; 2405 case 512: 2406 return AMDGPU::AReg_512RegClassID; 
2407 case 1024: 2408 return AMDGPU::AReg_1024RegClassID; 2409 } 2410 } 2411 return -1; 2412 } 2413 2414 static unsigned getSpecialRegForName(StringRef RegName) { 2415 return StringSwitch<unsigned>(RegName) 2416 .Case("exec", AMDGPU::EXEC) 2417 .Case("vcc", AMDGPU::VCC) 2418 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2419 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2420 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2421 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2422 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2423 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2424 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2425 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2426 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2427 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2428 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2429 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2430 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2431 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2432 .Case("m0", AMDGPU::M0) 2433 .Case("vccz", AMDGPU::SRC_VCCZ) 2434 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2435 .Case("execz", AMDGPU::SRC_EXECZ) 2436 .Case("src_execz", AMDGPU::SRC_EXECZ) 2437 .Case("scc", AMDGPU::SRC_SCC) 2438 .Case("src_scc", AMDGPU::SRC_SCC) 2439 .Case("tba", AMDGPU::TBA) 2440 .Case("tma", AMDGPU::TMA) 2441 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2442 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2443 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2444 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2445 .Case("vcc_lo", AMDGPU::VCC_LO) 2446 .Case("vcc_hi", AMDGPU::VCC_HI) 2447 .Case("exec_lo", AMDGPU::EXEC_LO) 2448 .Case("exec_hi", AMDGPU::EXEC_HI) 2449 .Case("tma_lo", AMDGPU::TMA_LO) 2450 .Case("tma_hi", AMDGPU::TMA_HI) 2451 .Case("tba_lo", AMDGPU::TBA_LO) 2452 .Case("tba_hi", AMDGPU::TBA_HI) 2453 .Case("pc", AMDGPU::PC_REG) 2454 .Case("null", AMDGPU::SGPR_NULL) 2455 .Default(AMDGPU::NoRegister); 2456 } 2457 2458 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2459 SMLoc &EndLoc, bool RestoreOnFailure) { 2460 auto R = parseRegister(); 2461 if (!R) return true; 2462 assert(R->isReg()); 2463 RegNo = R->getReg(); 2464 StartLoc = R->getStartLoc(); 2465 EndLoc = R->getEndLoc(); 2466 return false; 2467 } 2468 2469 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2470 SMLoc &EndLoc) { 2471 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2472 } 2473 2474 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2475 SMLoc &StartLoc, 2476 SMLoc &EndLoc) { 2477 bool Result = 2478 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2479 bool PendingErrors = getParser().hasPendingError(); 2480 getParser().clearPendingErrors(); 2481 if (PendingErrors) 2482 return MatchOperand_ParseFail; 2483 if (Result) 2484 return MatchOperand_NoMatch; 2485 return MatchOperand_Success; 2486 } 2487 2488 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2489 RegisterKind RegKind, unsigned Reg1, 2490 SMLoc Loc) { 2491 switch (RegKind) { 2492 case IS_SPECIAL: 2493 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2494 Reg = AMDGPU::EXEC; 2495 RegWidth = 64; 2496 return true; 2497 } 2498 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2499 Reg = AMDGPU::FLAT_SCR; 2500 RegWidth = 64; 2501 return true; 2502 } 2503 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2504 Reg = AMDGPU::XNACK_MASK; 2505 RegWidth = 64; 
2506 return true; 2507 } 2508 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2509 Reg = AMDGPU::VCC; 2510 RegWidth = 64; 2511 return true; 2512 } 2513 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2514 Reg = AMDGPU::TBA; 2515 RegWidth = 64; 2516 return true; 2517 } 2518 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2519 Reg = AMDGPU::TMA; 2520 RegWidth = 64; 2521 return true; 2522 } 2523 Error(Loc, "register does not fit in the list"); 2524 return false; 2525 case IS_VGPR: 2526 case IS_SGPR: 2527 case IS_AGPR: 2528 case IS_TTMP: 2529 if (Reg1 != Reg + RegWidth / 32) { 2530 Error(Loc, "registers in a list must have consecutive indices"); 2531 return false; 2532 } 2533 RegWidth += 32; 2534 return true; 2535 default: 2536 llvm_unreachable("unexpected register kind"); 2537 } 2538 } 2539 2540 struct RegInfo { 2541 StringLiteral Name; 2542 RegisterKind Kind; 2543 }; 2544 2545 static constexpr RegInfo RegularRegisters[] = { 2546 {{"v"}, IS_VGPR}, 2547 {{"s"}, IS_SGPR}, 2548 {{"ttmp"}, IS_TTMP}, 2549 {{"acc"}, IS_AGPR}, 2550 {{"a"}, IS_AGPR}, 2551 }; 2552 2553 static bool isRegularReg(RegisterKind Kind) { 2554 return Kind == IS_VGPR || 2555 Kind == IS_SGPR || 2556 Kind == IS_TTMP || 2557 Kind == IS_AGPR; 2558 } 2559 2560 static const RegInfo* getRegularRegInfo(StringRef Str) { 2561 for (const RegInfo &Reg : RegularRegisters) 2562 if (Str.startswith(Reg.Name)) 2563 return &Reg; 2564 return nullptr; 2565 } 2566 2567 static bool getRegNum(StringRef Str, unsigned& Num) { 2568 return !Str.getAsInteger(10, Num); 2569 } 2570 2571 bool 2572 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2573 const AsmToken &NextToken) const { 2574 2575 // A list of consecutive registers: [s0,s1,s2,s3] 2576 if (Token.is(AsmToken::LBrac)) 2577 return true; 2578 2579 if (!Token.is(AsmToken::Identifier)) 2580 return false; 2581 2582 // A single register like s0 or a range of registers like s[0:1] 2583 2584 StringRef Str = Token.getString(); 2585 const RegInfo *Reg = getRegularRegInfo(Str); 2586 if (Reg) { 2587 StringRef RegName = Reg->Name; 2588 StringRef RegSuffix = Str.substr(RegName.size()); 2589 if (!RegSuffix.empty()) { 2590 unsigned Num; 2591 // A single register with an index: rXX 2592 if (getRegNum(RegSuffix, Num)) 2593 return true; 2594 } else { 2595 // A range of registers: r[XX:YY]. 2596 if (NextToken.is(AsmToken::LBrac)) 2597 return true; 2598 } 2599 } 2600 2601 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2602 } 2603 2604 bool 2605 AMDGPUAsmParser::isRegister() 2606 { 2607 return isRegister(getToken(), peekToken()); 2608 } 2609 2610 unsigned 2611 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2612 unsigned RegNum, 2613 unsigned RegWidth, 2614 SMLoc Loc) { 2615 2616 assert(isRegularReg(RegKind)); 2617 2618 unsigned AlignSize = 1; 2619 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2620 // SGPR and TTMP registers must be aligned. 2621 // Max required alignment is 4 dwords. 
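// For example, a 64-bit range such as s[2:3] must start at an even index,
// and a 128-bit or wider range such as s[4:7] must start at a multiple of 4.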
2622 AlignSize = std::min(RegWidth / 32, 4u); 2623 } 2624 2625 if (RegNum % AlignSize != 0) { 2626 Error(Loc, "invalid register alignment"); 2627 return AMDGPU::NoRegister; 2628 } 2629 2630 unsigned RegIdx = RegNum / AlignSize; 2631 int RCID = getRegClass(RegKind, RegWidth); 2632 if (RCID == -1) { 2633 Error(Loc, "invalid or unsupported register size"); 2634 return AMDGPU::NoRegister; 2635 } 2636 2637 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2638 const MCRegisterClass RC = TRI->getRegClass(RCID); 2639 if (RegIdx >= RC.getNumRegs()) { 2640 Error(Loc, "register index is out of range"); 2641 return AMDGPU::NoRegister; 2642 } 2643 2644 return RC.getRegister(RegIdx); 2645 } 2646 2647 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) { 2648 int64_t RegLo, RegHi; 2649 if (!skipToken(AsmToken::LBrac, "missing register index")) 2650 return false; 2651 2652 SMLoc FirstIdxLoc = getLoc(); 2653 SMLoc SecondIdxLoc; 2654 2655 if (!parseExpr(RegLo)) 2656 return false; 2657 2658 if (trySkipToken(AsmToken::Colon)) { 2659 SecondIdxLoc = getLoc(); 2660 if (!parseExpr(RegHi)) 2661 return false; 2662 } else { 2663 RegHi = RegLo; 2664 } 2665 2666 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2667 return false; 2668 2669 if (!isUInt<32>(RegLo)) { 2670 Error(FirstIdxLoc, "invalid register index"); 2671 return false; 2672 } 2673 2674 if (!isUInt<32>(RegHi)) { 2675 Error(SecondIdxLoc, "invalid register index"); 2676 return false; 2677 } 2678 2679 if (RegLo > RegHi) { 2680 Error(FirstIdxLoc, "first register index should not exceed second index"); 2681 return false; 2682 } 2683 2684 Num = static_cast<unsigned>(RegLo); 2685 RegWidth = 32 * ((RegHi - RegLo) + 1); 2686 return true; 2687 } 2688 2689 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2690 unsigned &RegNum, unsigned &RegWidth, 2691 SmallVectorImpl<AsmToken> &Tokens) { 2692 assert(isToken(AsmToken::Identifier)); 2693 unsigned Reg = getSpecialRegForName(getTokenStr()); 2694 if (Reg) { 2695 RegNum = 0; 2696 RegWidth = 32; 2697 RegKind = IS_SPECIAL; 2698 Tokens.push_back(getToken()); 2699 lex(); // skip register name 2700 } 2701 return Reg; 2702 } 2703 2704 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2705 unsigned &RegNum, unsigned &RegWidth, 2706 SmallVectorImpl<AsmToken> &Tokens) { 2707 assert(isToken(AsmToken::Identifier)); 2708 StringRef RegName = getTokenStr(); 2709 auto Loc = getLoc(); 2710 2711 const RegInfo *RI = getRegularRegInfo(RegName); 2712 if (!RI) { 2713 Error(Loc, "invalid register name"); 2714 return AMDGPU::NoRegister; 2715 } 2716 2717 Tokens.push_back(getToken()); 2718 lex(); // skip register name 2719 2720 RegKind = RI->Kind; 2721 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2722 if (!RegSuffix.empty()) { 2723 // Single 32-bit register: vXX. 2724 if (!getRegNum(RegSuffix, RegNum)) { 2725 Error(Loc, "invalid register index"); 2726 return AMDGPU::NoRegister; 2727 } 2728 RegWidth = 32; 2729 } else { 2730 // Range of registers: v[XX:YY]. ":YY" is optional. 
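// For example, v[8:11] yields RegNum = 8 and RegWidth = 128, while v[5]
// yields RegNum = 5 and RegWidth = 32.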
2731 if (!ParseRegRange(RegNum, RegWidth)) 2732 return AMDGPU::NoRegister; 2733 } 2734 2735 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2736 } 2737 2738 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2739 unsigned &RegWidth, 2740 SmallVectorImpl<AsmToken> &Tokens) { 2741 unsigned Reg = AMDGPU::NoRegister; 2742 auto ListLoc = getLoc(); 2743 2744 if (!skipToken(AsmToken::LBrac, 2745 "expected a register or a list of registers")) { 2746 return AMDGPU::NoRegister; 2747 } 2748 2749 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2750 2751 auto Loc = getLoc(); 2752 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2753 return AMDGPU::NoRegister; 2754 if (RegWidth != 32) { 2755 Error(Loc, "expected a single 32-bit register"); 2756 return AMDGPU::NoRegister; 2757 } 2758 2759 for (; trySkipToken(AsmToken::Comma); ) { 2760 RegisterKind NextRegKind; 2761 unsigned NextReg, NextRegNum, NextRegWidth; 2762 Loc = getLoc(); 2763 2764 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2765 NextRegNum, NextRegWidth, 2766 Tokens)) { 2767 return AMDGPU::NoRegister; 2768 } 2769 if (NextRegWidth != 32) { 2770 Error(Loc, "expected a single 32-bit register"); 2771 return AMDGPU::NoRegister; 2772 } 2773 if (NextRegKind != RegKind) { 2774 Error(Loc, "registers in a list must be of the same kind"); 2775 return AMDGPU::NoRegister; 2776 } 2777 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2778 return AMDGPU::NoRegister; 2779 } 2780 2781 if (!skipToken(AsmToken::RBrac, 2782 "expected a comma or a closing square bracket")) { 2783 return AMDGPU::NoRegister; 2784 } 2785 2786 if (isRegularReg(RegKind)) 2787 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2788 2789 return Reg; 2790 } 2791 2792 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2793 unsigned &RegNum, unsigned &RegWidth, 2794 SmallVectorImpl<AsmToken> &Tokens) { 2795 auto Loc = getLoc(); 2796 Reg = AMDGPU::NoRegister; 2797 2798 if (isToken(AsmToken::Identifier)) { 2799 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2800 if (Reg == AMDGPU::NoRegister) 2801 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2802 } else { 2803 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2804 } 2805 2806 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2807 if (Reg == AMDGPU::NoRegister) { 2808 assert(Parser.hasPendingError()); 2809 return false; 2810 } 2811 2812 if (!subtargetHasRegister(*TRI, Reg)) { 2813 if (Reg == AMDGPU::SGPR_NULL) { 2814 Error(Loc, "'null' operand is not supported on this GPU"); 2815 } else { 2816 Error(Loc, "register not available on this GPU"); 2817 } 2818 return false; 2819 } 2820 2821 return true; 2822 } 2823 2824 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2825 unsigned &RegNum, unsigned &RegWidth, 2826 bool RestoreOnFailure /*=false*/) { 2827 Reg = AMDGPU::NoRegister; 2828 2829 SmallVector<AsmToken, 1> Tokens; 2830 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2831 if (RestoreOnFailure) { 2832 while (!Tokens.empty()) { 2833 getLexer().UnLex(Tokens.pop_back_val()); 2834 } 2835 } 2836 return true; 2837 } 2838 return false; 2839 } 2840 2841 Optional<StringRef> 2842 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2843 switch (RegKind) { 2844 case IS_VGPR: 2845 return StringRef(".amdgcn.next_free_vgpr"); 2846 case IS_SGPR: 2847 return StringRef(".amdgcn.next_free_sgpr"); 2848 default: 2849 return None; 2850 } 2851 } 2852 2853 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2854 auto SymbolName = getGprCountSymbolName(RegKind); 2855 assert(SymbolName && "initializing invalid register kind"); 2856 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2857 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2858 } 2859 2860 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2861 unsigned DwordRegIndex, 2862 unsigned RegWidth) { 2863 // Symbols are only defined for GCN targets 2864 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2865 return true; 2866 2867 auto SymbolName = getGprCountSymbolName(RegKind); 2868 if (!SymbolName) 2869 return true; 2870 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2871 2872 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1; 2873 int64_t OldCount; 2874 2875 if (!Sym->isVariable()) 2876 return !Error(getLoc(), 2877 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2878 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2879 return !Error( 2880 getLoc(), 2881 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2882 2883 if (OldCount <= NewMax) 2884 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2885 2886 return true; 2887 } 2888 2889 std::unique_ptr<AMDGPUOperand> 2890 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2891 const auto &Tok = getToken(); 2892 SMLoc StartLoc = Tok.getLoc(); 2893 SMLoc EndLoc = Tok.getEndLoc(); 2894 RegisterKind RegKind; 2895 unsigned Reg, RegNum, RegWidth; 2896 2897 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2898 return nullptr; 2899 } 2900 if (isHsaAbiVersion3AndAbove(&getSTI())) { 2901 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2902 return nullptr; 2903 } else 2904 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2905 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2906 } 2907 2908 OperandMatchResultTy 2909 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2910 // TODO: add syntactic sugar for 1/(2*PI) 2911 2912 assert(!isRegister()); 2913 assert(!isModifier()); 2914 2915 const auto& Tok = getToken(); 2916 const auto& NextTok = peekToken(); 2917 bool IsReal = Tok.is(AsmToken::Real); 2918 SMLoc S = getLoc(); 2919 bool Negate = false; 2920 2921 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2922 lex(); 2923 IsReal = true; 2924 Negate = true; 2925 } 2926 2927 if (IsReal) { 2928 // Floating-point expressions are not supported. 2929 // Can only allow floating-point literals with an 2930 // optional sign. 2931 2932 StringRef Num = getTokenStr(); 2933 lex(); 2934 2935 APFloat RealVal(APFloat::IEEEdouble()); 2936 auto roundMode = APFloat::rmNearestTiesToEven; 2937 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2938 return MatchOperand_ParseFail; 2939 } 2940 if (Negate) 2941 RealVal.changeSign(); 2942 2943 Operands.push_back( 2944 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2945 AMDGPUOperand::ImmTyNone, true)); 2946 2947 return MatchOperand_Success; 2948 2949 } else { 2950 int64_t IntVal; 2951 const MCExpr *Expr; 2952 SMLoc S = getLoc(); 2953 2954 if (HasSP3AbsModifier) { 2955 // This is a workaround for handling expressions 2956 // as arguments of SP3 'abs' modifier, for example: 2957 // |1.0| 2958 // |-1| 2959 // |1+x| 2960 // This syntax is not compatible with syntax of standard 2961 // MC expressions (due to the trailing '|'). 
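// Use parsePrimaryExpr rather than parseExpression below so that parsing
// stops before the trailing '|'.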
2962 SMLoc EndLoc; 2963 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2964 return MatchOperand_ParseFail; 2965 } else { 2966 if (Parser.parseExpression(Expr)) 2967 return MatchOperand_ParseFail; 2968 } 2969 2970 if (Expr->evaluateAsAbsolute(IntVal)) { 2971 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2972 } else { 2973 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2974 } 2975 2976 return MatchOperand_Success; 2977 } 2978 2979 return MatchOperand_NoMatch; 2980 } 2981 2982 OperandMatchResultTy 2983 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2984 if (!isRegister()) 2985 return MatchOperand_NoMatch; 2986 2987 if (auto R = parseRegister()) { 2988 assert(R->isReg()); 2989 Operands.push_back(std::move(R)); 2990 return MatchOperand_Success; 2991 } 2992 return MatchOperand_ParseFail; 2993 } 2994 2995 OperandMatchResultTy 2996 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2997 auto res = parseReg(Operands); 2998 if (res != MatchOperand_NoMatch) { 2999 return res; 3000 } else if (isModifier()) { 3001 return MatchOperand_NoMatch; 3002 } else { 3003 return parseImm(Operands, HasSP3AbsMod); 3004 } 3005 } 3006 3007 bool 3008 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3009 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 3010 const auto &str = Token.getString(); 3011 return str == "abs" || str == "neg" || str == "sext"; 3012 } 3013 return false; 3014 } 3015 3016 bool 3017 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 3018 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 3019 } 3020 3021 bool 3022 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3023 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 3024 } 3025 3026 bool 3027 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3028 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 3029 } 3030 3031 // Check if this is an operand modifier or an opcode modifier 3032 // which may look like an expression but it is not. We should 3033 // avoid parsing these modifiers as expressions. Currently 3034 // recognized sequences are: 3035 // |...| 3036 // abs(...) 3037 // neg(...) 3038 // sext(...) 3039 // -reg 3040 // -|...| 3041 // -abs(...) 3042 // name:... 3043 // Note that simple opcode modifiers like 'gds' may be parsed as 3044 // expressions; this is a special case. See getExpressionAsToken. 3045 // 3046 bool 3047 AMDGPUAsmParser::isModifier() { 3048 3049 AsmToken Tok = getToken(); 3050 AsmToken NextToken[2]; 3051 peekTokens(NextToken); 3052 3053 return isOperandModifier(Tok, NextToken[0]) || 3054 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 3055 isOpcodeModifierWithVal(Tok, NextToken[0]); 3056 } 3057 3058 // Check if the current token is an SP3 'neg' modifier. 3059 // Currently this modifier is allowed in the following context: 3060 // 3061 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 3062 // 2. Before an 'abs' modifier: -abs(...) 3063 // 3. Before an SP3 'abs' modifier: -|...| 3064 // 3065 // In all other cases "-" is handled as a part 3066 // of an expression that follows the sign. 
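// For example, "-v0" is parsed as register v0 with the NEG modifier applied,
// while "-5" is parsed as the integer literal -5.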
3067 // 3068 // Note: When "-" is followed by an integer literal, 3069 // this is interpreted as integer negation rather 3070 // than a floating-point NEG modifier applied to N. 3071 // Besides being counter-intuitive, such use of a floating-point 3072 // NEG modifier would have resulted in different meanings 3073 // of integer literals used with VOP1/2/C and VOP3, 3074 // for example: 3075 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 3076 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 3077 // Negative fp literals with preceding "-" are 3078 // handled likewise for uniformity. 3079 // 3080 bool 3081 AMDGPUAsmParser::parseSP3NegModifier() { 3082 3083 AsmToken NextToken[2]; 3084 peekTokens(NextToken); 3085 3086 if (isToken(AsmToken::Minus) && 3087 (isRegister(NextToken[0], NextToken[1]) || 3088 NextToken[0].is(AsmToken::Pipe) || 3089 isId(NextToken[0], "abs"))) { 3090 lex(); 3091 return true; 3092 } 3093 3094 return false; 3095 } 3096 3097 OperandMatchResultTy 3098 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 3099 bool AllowImm) { 3100 bool Neg, SP3Neg; 3101 bool Abs, SP3Abs; 3102 SMLoc Loc; 3103 3104 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 3105 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 3106 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 3107 return MatchOperand_ParseFail; 3108 } 3109 3110 SP3Neg = parseSP3NegModifier(); 3111 3112 Loc = getLoc(); 3113 Neg = trySkipId("neg"); 3114 if (Neg && SP3Neg) { 3115 Error(Loc, "expected register or immediate"); 3116 return MatchOperand_ParseFail; 3117 } 3118 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 3119 return MatchOperand_ParseFail; 3120 3121 Abs = trySkipId("abs"); 3122 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 3123 return MatchOperand_ParseFail; 3124 3125 Loc = getLoc(); 3126 SP3Abs = trySkipToken(AsmToken::Pipe); 3127 if (Abs && SP3Abs) { 3128 Error(Loc, "expected register or immediate"); 3129 return MatchOperand_ParseFail; 3130 } 3131 3132 OperandMatchResultTy Res; 3133 if (AllowImm) { 3134 Res = parseRegOrImm(Operands, SP3Abs); 3135 } else { 3136 Res = parseReg(Operands); 3137 } 3138 if (Res != MatchOperand_Success) { 3139 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 3140 } 3141 3142 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 3143 return MatchOperand_ParseFail; 3144 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3145 return MatchOperand_ParseFail; 3146 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3147 return MatchOperand_ParseFail; 3148 3149 AMDGPUOperand::Modifiers Mods; 3150 Mods.Abs = Abs || SP3Abs; 3151 Mods.Neg = Neg || SP3Neg; 3152 3153 if (Mods.hasFPModifiers()) { 3154 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3155 if (Op.isExpr()) { 3156 Error(Op.getStartLoc(), "expected an absolute expression"); 3157 return MatchOperand_ParseFail; 3158 } 3159 Op.setModifiers(Mods); 3160 } 3161 return MatchOperand_Success; 3162 } 3163 3164 OperandMatchResultTy 3165 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3166 bool AllowImm) { 3167 bool Sext = trySkipId("sext"); 3168 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3169 return MatchOperand_ParseFail; 3170 3171 OperandMatchResultTy Res; 3172 if (AllowImm) { 3173 Res = parseRegOrImm(Operands); 3174 } else { 3175 Res = parseReg(Operands); 3176 } 3177 if (Res != MatchOperand_Success) { 3178 return Sext? MatchOperand_ParseFail : Res; 3179 } 3180 3181 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3182 return MatchOperand_ParseFail; 3183 3184 AMDGPUOperand::Modifiers Mods; 3185 Mods.Sext = Sext; 3186 3187 if (Mods.hasIntModifiers()) { 3188 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3189 if (Op.isExpr()) { 3190 Error(Op.getStartLoc(), "expected an absolute expression"); 3191 return MatchOperand_ParseFail; 3192 } 3193 Op.setModifiers(Mods); 3194 } 3195 3196 return MatchOperand_Success; 3197 } 3198 3199 OperandMatchResultTy 3200 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3201 return parseRegOrImmWithFPInputMods(Operands, false); 3202 } 3203 3204 OperandMatchResultTy 3205 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3206 return parseRegOrImmWithIntInputMods(Operands, false); 3207 } 3208 3209 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3210 auto Loc = getLoc(); 3211 if (trySkipId("off")) { 3212 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3213 AMDGPUOperand::ImmTyOff, false)); 3214 return MatchOperand_Success; 3215 } 3216 3217 if (!isRegister()) 3218 return MatchOperand_NoMatch; 3219 3220 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3221 if (Reg) { 3222 Operands.push_back(std::move(Reg)); 3223 return MatchOperand_Success; 3224 } 3225 3226 return MatchOperand_ParseFail; 3227 3228 } 3229 3230 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3231 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3232 3233 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3234 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3235 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3236 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3237 return Match_InvalidOperand; 3238 3239 if ((TSFlags & SIInstrFlags::VOP3) && 3240 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3241 getForcedEncodingSize() != 64) 3242 return Match_PreferE32; 3243 3244 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3245 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3246 // v_mac_f32/16 allow only dst_sel == DWORD; 3247 auto OpNum = 3248 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3249 const auto &Op = Inst.getOperand(OpNum); 3250 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3251 return Match_InvalidOperand; 3252 } 3253 } 3254 3255 return Match_Success; 3256 } 3257 3258 static ArrayRef<unsigned> getAllVariants() { 3259 static const unsigned Variants[] = { 3260 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3261 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, 3262 AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP 3263 }; 3264 3265 return makeArrayRef(Variants); 3266 } 3267 3268 // What asm variants we should check 3269 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3270 if (isForcedDPP() && isForcedVOP3()) { 3271 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP}; 3272 return makeArrayRef(Variants); 3273 } 3274 if (getForcedEncodingSize() == 32) { 3275 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3276 return makeArrayRef(Variants); 3277 } 3278 3279 if (isForcedVOP3()) { 3280 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3281 return makeArrayRef(Variants); 3282 } 3283 3284 if (isForcedSDWA()) { 3285 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3286 AMDGPUAsmVariants::SDWA9}; 3287 return makeArrayRef(Variants); 3288 } 3289 3290 if (isForcedDPP()) { 3291 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3292 return makeArrayRef(Variants); 3293 } 3294 3295 return getAllVariants(); 3296 } 3297 3298 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3299 if (isForcedDPP() && isForcedVOP3()) 3300 return "e64_dpp"; 3301 3302 if (getForcedEncodingSize() == 32) 3303 return "e32"; 3304 3305 if (isForcedVOP3()) 3306 return "e64"; 3307 3308 if (isForcedSDWA()) 3309 return "sdwa"; 3310 3311 if (isForcedDPP()) 3312 return "dpp"; 3313 3314 return ""; 3315 } 3316 3317 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3318 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3319 const unsigned Num = Desc.getNumImplicitUses(); 3320 for (unsigned i = 0; i < Num; ++i) { 3321 unsigned Reg = Desc.ImplicitUses[i]; 3322 switch (Reg) { 3323 case AMDGPU::FLAT_SCR: 3324 case AMDGPU::VCC: 3325 case AMDGPU::VCC_LO: 3326 case AMDGPU::VCC_HI: 3327 case AMDGPU::M0: 3328 return Reg; 3329 default: 3330 break; 3331 } 3332 } 3333 return AMDGPU::NoRegister; 3334 } 3335 3336 // NB: This code is correct only when used to check constant 3337 // bus limitations because GFX7 support no f16 inline constants. 3338 // Note that there are no cases when a GFX7 opcode violates 3339 // constant bus limitations due to the use of an f16 constant. 
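// That is, a 16-bit literal may be reported as inlineable here even on
// subtargets without f16 inline constants; for constant bus checking this
// is harmless.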
3340 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3341 unsigned OpIdx) const { 3342 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3343 3344 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3345 return false; 3346 } 3347 3348 const MCOperand &MO = Inst.getOperand(OpIdx); 3349 3350 int64_t Val = MO.getImm(); 3351 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3352 3353 switch (OpSize) { // expected operand size 3354 case 8: 3355 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3356 case 4: 3357 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3358 case 2: { 3359 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3360 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3361 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3362 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3363 return AMDGPU::isInlinableIntLiteral(Val); 3364 3365 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3366 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3367 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3368 return AMDGPU::isInlinableIntLiteralV216(Val); 3369 3370 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3371 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3372 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3373 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3374 3375 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3376 } 3377 default: 3378 llvm_unreachable("invalid operand size"); 3379 } 3380 } 3381 3382 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3383 if (!isGFX10Plus()) 3384 return 1; 3385 3386 switch (Opcode) { 3387 // 64-bit shift instructions can use only one scalar value input 3388 case AMDGPU::V_LSHLREV_B64_e64: 3389 case AMDGPU::V_LSHLREV_B64_gfx10: 3390 case AMDGPU::V_LSHLREV_B64_e64_gfx11: 3391 case AMDGPU::V_LSHRREV_B64_e64: 3392 case AMDGPU::V_LSHRREV_B64_gfx10: 3393 case AMDGPU::V_LSHRREV_B64_e64_gfx11: 3394 case AMDGPU::V_ASHRREV_I64_e64: 3395 case AMDGPU::V_ASHRREV_I64_gfx10: 3396 case AMDGPU::V_ASHRREV_I64_e64_gfx11: 3397 case AMDGPU::V_LSHL_B64_e64: 3398 case AMDGPU::V_LSHR_B64_e64: 3399 case AMDGPU::V_ASHR_I64_e64: 3400 return 1; 3401 default: 3402 return 2; 3403 } 3404 } 3405 3406 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3407 const MCOperand &MO = Inst.getOperand(OpIdx); 3408 if (MO.isImm()) { 3409 return !isInlineConstant(Inst, OpIdx); 3410 } else if (MO.isReg()) { 3411 auto Reg = MO.getReg(); 3412 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3413 auto PReg = mc2PseudoReg(Reg); 3414 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3415 } else { 3416 return true; 3417 } 3418 } 3419 3420 bool 3421 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3422 const OperandVector &Operands) { 3423 const unsigned Opcode = Inst.getOpcode(); 3424 const MCInstrDesc &Desc = MII.get(Opcode); 3425 unsigned LastSGPR = AMDGPU::NoRegister; 3426 unsigned ConstantBusUseCount = 0; 3427 unsigned NumLiterals = 0; 3428 unsigned LiteralSize; 3429 3430 if (Desc.TSFlags & 3431 (SIInstrFlags::VOPC | 3432 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3433 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3434 SIInstrFlags::SDWA)) { 3435 // Check special imm operands (used by madmk, etc) 3436 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3437 ++NumLiterals; 3438 LiteralSize = 4; 3439 } 3440 3441 SmallDenseSet<unsigned> SGPRsUsed; 3442 unsigned SGPRUsed = 
findImplicitSGPRReadInVOP(Inst); 3443 if (SGPRUsed != AMDGPU::NoRegister) { 3444 SGPRsUsed.insert(SGPRUsed); 3445 ++ConstantBusUseCount; 3446 } 3447 3448 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3449 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3450 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3451 3452 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3453 3454 for (int OpIdx : OpIndices) { 3455 if (OpIdx == -1) break; 3456 3457 const MCOperand &MO = Inst.getOperand(OpIdx); 3458 if (usesConstantBus(Inst, OpIdx)) { 3459 if (MO.isReg()) { 3460 LastSGPR = mc2PseudoReg(MO.getReg()); 3461 // Pairs of registers with partial intersections like these 3462 // s0, s[0:1] 3463 // flat_scratch_lo, flat_scratch 3464 // flat_scratch_lo, flat_scratch_hi 3465 // are theoretically valid but they are disabled anyway. 3466 // Note that this code mimics SIInstrInfo::verifyInstruction. 3467 if (SGPRsUsed.insert(LastSGPR).second) { 3468 ++ConstantBusUseCount; 3469 } 3470 } else { // Expression or a literal 3471 3472 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3473 continue; // special operand like VINTERP attr_chan 3474 3475 // An instruction may use only one literal. 3476 // This has been validated in a previous step. 3477 // See validateVOPLiteral. 3478 // This literal may be used by more than one operand. 3479 // If all these operands are of the same size, 3480 // this literal counts as one scalar value. 3481 // Otherwise it counts as 2 scalar values. 3482 // See "GFX10 Shader Programming", section 3.6.2.3. 3483 3484 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3485 if (Size < 4) Size = 4; 3486 3487 if (NumLiterals == 0) { 3488 NumLiterals = 1; 3489 LiteralSize = Size; 3490 } else if (LiteralSize != Size) { 3491 NumLiterals = 2; 3492 } 3493 } 3494 } 3495 } 3496 } 3497 ConstantBusUseCount += NumLiterals; 3498 3499 if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) 3500 return true; 3501 3502 SMLoc LitLoc = getLitLoc(Operands); 3503 SMLoc RegLoc = getRegLoc(LastSGPR, Operands); 3504 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ?
RegLoc : LitLoc; 3505 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3506 return false; 3507 } 3508 3509 bool 3510 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3511 const OperandVector &Operands) { 3512 const unsigned Opcode = Inst.getOpcode(); 3513 const MCInstrDesc &Desc = MII.get(Opcode); 3514 3515 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3516 if (DstIdx == -1 || 3517 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3518 return true; 3519 } 3520 3521 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3522 3523 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3524 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3525 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3526 3527 assert(DstIdx != -1); 3528 const MCOperand &Dst = Inst.getOperand(DstIdx); 3529 assert(Dst.isReg()); 3530 3531 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3532 3533 for (int SrcIdx : SrcIndices) { 3534 if (SrcIdx == -1) break; 3535 const MCOperand &Src = Inst.getOperand(SrcIdx); 3536 if (Src.isReg()) { 3537 if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) { 3538 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3539 Error(getRegLoc(SrcReg, Operands), 3540 "destination must be different than all sources"); 3541 return false; 3542 } 3543 } 3544 } 3545 3546 return true; 3547 } 3548 3549 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3550 3551 const unsigned Opc = Inst.getOpcode(); 3552 const MCInstrDesc &Desc = MII.get(Opc); 3553 3554 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3555 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3556 assert(ClampIdx != -1); 3557 return Inst.getOperand(ClampIdx).getImm() == 0; 3558 } 3559 3560 return true; 3561 } 3562 3563 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3564 3565 const unsigned Opc = Inst.getOpcode(); 3566 const MCInstrDesc &Desc = MII.get(Opc); 3567 3568 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3569 return None; 3570 3571 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3572 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3573 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3574 3575 assert(VDataIdx != -1); 3576 3577 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3578 return None; 3579 3580 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3581 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3582 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3583 if (DMask == 0) 3584 DMask = 1; 3585 3586 bool isPackedD16 = false; 3587 unsigned DataSize = 3588 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 3589 if (hasPackedD16()) { 3590 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3591 isPackedD16 = D16Idx >= 0; 3592 if (isPackedD16 && Inst.getOperand(D16Idx).getImm()) 3593 DataSize = (DataSize + 1) / 2; 3594 } 3595 3596 if ((VDataSize / 4) == DataSize + TFESize) 3597 return None; 3598 3599 return StringRef(isPackedD16 3600 ? 
"image data size does not match dmask, d16 and tfe" 3601 : "image data size does not match dmask and tfe"); 3602 } 3603 3604 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3605 const unsigned Opc = Inst.getOpcode(); 3606 const MCInstrDesc &Desc = MII.get(Opc); 3607 3608 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3609 return true; 3610 3611 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3612 3613 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3614 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3615 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3616 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3617 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3618 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3619 3620 assert(VAddr0Idx != -1); 3621 assert(SrsrcIdx != -1); 3622 assert(SrsrcIdx > VAddr0Idx); 3623 3624 if (DimIdx == -1) 3625 return true; // intersect_ray 3626 3627 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3628 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3629 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3630 unsigned ActualAddrSize = 3631 IsNSA ? SrsrcIdx - VAddr0Idx 3632 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3633 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3634 3635 unsigned ExpectedAddrSize = 3636 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3637 3638 if (!IsNSA) { 3639 if (ExpectedAddrSize > 8) 3640 ExpectedAddrSize = 16; 3641 3642 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3643 // This provides backward compatibility for assembly created 3644 // before 160b/192b/224b types were directly supported. 3645 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3646 return true; 3647 } 3648 3649 return ActualAddrSize == ExpectedAddrSize; 3650 } 3651 3652 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3653 3654 const unsigned Opc = Inst.getOpcode(); 3655 const MCInstrDesc &Desc = MII.get(Opc); 3656 3657 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3658 return true; 3659 if (!Desc.mayLoad() || !Desc.mayStore()) 3660 return true; // Not atomic 3661 3662 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3663 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3664 3665 // This is an incomplete check because image_atomic_cmpswap 3666 // may only use 0x3 and 0xf while other atomic operations 3667 // may use 0x1 and 0x3. However these limitations are 3668 // verified when we check that dmask matches dst size. 3669 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3670 } 3671 3672 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3673 3674 const unsigned Opc = Inst.getOpcode(); 3675 const MCInstrDesc &Desc = MII.get(Opc); 3676 3677 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3678 return true; 3679 3680 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3681 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3682 3683 // GATHER4 instructions use dmask in a different fashion compared to 3684 // other MIMG instructions. The only useful DMASK values are 3685 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3686 // (red,red,red,red) etc.) The ISA document doesn't mention 3687 // this. 
3688 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3689 } 3690 3691 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3692 const unsigned Opc = Inst.getOpcode(); 3693 const MCInstrDesc &Desc = MII.get(Opc); 3694 3695 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3696 return true; 3697 3698 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3699 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3700 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3701 3702 if (!BaseOpcode->MSAA) 3703 return true; 3704 3705 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3706 assert(DimIdx != -1); 3707 3708 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3709 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3710 3711 return DimInfo->MSAA; 3712 } 3713 3714 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3715 { 3716 switch (Opcode) { 3717 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3718 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3719 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3720 return true; 3721 default: 3722 return false; 3723 } 3724 } 3725 3726 // movrels* opcodes should only allow VGPRS as src0. 3727 // This is specified in .td description for vop1/vop3, 3728 // but sdwa is handled differently. See isSDWAOperand. 3729 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3730 const OperandVector &Operands) { 3731 3732 const unsigned Opc = Inst.getOpcode(); 3733 const MCInstrDesc &Desc = MII.get(Opc); 3734 3735 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3736 return true; 3737 3738 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3739 assert(Src0Idx != -1); 3740 3741 SMLoc ErrLoc; 3742 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3743 if (Src0.isReg()) { 3744 auto Reg = mc2PseudoReg(Src0.getReg()); 3745 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3746 if (!isSGPR(Reg, TRI)) 3747 return true; 3748 ErrLoc = getRegLoc(Reg, Operands); 3749 } else { 3750 ErrLoc = getConstLoc(Operands); 3751 } 3752 3753 Error(ErrLoc, "source operand must be a VGPR"); 3754 return false; 3755 } 3756 3757 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3758 const OperandVector &Operands) { 3759 3760 const unsigned Opc = Inst.getOpcode(); 3761 3762 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3763 return true; 3764 3765 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3766 assert(Src0Idx != -1); 3767 3768 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3769 if (!Src0.isReg()) 3770 return true; 3771 3772 auto Reg = mc2PseudoReg(Src0.getReg()); 3773 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3774 if (!isGFX90A() && isSGPR(Reg, TRI)) { 3775 Error(getRegLoc(Reg, Operands), 3776 "source operand must be either a VGPR or an inline constant"); 3777 return false; 3778 } 3779 3780 return true; 3781 } 3782 3783 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 3784 const OperandVector &Operands) { 3785 const unsigned Opc = Inst.getOpcode(); 3786 const MCInstrDesc &Desc = MII.get(Opc); 3787 3788 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 3789 return true; 3790 3791 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 3792 if (Src2Idx == -1) 3793 return true; 3794 3795 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 3796 if (!Src2.isReg()) 3797 return true; 3798 3799 MCRegister Src2Reg = Src2.getReg(); 3800 MCRegister DstReg = Inst.getOperand(0).getReg(); 3801 if (Src2Reg == DstReg) 3802 return 
true;
3803
3804 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3805 if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3806 return true;
3807
3808 if (TRI->regsOverlap(Src2Reg, DstReg)) {
3809 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3810 "source 2 operand must not partially overlap with dst");
3811 return false;
3812 }
3813
3814 return true;
3815 }
3816
3817 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3818 switch (Inst.getOpcode()) {
3819 default:
3820 return true;
3821 case V_DIV_SCALE_F32_gfx6_gfx7:
3822 case V_DIV_SCALE_F32_vi:
3823 case V_DIV_SCALE_F32_gfx10:
3824 case V_DIV_SCALE_F64_gfx6_gfx7:
3825 case V_DIV_SCALE_F64_vi:
3826 case V_DIV_SCALE_F64_gfx10:
3827 break;
3828 }
3829
3830 // TODO: Check that src0 = src1 or src2.
3831
3832 for (auto Name : {AMDGPU::OpName::src0_modifiers,
3833 AMDGPU::OpName::src1_modifiers,
3834 AMDGPU::OpName::src2_modifiers}) {
3835 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3836 .getImm() &
3837 SISrcMods::ABS) {
3838 return false;
3839 }
3840 }
3841
3842 return true;
3843 }
3844
3845 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3846
3847 const unsigned Opc = Inst.getOpcode();
3848 const MCInstrDesc &Desc = MII.get(Opc);
3849
3850 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3851 return true;
3852
3853 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3854 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3855 if (isCI() || isSI())
3856 return false;
3857 }
3858
3859 return true;
3860 }
3861
3862 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3863 const unsigned Opc = Inst.getOpcode();
3864 const MCInstrDesc &Desc = MII.get(Opc);
3865
3866 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3867 return true;
3868
3869 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3870 if (DimIdx < 0)
3871 return true;
3872
3873 long Imm = Inst.getOperand(DimIdx).getImm();
3874 if (Imm < 0 || Imm >= 8)
3875 return false;
3876
3877 return true;
3878 }
3879
3880 static bool IsRevOpcode(const unsigned Opcode)
3881 {
3882 switch (Opcode) {
3883 case AMDGPU::V_SUBREV_F32_e32:
3884 case AMDGPU::V_SUBREV_F32_e64:
3885 case AMDGPU::V_SUBREV_F32_e32_gfx10:
3886 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3887 case AMDGPU::V_SUBREV_F32_e32_vi:
3888 case AMDGPU::V_SUBREV_F32_e64_gfx10:
3889 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3890 case AMDGPU::V_SUBREV_F32_e64_vi:
3891
3892 case AMDGPU::V_SUBREV_CO_U32_e32:
3893 case AMDGPU::V_SUBREV_CO_U32_e64:
3894 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3895 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3896
3897 case AMDGPU::V_SUBBREV_U32_e32:
3898 case AMDGPU::V_SUBBREV_U32_e64:
3899 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3900 case AMDGPU::V_SUBBREV_U32_e32_vi:
3901 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3902 case AMDGPU::V_SUBBREV_U32_e64_vi:
3903
3904 case AMDGPU::V_SUBREV_U32_e32:
3905 case AMDGPU::V_SUBREV_U32_e64:
3906 case AMDGPU::V_SUBREV_U32_e32_gfx9:
3907 case AMDGPU::V_SUBREV_U32_e32_vi:
3908 case AMDGPU::V_SUBREV_U32_e64_gfx9:
3909 case AMDGPU::V_SUBREV_U32_e64_vi:
3910
3911 case AMDGPU::V_SUBREV_F16_e32:
3912 case AMDGPU::V_SUBREV_F16_e64:
3913 case AMDGPU::V_SUBREV_F16_e32_gfx10:
3914 case AMDGPU::V_SUBREV_F16_e32_vi:
3915 case AMDGPU::V_SUBREV_F16_e64_gfx10:
3916 case AMDGPU::V_SUBREV_F16_e64_vi:
3917
3918 case AMDGPU::V_SUBREV_U16_e32:
3919 case AMDGPU::V_SUBREV_U16_e64:
3920 case AMDGPU::V_SUBREV_U16_e32_vi:
3921 case AMDGPU::V_SUBREV_U16_e64_vi:
3922
3923 case
AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3924 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3925 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3926 3927 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3928 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3929 3930 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3931 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3932 3933 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3934 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3935 3936 case AMDGPU::V_LSHRREV_B32_e32: 3937 case AMDGPU::V_LSHRREV_B32_e64: 3938 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3939 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3940 case AMDGPU::V_LSHRREV_B32_e32_vi: 3941 case AMDGPU::V_LSHRREV_B32_e64_vi: 3942 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3943 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3944 3945 case AMDGPU::V_ASHRREV_I32_e32: 3946 case AMDGPU::V_ASHRREV_I32_e64: 3947 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3948 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3949 case AMDGPU::V_ASHRREV_I32_e32_vi: 3950 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3951 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3952 case AMDGPU::V_ASHRREV_I32_e64_vi: 3953 3954 case AMDGPU::V_LSHLREV_B32_e32: 3955 case AMDGPU::V_LSHLREV_B32_e64: 3956 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3957 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3958 case AMDGPU::V_LSHLREV_B32_e32_vi: 3959 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3960 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3961 case AMDGPU::V_LSHLREV_B32_e64_vi: 3962 3963 case AMDGPU::V_LSHLREV_B16_e32: 3964 case AMDGPU::V_LSHLREV_B16_e64: 3965 case AMDGPU::V_LSHLREV_B16_e32_vi: 3966 case AMDGPU::V_LSHLREV_B16_e64_vi: 3967 case AMDGPU::V_LSHLREV_B16_gfx10: 3968 3969 case AMDGPU::V_LSHRREV_B16_e32: 3970 case AMDGPU::V_LSHRREV_B16_e64: 3971 case AMDGPU::V_LSHRREV_B16_e32_vi: 3972 case AMDGPU::V_LSHRREV_B16_e64_vi: 3973 case AMDGPU::V_LSHRREV_B16_gfx10: 3974 3975 case AMDGPU::V_ASHRREV_I16_e32: 3976 case AMDGPU::V_ASHRREV_I16_e64: 3977 case AMDGPU::V_ASHRREV_I16_e32_vi: 3978 case AMDGPU::V_ASHRREV_I16_e64_vi: 3979 case AMDGPU::V_ASHRREV_I16_gfx10: 3980 3981 case AMDGPU::V_LSHLREV_B64_e64: 3982 case AMDGPU::V_LSHLREV_B64_gfx10: 3983 case AMDGPU::V_LSHLREV_B64_vi: 3984 3985 case AMDGPU::V_LSHRREV_B64_e64: 3986 case AMDGPU::V_LSHRREV_B64_gfx10: 3987 case AMDGPU::V_LSHRREV_B64_vi: 3988 3989 case AMDGPU::V_ASHRREV_I64_e64: 3990 case AMDGPU::V_ASHRREV_I64_gfx10: 3991 case AMDGPU::V_ASHRREV_I64_vi: 3992 3993 case AMDGPU::V_PK_LSHLREV_B16: 3994 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3995 case AMDGPU::V_PK_LSHLREV_B16_vi: 3996 3997 case AMDGPU::V_PK_LSHRREV_B16: 3998 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3999 case AMDGPU::V_PK_LSHRREV_B16_vi: 4000 case AMDGPU::V_PK_ASHRREV_I16: 4001 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 4002 case AMDGPU::V_PK_ASHRREV_I16_vi: 4003 return true; 4004 default: 4005 return false; 4006 } 4007 } 4008 4009 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 4010 4011 using namespace SIInstrFlags; 4012 const unsigned Opcode = Inst.getOpcode(); 4013 const MCInstrDesc &Desc = MII.get(Opcode); 4014 4015 // lds_direct register is defined so that it can be used 4016 // with 9-bit operands only. Ignore encodings which do not accept these. 
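// A minimal illustrative use (not taken from this file): something like
//   v_mov_b32 v0, lds_direct
// reads lds_direct as src0 of a VOP1 encoding. The checks below reject
// lds_direct on GPUs that do not support it, with *rev or SDWA opcodes,
// and in any source position other than src0.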
4017 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 4018 if ((Desc.TSFlags & Enc) == 0) 4019 return None; 4020 4021 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 4022 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 4023 if (SrcIdx == -1) 4024 break; 4025 const auto &Src = Inst.getOperand(SrcIdx); 4026 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 4027 4028 if (isGFX90A() || isGFX11Plus()) 4029 return StringRef("lds_direct is not supported on this GPU"); 4030 4031 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 4032 return StringRef("lds_direct cannot be used with this instruction"); 4033 4034 if (SrcName != OpName::src0) 4035 return StringRef("lds_direct may be used as src0 only"); 4036 } 4037 } 4038 4039 return None; 4040 } 4041 4042 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 4043 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4044 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4045 if (Op.isFlatOffset()) 4046 return Op.getStartLoc(); 4047 } 4048 return getLoc(); 4049 } 4050 4051 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 4052 const OperandVector &Operands) { 4053 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4054 if ((TSFlags & SIInstrFlags::FLAT) == 0) 4055 return true; 4056 4057 auto Opcode = Inst.getOpcode(); 4058 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4059 assert(OpNum != -1); 4060 4061 const auto &Op = Inst.getOperand(OpNum); 4062 if (!hasFlatOffsets() && Op.getImm() != 0) { 4063 Error(getFlatOffsetLoc(Operands), 4064 "flat offset modifier is not supported on this GPU"); 4065 return false; 4066 } 4067 4068 // For FLAT segment the offset must be positive; 4069 // MSB is ignored and forced to zero. 4070 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 4071 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 4072 if (!isIntN(OffsetSize, Op.getImm())) { 4073 Error(getFlatOffsetLoc(Operands), 4074 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 4075 return false; 4076 } 4077 } else { 4078 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 4079 if (!isUIntN(OffsetSize, Op.getImm())) { 4080 Error(getFlatOffsetLoc(Operands), 4081 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 4082 return false; 4083 } 4084 } 4085 4086 return true; 4087 } 4088 4089 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 4090 // Start with second operand because SMEM Offset cannot be dst or src0. 
4091 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 4092 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4093 if (Op.isSMEMOffset()) 4094 return Op.getStartLoc(); 4095 } 4096 return getLoc(); 4097 } 4098 4099 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 4100 const OperandVector &Operands) { 4101 if (isCI() || isSI()) 4102 return true; 4103 4104 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4105 if ((TSFlags & SIInstrFlags::SMRD) == 0) 4106 return true; 4107 4108 auto Opcode = Inst.getOpcode(); 4109 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4110 if (OpNum == -1) 4111 return true; 4112 4113 const auto &Op = Inst.getOperand(OpNum); 4114 if (!Op.isImm()) 4115 return true; 4116 4117 uint64_t Offset = Op.getImm(); 4118 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 4119 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 4120 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 4121 return true; 4122 4123 Error(getSMEMOffsetLoc(Operands), 4124 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 4125 "expected a 21-bit signed offset"); 4126 4127 return false; 4128 } 4129 4130 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 4131 unsigned Opcode = Inst.getOpcode(); 4132 const MCInstrDesc &Desc = MII.get(Opcode); 4133 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 4134 return true; 4135 4136 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4137 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4138 4139 const int OpIndices[] = { Src0Idx, Src1Idx }; 4140 4141 unsigned NumExprs = 0; 4142 unsigned NumLiterals = 0; 4143 uint32_t LiteralValue; 4144 4145 for (int OpIdx : OpIndices) { 4146 if (OpIdx == -1) break; 4147 4148 const MCOperand &MO = Inst.getOperand(OpIdx); 4149 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 4150 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 4151 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4152 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4153 if (NumLiterals == 0 || LiteralValue != Value) { 4154 LiteralValue = Value; 4155 ++NumLiterals; 4156 } 4157 } else if (MO.isExpr()) { 4158 ++NumExprs; 4159 } 4160 } 4161 } 4162 4163 return NumLiterals + NumExprs <= 1; 4164 } 4165 4166 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4167 const unsigned Opc = Inst.getOpcode(); 4168 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 4169 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 4170 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4171 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4172 4173 if (OpSel & ~3) 4174 return false; 4175 } 4176 4177 if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) { 4178 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4179 if (OpSelIdx != -1) { 4180 if (Inst.getOperand(OpSelIdx).getImm() != 0) 4181 return false; 4182 } 4183 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 4184 if (OpSelHiIdx != -1) { 4185 if (Inst.getOperand(OpSelHiIdx).getImm() != -1) 4186 return false; 4187 } 4188 } 4189 4190 return true; 4191 } 4192 4193 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4194 const OperandVector &Operands) { 4195 const unsigned Opc = Inst.getOpcode(); 4196 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4197 if (DppCtrlIdx < 0) 4198 return true; 4199 unsigned DppCtrl = 
Inst.getOperand(DppCtrlIdx).getImm(); 4200 4201 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 4202 // DPP64 is supported for row_newbcast only. 4203 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4204 if (Src0Idx >= 0 && 4205 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 4206 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4207 Error(S, "64 bit dpp only supports row_newbcast"); 4208 return false; 4209 } 4210 } 4211 4212 return true; 4213 } 4214 4215 // Check if VCC register matches wavefront size 4216 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4217 auto FB = getFeatureBits(); 4218 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4219 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4220 } 4221 4222 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4223 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4224 const OperandVector &Operands) { 4225 unsigned Opcode = Inst.getOpcode(); 4226 const MCInstrDesc &Desc = MII.get(Opcode); 4227 const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); 4228 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4229 ImmIdx == -1) 4230 return true; 4231 4232 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4233 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4234 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 4235 4236 const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx}; 4237 4238 unsigned NumExprs = 0; 4239 unsigned NumLiterals = 0; 4240 uint32_t LiteralValue; 4241 4242 for (int OpIdx : OpIndices) { 4243 if (OpIdx == -1) 4244 continue; 4245 4246 const MCOperand &MO = Inst.getOperand(OpIdx); 4247 if (!MO.isImm() && !MO.isExpr()) 4248 continue; 4249 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 4250 continue; 4251 4252 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 4253 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 4254 Error(getConstLoc(Operands), 4255 "inline constants are not allowed for this operand"); 4256 return false; 4257 } 4258 4259 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4260 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4261 if (NumLiterals == 0 || LiteralValue != Value) { 4262 LiteralValue = Value; 4263 ++NumLiterals; 4264 } 4265 } else if (MO.isExpr()) { 4266 ++NumExprs; 4267 } 4268 } 4269 NumLiterals += NumExprs; 4270 4271 if (!NumLiterals) 4272 return true; 4273 4274 if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4275 Error(getLitLoc(Operands), "literal operands are not supported"); 4276 return false; 4277 } 4278 4279 if (NumLiterals > 1) { 4280 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4281 return false; 4282 } 4283 4284 return true; 4285 } 4286 4287 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4288 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4289 const MCRegisterInfo *MRI) { 4290 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4291 if (OpIdx < 0) 4292 return -1; 4293 4294 const MCOperand &Op = Inst.getOperand(OpIdx); 4295 if (!Op.isReg()) 4296 return -1; 4297 4298 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4299 auto Reg = Sub ? Sub : Op.getReg(); 4300 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4301 return AGPR32.contains(Reg) ? 
1 : 0; 4302 } 4303 4304 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4305 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4306 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4307 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4308 SIInstrFlags::DS)) == 0) 4309 return true; 4310 4311 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4312 : AMDGPU::OpName::vdata; 4313 4314 const MCRegisterInfo *MRI = getMRI(); 4315 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4316 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4317 4318 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4319 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4320 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4321 return false; 4322 } 4323 4324 auto FB = getFeatureBits(); 4325 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4326 if (DataAreg < 0 || DstAreg < 0) 4327 return true; 4328 return DstAreg == DataAreg; 4329 } 4330 4331 return DstAreg < 1 && DataAreg < 1; 4332 } 4333 4334 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4335 auto FB = getFeatureBits(); 4336 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4337 return true; 4338 4339 const MCRegisterInfo *MRI = getMRI(); 4340 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4341 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4342 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4343 const MCOperand &Op = Inst.getOperand(I); 4344 if (!Op.isReg()) 4345 continue; 4346 4347 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4348 if (!Sub) 4349 continue; 4350 4351 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4352 return false; 4353 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4354 return false; 4355 } 4356 4357 return true; 4358 } 4359 4360 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const { 4361 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4362 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4363 if (Op.isBLGP()) 4364 return Op.getStartLoc(); 4365 } 4366 return SMLoc(); 4367 } 4368 4369 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst, 4370 const OperandVector &Operands) { 4371 unsigned Opc = Inst.getOpcode(); 4372 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp); 4373 if (BlgpIdx == -1) 4374 return true; 4375 SMLoc BLGPLoc = getBLGPLoc(Operands); 4376 if (!BLGPLoc.isValid()) 4377 return true; 4378 bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:"); 4379 auto FB = getFeatureBits(); 4380 bool UsesNeg = false; 4381 if (FB[AMDGPU::FeatureGFX940Insts]) { 4382 switch (Opc) { 4383 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: 4384 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: 4385 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: 4386 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: 4387 UsesNeg = true; 4388 } 4389 } 4390 4391 if (IsNeg == UsesNeg) 4392 return true; 4393 4394 Error(BLGPLoc, 4395 UsesNeg ? "invalid modifier: blgp is not supported" 4396 : "invalid modifier: neg is not supported"); 4397 4398 return false; 4399 } 4400 4401 // gfx90a has an undocumented limitation: 4402 // DS_GWS opcodes must use even aligned registers. 
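// Illustrative example (assembly syntax approximate): on gfx90a,
//   ds_gws_init v1 offset:0 gds
// would be rejected below with "vgpr must be even aligned" because v1 is an
// odd-numbered VGPR; v0 or v2 would be accepted.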
4403 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4404 const OperandVector &Operands) {
4405 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4406 return true;
4407
4408 int Opc = Inst.getOpcode();
4409 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4410 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4411 return true;
4412
4413 const MCRegisterInfo *MRI = getMRI();
4414 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4415 int Data0Pos =
4416 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4417 assert(Data0Pos != -1);
4418 auto Reg = Inst.getOperand(Data0Pos).getReg();
4419 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4420 if (RegIdx & 1) {
4421 SMLoc RegLoc = getRegLoc(Reg, Operands);
4422 Error(RegLoc, "vgpr must be even aligned");
4423 return false;
4424 }
4425
4426 return true;
4427 }
4428
4429 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4430 const OperandVector &Operands,
4431 const SMLoc &IDLoc) {
4432 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4433 AMDGPU::OpName::cpol);
4434 if (CPolPos == -1)
4435 return true;
4436
4437 unsigned CPol = Inst.getOperand(CPolPos).getImm();
4438
4439 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4440 if (TSFlags & SIInstrFlags::SMRD) {
4441 if (CPol && (isSI() || isCI())) {
4442 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4443 Error(S, "cache policy is not supported for SMRD instructions");
4444 return false;
4445 }
4446 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4447 Error(IDLoc, "invalid cache policy for SMEM instruction");
4448 return false;
4449 }
4450 }
4451
4452 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4453 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4454 StringRef CStr(S.getPointer());
4455 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4456 Error(S, "scc is not supported on this GPU");
4457 return false;
4458 }
4459
4460 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4461 return true;
4462
4463 if (TSFlags & SIInstrFlags::IsAtomicRet) {
4464 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4465 Error(IDLoc, isGFX940() ? "instruction must use sc0"
4466 : "instruction must use glc");
4467 return false;
4468 }
4469 } else {
4470 if (CPol & CPol::GLC) {
4471 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4472 StringRef CStr(S.getPointer());
4473 S = SMLoc::getFromPointer(
4474 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4475 Error(S, isGFX940() ? "instruction must not use sc0"
4476 : "instruction must not use glc");
4477 return false;
4478 }
4479 }
4480
4481 return true;
4482 }
4483
4484 bool AMDGPUAsmParser::validateFlatLdsDMA(const MCInst &Inst,
4485 const OperandVector &Operands,
4486 const SMLoc &IDLoc) {
4487 if (isGFX940())
4488 return true;
4489
4490 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4491 if ((TSFlags & (SIInstrFlags::VALU | SIInstrFlags::FLAT)) !=
4492 (SIInstrFlags::VALU | SIInstrFlags::FLAT))
4493 return true;
4494 // This is FLAT LDS DMA.
4495
4496 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyLDS, Operands);
4497 StringRef CStr(S.getPointer());
4498 if (!CStr.startswith("lds")) {
4499 // This is an incorrectly selected LDS DMA version of a FLAT load opcode.
4500 // The LDS version should have the 'lds' modifier, but it follows the
4501 // optional operands, so its absence is ignored by the matcher.
4502 Error(IDLoc, "invalid operands for instruction"); 4503 return false; 4504 } 4505 4506 return true; 4507 } 4508 4509 bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) { 4510 if (!isGFX11Plus()) 4511 return true; 4512 for (auto &Operand : Operands) { 4513 if (!Operand->isReg()) 4514 continue; 4515 unsigned Reg = Operand->getReg(); 4516 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) { 4517 Error(getRegLoc(Reg, Operands), 4518 "execz and vccz are not supported on this GPU"); 4519 return false; 4520 } 4521 } 4522 return true; 4523 } 4524 4525 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4526 const SMLoc &IDLoc, 4527 const OperandVector &Operands) { 4528 if (auto ErrMsg = validateLdsDirect(Inst)) { 4529 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4530 return false; 4531 } 4532 if (!validateSOPLiteral(Inst)) { 4533 Error(getLitLoc(Operands), 4534 "only one literal operand is allowed"); 4535 return false; 4536 } 4537 if (!validateVOPLiteral(Inst, Operands)) { 4538 return false; 4539 } 4540 if (!validateConstantBusLimitations(Inst, Operands)) { 4541 return false; 4542 } 4543 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4544 return false; 4545 } 4546 if (!validateIntClampSupported(Inst)) { 4547 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4548 "integer clamping is not supported on this GPU"); 4549 return false; 4550 } 4551 if (!validateOpSel(Inst)) { 4552 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4553 "invalid op_sel operand"); 4554 return false; 4555 } 4556 if (!validateDPP(Inst, Operands)) { 4557 return false; 4558 } 4559 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 4560 if (!validateMIMGD16(Inst)) { 4561 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4562 "d16 modifier is not supported on this GPU"); 4563 return false; 4564 } 4565 if (!validateMIMGDim(Inst)) { 4566 Error(IDLoc, "dim modifier is required on this GPU"); 4567 return false; 4568 } 4569 if (!validateMIMGMSAA(Inst)) { 4570 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4571 "invalid dim; must be MSAA type"); 4572 return false; 4573 } 4574 if (auto ErrMsg = validateMIMGDataSize(Inst)) { 4575 Error(IDLoc, *ErrMsg); 4576 return false; 4577 } 4578 if (!validateMIMGAddrSize(Inst)) { 4579 Error(IDLoc, 4580 "image address size does not match dim and a16"); 4581 return false; 4582 } 4583 if (!validateMIMGAtomicDMask(Inst)) { 4584 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4585 "invalid atomic image dmask"); 4586 return false; 4587 } 4588 if (!validateMIMGGatherDMask(Inst)) { 4589 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4590 "invalid image_gather dmask: only one bit must be set"); 4591 return false; 4592 } 4593 if (!validateMovrels(Inst, Operands)) { 4594 return false; 4595 } 4596 if (!validateFlatOffset(Inst, Operands)) { 4597 return false; 4598 } 4599 if (!validateSMEMOffset(Inst, Operands)) { 4600 return false; 4601 } 4602 if (!validateMAIAccWrite(Inst, Operands)) { 4603 return false; 4604 } 4605 if (!validateMFMA(Inst, Operands)) { 4606 return false; 4607 } 4608 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4609 return false; 4610 } 4611 4612 if (!validateAGPRLdSt(Inst)) { 4613 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4614 ? 
"invalid register class: data and dst should be all VGPR or AGPR" 4615 : "invalid register class: agpr loads and stores not supported on this GPU" 4616 ); 4617 return false; 4618 } 4619 if (!validateVGPRAlign(Inst)) { 4620 Error(IDLoc, 4621 "invalid register class: vgpr tuples must be 64 bit aligned"); 4622 return false; 4623 } 4624 if (!validateGWS(Inst, Operands)) { 4625 return false; 4626 } 4627 4628 if (!validateBLGP(Inst, Operands)) { 4629 return false; 4630 } 4631 4632 if (!validateDivScale(Inst)) { 4633 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4634 return false; 4635 } 4636 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4637 return false; 4638 } 4639 if (!validateExeczVcczOperands(Operands)) { 4640 return false; 4641 } 4642 4643 if (!validateFlatLdsDMA(Inst, Operands, IDLoc)) { 4644 return false; 4645 } 4646 4647 return true; 4648 } 4649 4650 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4651 const FeatureBitset &FBS, 4652 unsigned VariantID = 0); 4653 4654 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4655 const FeatureBitset &AvailableFeatures, 4656 unsigned VariantID); 4657 4658 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4659 const FeatureBitset &FBS) { 4660 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4661 } 4662 4663 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4664 const FeatureBitset &FBS, 4665 ArrayRef<unsigned> Variants) { 4666 for (auto Variant : Variants) { 4667 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4668 return true; 4669 } 4670 4671 return false; 4672 } 4673 4674 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4675 const SMLoc &IDLoc) { 4676 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4677 4678 // Check if requested instruction variant is supported. 4679 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4680 return false; 4681 4682 // This instruction is not supported. 4683 // Clear any other pending errors because they are no longer relevant. 4684 getParser().clearPendingErrors(); 4685 4686 // Requested instruction variant is not supported. 4687 // Check if any other variants are supported. 4688 StringRef VariantName = getMatchedVariantName(); 4689 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4690 return Error(IDLoc, 4691 Twine(VariantName, 4692 " variant of this instruction is not supported")); 4693 } 4694 4695 // Finally check if this instruction is supported on any other GPU. 4696 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4697 return Error(IDLoc, "instruction not supported on this GPU"); 4698 } 4699 4700 // Instruction not supported on any GPU. Probably a typo. 4701 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4702 return Error(IDLoc, "invalid instruction" + Suggestion); 4703 } 4704 4705 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4706 OperandVector &Operands, 4707 MCStreamer &Out, 4708 uint64_t &ErrorInfo, 4709 bool MatchingInlineAsm) { 4710 MCInst Inst; 4711 unsigned Result = Match_Success; 4712 for (auto Variant : getMatchedVariants()) { 4713 uint64_t EI; 4714 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4715 Variant); 4716 // We order match statuses from least to most specific. 
We use the most specific
4717 // status as the result:
4718 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4719 if ((R == Match_Success) ||
4720 (R == Match_PreferE32) ||
4721 (R == Match_MissingFeature && Result != Match_PreferE32) ||
4722 (R == Match_InvalidOperand && Result != Match_MissingFeature
4723 && Result != Match_PreferE32) ||
4724 (R == Match_MnemonicFail && Result != Match_InvalidOperand
4725 && Result != Match_MissingFeature
4726 && Result != Match_PreferE32)) {
4727 Result = R;
4728 ErrorInfo = EI;
4729 }
4730 if (R == Match_Success)
4731 break;
4732 }
4733
4734 if (Result == Match_Success) {
4735 if (!validateInstruction(Inst, IDLoc, Operands)) {
4736 return true;
4737 }
4738 Inst.setLoc(IDLoc);
4739 Out.emitInstruction(Inst, getSTI());
4740 return false;
4741 }
4742
4743 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4744 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4745 return true;
4746 }
4747
4748 switch (Result) {
4749 default: break;
4750 case Match_MissingFeature:
4751 // It has been verified that the specified instruction
4752 // mnemonic is valid. A match was found but it requires
4753 // features which are not supported on this GPU.
4754 return Error(IDLoc, "operands are not valid for this GPU or mode");
4755
4756 case Match_InvalidOperand: {
4757 SMLoc ErrorLoc = IDLoc;
4758 if (ErrorInfo != ~0ULL) {
4759 if (ErrorInfo >= Operands.size()) {
4760 return Error(IDLoc, "too few operands for instruction");
4761 }
4762 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4763 if (ErrorLoc == SMLoc())
4764 ErrorLoc = IDLoc;
4765 }
4766 return Error(ErrorLoc, "invalid operand for instruction");
4767 }
4768
4769 case Match_PreferE32:
4770 return Error(IDLoc, "internal error: instruction without _e64 suffix "
4771 "should be encoded as e32");
4772 case Match_MnemonicFail:
4773 llvm_unreachable("Invalid instructions should have been handled already");
4774 }
4775 llvm_unreachable("Implement any new match types added!");
4776 }
4777
4778 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4779 int64_t Tmp = -1;
4780 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4781 return true;
4782 }
4783 if (getParser().parseAbsoluteExpression(Tmp)) {
4784 return true;
4785 }
4786 Ret = static_cast<uint32_t>(Tmp);
4787 return false;
4788 }
4789
4790 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4791 uint32_t &Minor) {
4792 if (ParseAsAbsoluteExpression(Major))
4793 return TokError("invalid major version");
4794
4795 if (!trySkipToken(AsmToken::Comma))
4796 return TokError("minor version number required, comma expected");
4797
4798 if (ParseAsAbsoluteExpression(Minor))
4799 return TokError("invalid minor version");
4800
4801 return false;
4802 }
4803
4804 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4805 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4806 return TokError("directive only supported for amdgcn architecture");
4807
4808 std::string TargetIDDirective;
4809 SMLoc TargetStart = getTok().getLoc();
4810 if (getParser().parseEscapedString(TargetIDDirective))
4811 return true;
4812
4813 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4814 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4815 return getParser().Error(TargetRange.Start,
4816 (Twine(".amdgcn_target directive's target id ") +
4817 Twine(TargetIDDirective) +
4818 Twine(" does not match the specified target id ") +
4819
Twine(getTargetStreamer().getTargetID()->toString())).str()); 4820 4821 return false; 4822 } 4823 4824 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4825 return Error(Range.Start, "value out of range", Range); 4826 } 4827 4828 bool AMDGPUAsmParser::calculateGPRBlocks( 4829 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4830 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4831 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4832 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4833 // TODO(scott.linder): These calculations are duplicated from 4834 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 4835 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4836 4837 unsigned NumVGPRs = NextFreeVGPR; 4838 unsigned NumSGPRs = NextFreeSGPR; 4839 4840 if (Version.Major >= 10) 4841 NumSGPRs = 0; 4842 else { 4843 unsigned MaxAddressableNumSGPRs = 4844 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4845 4846 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4847 NumSGPRs > MaxAddressableNumSGPRs) 4848 return OutOfRangeError(SGPRRange); 4849 4850 NumSGPRs += 4851 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4852 4853 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4854 NumSGPRs > MaxAddressableNumSGPRs) 4855 return OutOfRangeError(SGPRRange); 4856 4857 if (Features.test(FeatureSGPRInitBug)) 4858 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4859 } 4860 4861 VGPRBlocks = 4862 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4863 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4864 4865 return false; 4866 } 4867 4868 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4869 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4870 return TokError("directive only supported for amdgcn architecture"); 4871 4872 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4873 return TokError("directive only supported for amdhsa OS"); 4874 4875 StringRef KernelName; 4876 if (getParser().parseIdentifier(KernelName)) 4877 return true; 4878 4879 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4880 4881 StringSet<> Seen; 4882 4883 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4884 4885 SMRange VGPRRange; 4886 uint64_t NextFreeVGPR = 0; 4887 uint64_t AccumOffset = 0; 4888 uint64_t SharedVGPRCount = 0; 4889 SMRange SGPRRange; 4890 uint64_t NextFreeSGPR = 0; 4891 4892 // Count the number of user SGPRs implied from the enabled feature bits. 4893 unsigned ImpliedUserSGPRCount = 0; 4894 4895 // Track if the asm explicitly contains the directive for the user SGPR 4896 // count. 
4897 Optional<unsigned> ExplicitUserSGPRCount; 4898 bool ReserveVCC = true; 4899 bool ReserveFlatScr = true; 4900 Optional<bool> EnableWavefrontSize32; 4901 4902 while (true) { 4903 while (trySkipToken(AsmToken::EndOfStatement)); 4904 4905 StringRef ID; 4906 SMRange IDRange = getTok().getLocRange(); 4907 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4908 return true; 4909 4910 if (ID == ".end_amdhsa_kernel") 4911 break; 4912 4913 if (!Seen.insert(ID).second) 4914 return TokError(".amdhsa_ directives cannot be repeated"); 4915 4916 SMLoc ValStart = getLoc(); 4917 int64_t IVal; 4918 if (getParser().parseAbsoluteExpression(IVal)) 4919 return true; 4920 SMLoc ValEnd = getLoc(); 4921 SMRange ValRange = SMRange(ValStart, ValEnd); 4922 4923 if (IVal < 0) 4924 return OutOfRangeError(ValRange); 4925 4926 uint64_t Val = IVal; 4927 4928 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4929 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4930 return OutOfRangeError(RANGE); \ 4931 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4932 4933 if (ID == ".amdhsa_group_segment_fixed_size") { 4934 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4935 return OutOfRangeError(ValRange); 4936 KD.group_segment_fixed_size = Val; 4937 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4938 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4939 return OutOfRangeError(ValRange); 4940 KD.private_segment_fixed_size = Val; 4941 } else if (ID == ".amdhsa_kernarg_size") { 4942 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4943 return OutOfRangeError(ValRange); 4944 KD.kernarg_size = Val; 4945 } else if (ID == ".amdhsa_user_sgpr_count") { 4946 ExplicitUserSGPRCount = Val; 4947 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4948 if (hasArchitectedFlatScratch()) 4949 return Error(IDRange.Start, 4950 "directive is not supported with architected flat scratch", 4951 IDRange); 4952 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4953 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4954 Val, ValRange); 4955 if (Val) 4956 ImpliedUserSGPRCount += 4; 4957 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4958 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4959 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4960 ValRange); 4961 if (Val) 4962 ImpliedUserSGPRCount += 2; 4963 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4964 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4965 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4966 ValRange); 4967 if (Val) 4968 ImpliedUserSGPRCount += 2; 4969 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4970 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4971 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4972 Val, ValRange); 4973 if (Val) 4974 ImpliedUserSGPRCount += 2; 4975 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4976 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4977 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4978 ValRange); 4979 if (Val) 4980 ImpliedUserSGPRCount += 2; 4981 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4982 if (hasArchitectedFlatScratch()) 4983 return Error(IDRange.Start, 4984 "directive is not supported with architected flat scratch", 4985 IDRange); 4986 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4987 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4988 ValRange); 4989 if (Val) 4990 ImpliedUserSGPRCount += 2; 4991 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4992 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4993 
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4994 Val, ValRange); 4995 if (Val) 4996 ImpliedUserSGPRCount += 1; 4997 } else if (ID == ".amdhsa_wavefront_size32") { 4998 if (IVersion.Major < 10) 4999 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5000 EnableWavefrontSize32 = Val; 5001 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5002 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 5003 Val, ValRange); 5004 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 5005 if (hasArchitectedFlatScratch()) 5006 return Error(IDRange.Start, 5007 "directive is not supported with architected flat scratch", 5008 IDRange); 5009 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5010 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 5011 } else if (ID == ".amdhsa_enable_private_segment") { 5012 if (!hasArchitectedFlatScratch()) 5013 return Error( 5014 IDRange.Start, 5015 "directive is not supported without architected flat scratch", 5016 IDRange); 5017 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5018 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 5019 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 5020 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5021 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 5022 ValRange); 5023 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 5024 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5025 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 5026 ValRange); 5027 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 5028 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5029 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 5030 ValRange); 5031 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 5032 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5033 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 5034 ValRange); 5035 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 5036 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5037 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 5038 ValRange); 5039 } else if (ID == ".amdhsa_next_free_vgpr") { 5040 VGPRRange = ValRange; 5041 NextFreeVGPR = Val; 5042 } else if (ID == ".amdhsa_next_free_sgpr") { 5043 SGPRRange = ValRange; 5044 NextFreeSGPR = Val; 5045 } else if (ID == ".amdhsa_accum_offset") { 5046 if (!isGFX90A()) 5047 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5048 AccumOffset = Val; 5049 } else if (ID == ".amdhsa_reserve_vcc") { 5050 if (!isUInt<1>(Val)) 5051 return OutOfRangeError(ValRange); 5052 ReserveVCC = Val; 5053 } else if (ID == ".amdhsa_reserve_flat_scratch") { 5054 if (IVersion.Major < 7) 5055 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 5056 if (hasArchitectedFlatScratch()) 5057 return Error(IDRange.Start, 5058 "directive is not supported with architected flat scratch", 5059 IDRange); 5060 if (!isUInt<1>(Val)) 5061 return OutOfRangeError(ValRange); 5062 ReserveFlatScr = Val; 5063 } else if (ID == ".amdhsa_reserve_xnack_mask") { 5064 if (IVersion.Major < 8) 5065 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 5066 if (!isUInt<1>(Val)) 5067 return OutOfRangeError(ValRange); 5068 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 5069 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 5070 IDRange); 5071 } else if (ID == ".amdhsa_float_round_mode_32") { 5072 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5073 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 5074 } else if (ID == ".amdhsa_float_round_mode_16_64") { 5075 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5076 
COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 5077 } else if (ID == ".amdhsa_float_denorm_mode_32") { 5078 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5079 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 5080 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 5081 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5082 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 5083 ValRange); 5084 } else if (ID == ".amdhsa_dx10_clamp") { 5085 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5086 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 5087 } else if (ID == ".amdhsa_ieee_mode") { 5088 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 5089 Val, ValRange); 5090 } else if (ID == ".amdhsa_fp16_overflow") { 5091 if (IVersion.Major < 9) 5092 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 5093 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 5094 ValRange); 5095 } else if (ID == ".amdhsa_tg_split") { 5096 if (!isGFX90A()) 5097 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5098 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 5099 ValRange); 5100 } else if (ID == ".amdhsa_workgroup_processor_mode") { 5101 if (IVersion.Major < 10) 5102 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5103 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 5104 ValRange); 5105 } else if (ID == ".amdhsa_memory_ordered") { 5106 if (IVersion.Major < 10) 5107 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5108 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 5109 ValRange); 5110 } else if (ID == ".amdhsa_forward_progress") { 5111 if (IVersion.Major < 10) 5112 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5113 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 5114 ValRange); 5115 } else if (ID == ".amdhsa_shared_vgpr_count") { 5116 if (IVersion.Major < 10) 5117 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5118 SharedVGPRCount = Val; 5119 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, 5120 COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT, Val, 5121 ValRange); 5122 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 5123 PARSE_BITS_ENTRY( 5124 KD.compute_pgm_rsrc2, 5125 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 5126 ValRange); 5127 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 5128 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5129 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 5130 Val, ValRange); 5131 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 5132 PARSE_BITS_ENTRY( 5133 KD.compute_pgm_rsrc2, 5134 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 5135 ValRange); 5136 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 5137 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5138 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 5139 Val, ValRange); 5140 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 5141 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5142 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 5143 Val, ValRange); 5144 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 5145 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5146 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 5147 Val, ValRange); 5148 } else if (ID == ".amdhsa_exception_int_div_zero") { 5149 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5150 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 5151 Val, 
ValRange);
5152 } else {
5153 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5154 }
5155
5156 #undef PARSE_BITS_ENTRY
5157 }
5158
5159 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
5160 return TokError(".amdhsa_next_free_vgpr directive is required");
5161
5162 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
5163 return TokError(".amdhsa_next_free_sgpr directive is required");
5164
5165 unsigned VGPRBlocks;
5166 unsigned SGPRBlocks;
5167 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5168 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5169 EnableWavefrontSize32, NextFreeVGPR,
5170 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5171 SGPRBlocks))
5172 return true;
5173
5174 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5175 VGPRBlocks))
5176 return OutOfRangeError(VGPRRange);
5177 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5178 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
5179
5180 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5181 SGPRBlocks))
5182 return OutOfRangeError(SGPRRange);
5183 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5184 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
5185 SGPRBlocks);
5186
5187 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5188 return TokError("amdhsa_user_sgpr_count smaller than implied by "
5189 "enabled user SGPRs");
5190
5191 unsigned UserSGPRCount =
5192 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5193
5194 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5195 return TokError("too many user SGPRs enabled");
5196 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5197 UserSGPRCount);
5198
5199 if (isGFX90A()) {
5200 if (Seen.find(".amdhsa_accum_offset") == Seen.end())
5201 return TokError(".amdhsa_accum_offset directive is required");
5202 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5203 return TokError("accum_offset should be in range [4..256] in "
5204 "increments of 4");
5205 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5206 return TokError("accum_offset exceeds total VGPR allocation");
5207 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5208 (AccumOffset / 4 - 1));
5209 }
5210
5211 if (IVersion.Major == 10) {
5212 // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
5213 if (SharedVGPRCount && EnableWavefrontSize32) {
5214 return TokError("shared_vgpr_count directive not valid on "
5215 "wavefront size 32");
5216 }
5217 if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5218 return TokError("shared_vgpr_count*2 + "
5219 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5220 "exceed 63");
5221 }
5222 }
5223
5224 getTargetStreamer().EmitAmdhsaKernelDescriptor(
5225 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
5226 ReserveFlatScr);
5227 return false;
5228 }
5229
5230 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
5231 uint32_t Major;
5232 uint32_t Minor;
5233
5234 if (ParseDirectiveMajorMinor(Major, Minor))
5235 return true;
5236
5237 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
5238 return false;
5239 }
5240
5241 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
5242 uint32_t Major;
5243 uint32_t Minor;
5244 uint32_t Stepping;
5245 StringRef VendorName;
5246 StringRef ArchName;
5247
5248 // If this directive has no arguments, then use the ISA version for the
5249 // targeted GPU.
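// Otherwise the arguments are parsed below in the form
//   <major>, <minor>, <stepping>, "<vendor>", "<arch>"
// e.g. 7,0,0,"AMD","AMDGPU" (the values shown are illustrative only).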
5250 if (isToken(AsmToken::EndOfStatement)) { 5251 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5252 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 5253 ISA.Stepping, 5254 "AMD", "AMDGPU"); 5255 return false; 5256 } 5257 5258 if (ParseDirectiveMajorMinor(Major, Minor)) 5259 return true; 5260 5261 if (!trySkipToken(AsmToken::Comma)) 5262 return TokError("stepping version number required, comma expected"); 5263 5264 if (ParseAsAbsoluteExpression(Stepping)) 5265 return TokError("invalid stepping version"); 5266 5267 if (!trySkipToken(AsmToken::Comma)) 5268 return TokError("vendor name required, comma expected"); 5269 5270 if (!parseString(VendorName, "invalid vendor name")) 5271 return true; 5272 5273 if (!trySkipToken(AsmToken::Comma)) 5274 return TokError("arch name required, comma expected"); 5275 5276 if (!parseString(ArchName, "invalid arch name")) 5277 return true; 5278 5279 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 5280 VendorName, ArchName); 5281 return false; 5282 } 5283 5284 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 5285 amd_kernel_code_t &Header) { 5286 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 5287 // assembly for backwards compatibility. 5288 if (ID == "max_scratch_backing_memory_byte_size") { 5289 Parser.eatToEndOfStatement(); 5290 return false; 5291 } 5292 5293 SmallString<40> ErrStr; 5294 raw_svector_ostream Err(ErrStr); 5295 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 5296 return TokError(Err.str()); 5297 } 5298 Lex(); 5299 5300 if (ID == "enable_wavefront_size32") { 5301 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 5302 if (!isGFX10Plus()) 5303 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 5304 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5305 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 5306 } else { 5307 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5308 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 5309 } 5310 } 5311 5312 if (ID == "wavefront_size") { 5313 if (Header.wavefront_size == 5) { 5314 if (!isGFX10Plus()) 5315 return TokError("wavefront_size=5 is only allowed on GFX10+"); 5316 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5317 return TokError("wavefront_size=5 requires +WavefrontSize32"); 5318 } else if (Header.wavefront_size == 6) { 5319 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5320 return TokError("wavefront_size=6 requires +WavefrontSize64"); 5321 } 5322 } 5323 5324 if (ID == "enable_wgp_mode") { 5325 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 5326 !isGFX10Plus()) 5327 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 5328 } 5329 5330 if (ID == "enable_mem_ordered") { 5331 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 5332 !isGFX10Plus()) 5333 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 5334 } 5335 5336 if (ID == "enable_fwd_progress") { 5337 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 5338 !isGFX10Plus()) 5339 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 5340 } 5341 5342 return false; 5343 } 5344 5345 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5346 amd_kernel_code_t Header; 5347 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 5348 5349 while (true) { 5350 // Lex EndOfStatement. 
This is in a while loop, because lexing a comment 5351 // will set the current token to EndOfStatement. 5352 while(trySkipToken(AsmToken::EndOfStatement)); 5353 5354 StringRef ID; 5355 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5356 return true; 5357 5358 if (ID == ".end_amd_kernel_code_t") 5359 break; 5360 5361 if (ParseAMDKernelCodeTValue(ID, Header)) 5362 return true; 5363 } 5364 5365 getTargetStreamer().EmitAMDKernelCodeT(Header); 5366 5367 return false; 5368 } 5369 5370 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5371 StringRef KernelName; 5372 if (!parseId(KernelName, "expected symbol name")) 5373 return true; 5374 5375 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5376 ELF::STT_AMDGPU_HSA_KERNEL); 5377 5378 KernelScope.initialize(getContext()); 5379 return false; 5380 } 5381 5382 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5383 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5384 return Error(getLoc(), 5385 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5386 "architectures"); 5387 } 5388 5389 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5390 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5391 return Error(getParser().getTok().getLoc(), "target id must match options"); 5392 5393 getTargetStreamer().EmitISAVersion(); 5394 Lex(); 5395 5396 return false; 5397 } 5398 5399 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5400 const char *AssemblerDirectiveBegin; 5401 const char *AssemblerDirectiveEnd; 5402 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5403 isHsaAbiVersion3AndAbove(&getSTI()) 5404 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 5405 HSAMD::V3::AssemblerDirectiveEnd) 5406 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 5407 HSAMD::AssemblerDirectiveEnd); 5408 5409 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5410 return Error(getLoc(), 5411 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5412 "not available on non-amdhsa OSes")).str()); 5413 } 5414 5415 std::string HSAMetadataString; 5416 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5417 HSAMetadataString)) 5418 return true; 5419 5420 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5421 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5422 return Error(getLoc(), "invalid HSA metadata"); 5423 } else { 5424 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5425 return Error(getLoc(), "invalid HSA metadata"); 5426 } 5427 5428 return false; 5429 } 5430 5431 /// Common code to parse out a block of text (typically YAML) between start and 5432 /// end directives. 
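/// For example (illustrative), on HSA ABI v3+ the text between the
/// .amdgpu_metadata and .end_amdgpu_metadata directives is collected this way
/// and handed to the target streamer as a single string.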
5433 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5434 const char *AssemblerDirectiveEnd, 5435 std::string &CollectString) { 5436 5437 raw_string_ostream CollectStream(CollectString); 5438 5439 getLexer().setSkipSpace(false); 5440 5441 bool FoundEnd = false; 5442 while (!isToken(AsmToken::Eof)) { 5443 while (isToken(AsmToken::Space)) { 5444 CollectStream << getTokenStr(); 5445 Lex(); 5446 } 5447 5448 if (trySkipId(AssemblerDirectiveEnd)) { 5449 FoundEnd = true; 5450 break; 5451 } 5452 5453 CollectStream << Parser.parseStringToEndOfStatement() 5454 << getContext().getAsmInfo()->getSeparatorString(); 5455 5456 Parser.eatToEndOfStatement(); 5457 } 5458 5459 getLexer().setSkipSpace(true); 5460 5461 if (isToken(AsmToken::Eof) && !FoundEnd) { 5462 return TokError(Twine("expected directive ") + 5463 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5464 } 5465 5466 CollectStream.flush(); 5467 return false; 5468 } 5469 5470 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5471 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5472 std::string String; 5473 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5474 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5475 return true; 5476 5477 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5478 if (!PALMetadata->setFromString(String)) 5479 return Error(getLoc(), "invalid PAL metadata"); 5480 return false; 5481 } 5482 5483 /// Parse the assembler directive for old linear-format PAL metadata. 5484 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5485 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5486 return Error(getLoc(), 5487 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5488 "not available on non-amdpal OSes")).str()); 5489 } 5490 5491 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5492 PALMetadata->setLegacy(); 5493 for (;;) { 5494 uint32_t Key, Value; 5495 if (ParseAsAbsoluteExpression(Key)) { 5496 return TokError(Twine("invalid value in ") + 5497 Twine(PALMD::AssemblerDirective)); 5498 } 5499 if (!trySkipToken(AsmToken::Comma)) { 5500 return TokError(Twine("expected an even number of values in ") + 5501 Twine(PALMD::AssemblerDirective)); 5502 } 5503 if (ParseAsAbsoluteExpression(Value)) { 5504 return TokError(Twine("invalid value in ") + 5505 Twine(PALMD::AssemblerDirective)); 5506 } 5507 PALMetadata->setRegister(Key, Value); 5508 if (!trySkipToken(AsmToken::Comma)) 5509 break; 5510 } 5511 return false; 5512 } 5513 5514 /// ParseDirectiveAMDGPULDS 5515 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5516 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5517 if (getParser().checkForValidSection()) 5518 return true; 5519 5520 StringRef Name; 5521 SMLoc NameLoc = getLoc(); 5522 if (getParser().parseIdentifier(Name)) 5523 return TokError("expected identifier in directive"); 5524 5525 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5526 if (parseToken(AsmToken::Comma, "expected ','")) 5527 return true; 5528 5529 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5530 5531 int64_t Size; 5532 SMLoc SizeLoc = getLoc(); 5533 if (getParser().parseAbsoluteExpression(Size)) 5534 return true; 5535 if (Size < 0) 5536 return Error(SizeLoc, "size must be non-negative"); 5537 if (Size > LocalMemorySize) 5538 return Error(SizeLoc, "size is too large"); 5539 5540 int64_t Alignment = 4; 5541 if (trySkipToken(AsmToken::Comma)) { 5542 SMLoc AlignLoc = getLoc(); 5543 if 
(getParser().parseAbsoluteExpression(Alignment)) 5544 return true; 5545 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5546 return Error(AlignLoc, "alignment must be a power of two"); 5547 5548 // Alignment larger than the size of LDS is possible in theory, as long 5549 // as the linker manages to place the symbol at address 0, but we do want 5550 // to make sure the alignment fits nicely into a 32-bit integer. 5551 if (Alignment >= 1u << 31) 5552 return Error(AlignLoc, "alignment is too large"); 5553 } 5554 5555 if (parseEOL()) 5556 return true; 5557 5558 Symbol->redefineIfPossible(); 5559 if (!Symbol->isUndefined()) 5560 return Error(NameLoc, "invalid symbol redefinition"); 5561 5562 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5563 return false; 5564 } 5565 5566 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5567 StringRef IDVal = DirectiveID.getString(); 5568 5569 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5570 if (IDVal == ".amdhsa_kernel") 5571 return ParseDirectiveAMDHSAKernel(); 5572 5573 // TODO: Restructure/combine with PAL metadata directive. 5574 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5575 return ParseDirectiveHSAMetadata(); 5576 } else { 5577 if (IDVal == ".hsa_code_object_version") 5578 return ParseDirectiveHSACodeObjectVersion(); 5579 5580 if (IDVal == ".hsa_code_object_isa") 5581 return ParseDirectiveHSACodeObjectISA(); 5582 5583 if (IDVal == ".amd_kernel_code_t") 5584 return ParseDirectiveAMDKernelCodeT(); 5585 5586 if (IDVal == ".amdgpu_hsa_kernel") 5587 return ParseDirectiveAMDGPUHsaKernel(); 5588 5589 if (IDVal == ".amd_amdgpu_isa") 5590 return ParseDirectiveISAVersion(); 5591 5592 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5593 return ParseDirectiveHSAMetadata(); 5594 } 5595 5596 if (IDVal == ".amdgcn_target") 5597 return ParseDirectiveAMDGCNTarget(); 5598 5599 if (IDVal == ".amdgpu_lds") 5600 return ParseDirectiveAMDGPULDS(); 5601 5602 if (IDVal == PALMD::AssemblerDirectiveBegin) 5603 return ParseDirectivePALMetadataBegin(); 5604 5605 if (IDVal == PALMD::AssemblerDirective) 5606 return ParseDirectivePALMetadata(); 5607 5608 return true; 5609 } 5610 5611 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5612 unsigned RegNo) { 5613 5614 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo)) 5615 return isGFX9Plus(); 5616 5617 // GFX10+ has 2 more SGPRs 104 and 105. 5618 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo)) 5619 return hasSGPR104_SGPR105(); 5620 5621 switch (RegNo) { 5622 case AMDGPU::SRC_SHARED_BASE: 5623 case AMDGPU::SRC_SHARED_LIMIT: 5624 case AMDGPU::SRC_PRIVATE_BASE: 5625 case AMDGPU::SRC_PRIVATE_LIMIT: 5626 return isGFX9Plus(); 5627 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5628 return isGFX9Plus() && !isGFX11Plus(); 5629 case AMDGPU::TBA: 5630 case AMDGPU::TBA_LO: 5631 case AMDGPU::TBA_HI: 5632 case AMDGPU::TMA: 5633 case AMDGPU::TMA_LO: 5634 case AMDGPU::TMA_HI: 5635 return !isGFX9Plus(); 5636 case AMDGPU::XNACK_MASK: 5637 case AMDGPU::XNACK_MASK_LO: 5638 case AMDGPU::XNACK_MASK_HI: 5639 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5640 case AMDGPU::SGPR_NULL: 5641 return isGFX10Plus(); 5642 default: 5643 break; 5644 } 5645 5646 if (isCI()) 5647 return true; 5648 5649 if (isSI() || isGFX10Plus()) { 5650 // No flat_scr on SI. 5651 // On GFX10Plus flat scratch is not a valid register operand and can only be 5652 // accessed with s_setreg/s_getreg.
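// For example (illustrative): an operand such as 'flat_scratch_lo' is
// rejected below on SI and GFX10+, but accepted on CI/VI/GFX9.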
switch (RegNo) { 5654 case AMDGPU::FLAT_SCR: 5655 case AMDGPU::FLAT_SCR_LO: 5656 case AMDGPU::FLAT_SCR_HI: 5657 return false; 5658 default: 5659 return true; 5660 } 5661 } 5662 5663 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5664 // SI/CI have. 5665 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo)) 5666 return hasSGPR102_SGPR103(); 5667 5668 return true; 5669 } 5670 5671 OperandMatchResultTy 5672 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 5673 OperandMode Mode) { 5674 // Try to parse with a custom parser 5675 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 5676 5677 // If we successfully parsed the operand or if there was an error parsing, 5678 // we are done. 5679 // 5680 // If we are parsing after we reach EndOfStatement then this means we 5681 // are appending default values to the Operands list. This is only done 5682 // by custom parser, so we shouldn't continue on to the generic parsing. 5683 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5684 isToken(AsmToken::EndOfStatement)) 5685 return ResTy; 5686 5687 SMLoc RBraceLoc; 5688 SMLoc LBraceLoc = getLoc(); 5689 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 5690 unsigned Prefix = Operands.size(); 5691 5692 for (;;) { 5693 auto Loc = getLoc(); 5694 ResTy = parseReg(Operands); 5695 if (ResTy == MatchOperand_NoMatch) 5696 Error(Loc, "expected a register"); 5697 if (ResTy != MatchOperand_Success) 5698 return MatchOperand_ParseFail; 5699 5700 RBraceLoc = getLoc(); 5701 if (trySkipToken(AsmToken::RBrac)) 5702 break; 5703 5704 if (!skipToken(AsmToken::Comma, 5705 "expected a comma or a closing square bracket")) { 5706 return MatchOperand_ParseFail; 5707 } 5708 } 5709 5710 if (Operands.size() - Prefix > 1) { 5711 Operands.insert(Operands.begin() + Prefix, 5712 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 5713 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 5714 } 5715 5716 return MatchOperand_Success; 5717 } 5718 5719 return parseRegOrImm(Operands); 5720 } 5721 5722 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 5723 // Clear any forced encodings from the previous instruction. 5724 setForcedEncodingSize(0); 5725 setForcedDPP(false); 5726 setForcedSDWA(false); 5727 5728 if (Name.endswith("_e64_dpp")) { 5729 setForcedDPP(true); 5730 setForcedEncodingSize(64); 5731 return Name.substr(0, Name.size() - 8); 5732 } else if (Name.endswith("_e64")) { 5733 setForcedEncodingSize(64); 5734 return Name.substr(0, Name.size() - 4); 5735 } else if (Name.endswith("_e32")) { 5736 setForcedEncodingSize(32); 5737 return Name.substr(0, Name.size() - 4); 5738 } else if (Name.endswith("_dpp")) { 5739 setForcedDPP(true); 5740 return Name.substr(0, Name.size() - 4); 5741 } else if (Name.endswith("_sdwa")) { 5742 setForcedSDWA(true); 5743 return Name.substr(0, Name.size() - 5); 5744 } 5745 return Name; 5746 } 5747 5748 static void applyMnemonicAliases(StringRef &Mnemonic, 5749 const FeatureBitset &Features, 5750 unsigned VariantID); 5751 5752 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5753 StringRef Name, 5754 SMLoc NameLoc, OperandVector &Operands) { 5755 // Add the instruction mnemonic 5756 Name = parseMnemonicSuffix(Name); 5757 5758 // If the target architecture uses MnemonicAlias, call it here to parse 5759 // operands correctly.
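// For example (illustrative): a mnemonic written as 'v_add_f32_e64' reaches
// this point as 'v_add_f32', with the forced 64-bit encoding recorded by
// parseMnemonicSuffix above.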
5760 applyMnemonicAliases(Name, getAvailableFeatures(), 0); 5761 5762 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5763 5764 bool IsMIMG = Name.startswith("image_"); 5765 5766 while (!trySkipToken(AsmToken::EndOfStatement)) { 5767 OperandMode Mode = OperandMode_Default; 5768 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5769 Mode = OperandMode_NSA; 5770 CPolSeen = 0; 5771 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5772 5773 if (Res != MatchOperand_Success) { 5774 checkUnsupportedInstruction(Name, NameLoc); 5775 if (!Parser.hasPendingError()) { 5776 // FIXME: use real operand location rather than the current location. 5777 StringRef Msg = 5778 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5779 "not a valid operand."; 5780 Error(getLoc(), Msg); 5781 } 5782 while (!trySkipToken(AsmToken::EndOfStatement)) { 5783 lex(); 5784 } 5785 return true; 5786 } 5787 5788 // Eat the comma or space if there is one. 5789 trySkipToken(AsmToken::Comma); 5790 } 5791 5792 return false; 5793 } 5794 5795 //===----------------------------------------------------------------------===// 5796 // Utility functions 5797 //===----------------------------------------------------------------------===// 5798 5799 OperandMatchResultTy 5800 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5801 5802 if (!trySkipId(Prefix, AsmToken::Colon)) 5803 return MatchOperand_NoMatch; 5804 5805 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5806 } 5807 5808 OperandMatchResultTy 5809 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5810 AMDGPUOperand::ImmTy ImmTy, 5811 bool (*ConvertResult)(int64_t&)) { 5812 SMLoc S = getLoc(); 5813 int64_t Value = 0; 5814 5815 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5816 if (Res != MatchOperand_Success) 5817 return Res; 5818 5819 if (ConvertResult && !ConvertResult(Value)) { 5820 Error(S, "invalid " + StringRef(Prefix) + " value."); 5821 } 5822 5823 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5824 return MatchOperand_Success; 5825 } 5826 5827 OperandMatchResultTy 5828 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5829 OperandVector &Operands, 5830 AMDGPUOperand::ImmTy ImmTy, 5831 bool (*ConvertResult)(int64_t&)) { 5832 SMLoc S = getLoc(); 5833 if (!trySkipId(Prefix, AsmToken::Colon)) 5834 return MatchOperand_NoMatch; 5835 5836 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5837 return MatchOperand_ParseFail; 5838 5839 unsigned Val = 0; 5840 const unsigned MaxSize = 4; 5841 5842 // FIXME: How to verify the number of elements matches the number of src 5843 // operands? 
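// For example (illustrative): an operand written as op_sel:[0,1,1,0] is
// accumulated below into the packed value 0b0110, one bit per element.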
5844 for (int I = 0; ; ++I) { 5845 int64_t Op; 5846 SMLoc Loc = getLoc(); 5847 if (!parseExpr(Op)) 5848 return MatchOperand_ParseFail; 5849 5850 if (Op != 0 && Op != 1) { 5851 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5852 return MatchOperand_ParseFail; 5853 } 5854 5855 Val |= (Op << I); 5856 5857 if (trySkipToken(AsmToken::RBrac)) 5858 break; 5859 5860 if (I + 1 == MaxSize) { 5861 Error(getLoc(), "expected a closing square bracket"); 5862 return MatchOperand_ParseFail; 5863 } 5864 5865 if (!skipToken(AsmToken::Comma, "expected a comma")) 5866 return MatchOperand_ParseFail; 5867 } 5868 5869 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5870 return MatchOperand_Success; 5871 } 5872 5873 OperandMatchResultTy 5874 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5875 AMDGPUOperand::ImmTy ImmTy) { 5876 int64_t Bit; 5877 SMLoc S = getLoc(); 5878 5879 if (trySkipId(Name)) { 5880 Bit = 1; 5881 } else if (trySkipId("no", Name)) { 5882 Bit = 0; 5883 } else { 5884 return MatchOperand_NoMatch; 5885 } 5886 5887 if (Name == "r128" && !hasMIMG_R128()) { 5888 Error(S, "r128 modifier is not supported on this GPU"); 5889 return MatchOperand_ParseFail; 5890 } 5891 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5892 Error(S, "a16 modifier is not supported on this GPU"); 5893 return MatchOperand_ParseFail; 5894 } 5895 5896 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5897 ImmTy = AMDGPUOperand::ImmTyR128A16; 5898 5899 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5900 return MatchOperand_Success; 5901 } 5902 5903 OperandMatchResultTy 5904 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5905 unsigned CPolOn = 0; 5906 unsigned CPolOff = 0; 5907 SMLoc S = getLoc(); 5908 5909 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 5910 if (isGFX940() && !Mnemo.startswith("s_")) { 5911 if (trySkipId("sc0")) 5912 CPolOn = AMDGPU::CPol::SC0; 5913 else if (trySkipId("nosc0")) 5914 CPolOff = AMDGPU::CPol::SC0; 5915 else if (trySkipId("nt")) 5916 CPolOn = AMDGPU::CPol::NT; 5917 else if (trySkipId("nont")) 5918 CPolOff = AMDGPU::CPol::NT; 5919 else if (trySkipId("sc1")) 5920 CPolOn = AMDGPU::CPol::SC1; 5921 else if (trySkipId("nosc1")) 5922 CPolOff = AMDGPU::CPol::SC1; 5923 else 5924 return MatchOperand_NoMatch; 5925 } 5926 else if (trySkipId("glc")) 5927 CPolOn = AMDGPU::CPol::GLC; 5928 else if (trySkipId("noglc")) 5929 CPolOff = AMDGPU::CPol::GLC; 5930 else if (trySkipId("slc")) 5931 CPolOn = AMDGPU::CPol::SLC; 5932 else if (trySkipId("noslc")) 5933 CPolOff = AMDGPU::CPol::SLC; 5934 else if (trySkipId("dlc")) 5935 CPolOn = AMDGPU::CPol::DLC; 5936 else if (trySkipId("nodlc")) 5937 CPolOff = AMDGPU::CPol::DLC; 5938 else if (trySkipId("scc")) 5939 CPolOn = AMDGPU::CPol::SCC; 5940 else if (trySkipId("noscc")) 5941 CPolOff = AMDGPU::CPol::SCC; 5942 else 5943 return MatchOperand_NoMatch; 5944 5945 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5946 Error(S, "dlc modifier is not supported on this GPU"); 5947 return MatchOperand_ParseFail; 5948 } 5949 5950 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5951 Error(S, "scc modifier is not supported on this GPU"); 5952 return MatchOperand_ParseFail; 5953 } 5954 5955 if (CPolSeen & (CPolOn | CPolOff)) { 5956 Error(S, "duplicate cache policy modifier"); 5957 return MatchOperand_ParseFail; 5958 } 5959 5960 CPolSeen |= (CPolOn | CPolOff); 5961 5962 for (unsigned I = 1; I != Operands.size(); ++I) { 5963 AMDGPUOperand &Op = ((AMDGPUOperand 
&)*Operands[I]); 5964 if (Op.isCPol()) { 5965 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5966 return MatchOperand_Success; 5967 } 5968 } 5969 5970 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5971 AMDGPUOperand::ImmTyCPol)); 5972 5973 return MatchOperand_Success; 5974 } 5975 5976 static void addOptionalImmOperand( 5977 MCInst& Inst, const OperandVector& Operands, 5978 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5979 AMDGPUOperand::ImmTy ImmT, 5980 int64_t Default = 0) { 5981 auto i = OptionalIdx.find(ImmT); 5982 if (i != OptionalIdx.end()) { 5983 unsigned Idx = i->second; 5984 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5985 } else { 5986 Inst.addOperand(MCOperand::createImm(Default)); 5987 } 5988 } 5989 5990 OperandMatchResultTy 5991 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5992 StringRef &Value, 5993 SMLoc &StringLoc) { 5994 if (!trySkipId(Prefix, AsmToken::Colon)) 5995 return MatchOperand_NoMatch; 5996 5997 StringLoc = getLoc(); 5998 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5999 : MatchOperand_ParseFail; 6000 } 6001 6002 //===----------------------------------------------------------------------===// 6003 // MTBUF format 6004 //===----------------------------------------------------------------------===// 6005 6006 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 6007 int64_t MaxVal, 6008 int64_t &Fmt) { 6009 int64_t Val; 6010 SMLoc Loc = getLoc(); 6011 6012 auto Res = parseIntWithPrefix(Pref, Val); 6013 if (Res == MatchOperand_ParseFail) 6014 return false; 6015 if (Res == MatchOperand_NoMatch) 6016 return true; 6017 6018 if (Val < 0 || Val > MaxVal) { 6019 Error(Loc, Twine("out of range ", StringRef(Pref))); 6020 return false; 6021 } 6022 6023 Fmt = Val; 6024 return true; 6025 } 6026 6027 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 6028 // values to live in a joint format operand in the MCInst encoding. 6029 OperandMatchResultTy 6030 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 6031 using namespace llvm::AMDGPU::MTBUFFormat; 6032 6033 int64_t Dfmt = DFMT_UNDEF; 6034 int64_t Nfmt = NFMT_UNDEF; 6035 6036 // dfmt and nfmt can appear in either order, and each is optional. 6037 for (int I = 0; I < 2; ++I) { 6038 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 6039 return MatchOperand_ParseFail; 6040 6041 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 6042 return MatchOperand_ParseFail; 6043 } 6044 // Skip optional comma between dfmt/nfmt 6045 // but guard against 2 commas following each other. 6046 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 6047 !peekToken().is(AsmToken::Comma)) { 6048 trySkipToken(AsmToken::Comma); 6049 } 6050 } 6051 6052 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 6053 return MatchOperand_NoMatch; 6054 6055 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6056 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 6057 6058 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6059 return MatchOperand_Success; 6060 } 6061 6062 OperandMatchResultTy 6063 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 6064 using namespace llvm::AMDGPU::MTBUFFormat; 6065 6066 int64_t Fmt = UFMT_UNDEF; 6067 6068 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 6069 return MatchOperand_ParseFail; 6070 6071 if (Fmt == UFMT_UNDEF) 6072 return MatchOperand_NoMatch; 6073 6074 Format = Fmt; 6075 return MatchOperand_Success; 6076 } 6077 6078 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 6079 int64_t &Nfmt, 6080 StringRef FormatStr, 6081 SMLoc Loc) { 6082 using namespace llvm::AMDGPU::MTBUFFormat; 6083 int64_t Format; 6084 6085 Format = getDfmt(FormatStr); 6086 if (Format != DFMT_UNDEF) { 6087 Dfmt = Format; 6088 return true; 6089 } 6090 6091 Format = getNfmt(FormatStr, getSTI()); 6092 if (Format != NFMT_UNDEF) { 6093 Nfmt = Format; 6094 return true; 6095 } 6096 6097 Error(Loc, "unsupported format"); 6098 return false; 6099 } 6100 6101 OperandMatchResultTy 6102 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 6103 SMLoc FormatLoc, 6104 int64_t &Format) { 6105 using namespace llvm::AMDGPU::MTBUFFormat; 6106 6107 int64_t Dfmt = DFMT_UNDEF; 6108 int64_t Nfmt = NFMT_UNDEF; 6109 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 6110 return MatchOperand_ParseFail; 6111 6112 if (trySkipToken(AsmToken::Comma)) { 6113 StringRef Str; 6114 SMLoc Loc = getLoc(); 6115 if (!parseId(Str, "expected a format string") || 6116 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 6117 return MatchOperand_ParseFail; 6118 } 6119 if (Dfmt == DFMT_UNDEF) { 6120 Error(Loc, "duplicate numeric format"); 6121 return MatchOperand_ParseFail; 6122 } else if (Nfmt == NFMT_UNDEF) { 6123 Error(Loc, "duplicate data format"); 6124 return MatchOperand_ParseFail; 6125 } 6126 } 6127 6128 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6129 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 6130 6131 if (isGFX10Plus()) { 6132 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI()); 6133 if (Ufmt == UFMT_UNDEF) { 6134 Error(FormatLoc, "unsupported format"); 6135 return MatchOperand_ParseFail; 6136 } 6137 Format = Ufmt; 6138 } else { 6139 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6140 } 6141 6142 return MatchOperand_Success; 6143 } 6144 6145 OperandMatchResultTy 6146 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 6147 SMLoc Loc, 6148 int64_t &Format) { 6149 using namespace llvm::AMDGPU::MTBUFFormat; 6150 6151 auto Id = getUnifiedFormat(FormatStr, getSTI()); 6152 if (Id == UFMT_UNDEF) 6153 return MatchOperand_NoMatch; 6154 6155 if (!isGFX10Plus()) { 6156 Error(Loc, "unified format is not supported on this GPU"); 6157 return MatchOperand_ParseFail; 6158 } 6159 6160 Format = Id; 6161 return MatchOperand_Success; 6162 } 6163 6164 OperandMatchResultTy 6165 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 6166 using namespace llvm::AMDGPU::MTBUFFormat; 6167 SMLoc Loc = getLoc(); 6168 6169 if (!parseExpr(Format)) 6170 return MatchOperand_ParseFail; 6171 if (!isValidFormatEncoding(Format, getSTI())) { 6172 Error(Loc, "out of range format"); 6173 return MatchOperand_ParseFail; 6174 } 6175 6176 return MatchOperand_Success; 6177 } 6178 6179 OperandMatchResultTy 6180 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 6181 using namespace llvm::AMDGPU::MTBUFFormat; 6182 6183 if (!trySkipId("format", AsmToken::Colon)) 6184 return MatchOperand_NoMatch; 6185 6186 if (trySkipToken(AsmToken::LBrac)) { 6187 StringRef FormatStr; 6188 SMLoc Loc = getLoc(); 6189 if (!parseId(FormatStr, "expected a format string")) 6190 return MatchOperand_ParseFail; 6191 6192 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 6193 if (Res == MatchOperand_NoMatch) 6194 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 6195 if (Res != MatchOperand_Success) 6196 return Res; 6197 6198 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 6199 return MatchOperand_ParseFail; 6200 6201 return MatchOperand_Success; 6202 } 6203 6204 return parseNumericFormat(Format); 6205 } 6206 6207 OperandMatchResultTy 6208 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 6209 using namespace llvm::AMDGPU::MTBUFFormat; 6210 6211 int64_t Format = getDefaultFormatEncoding(getSTI()); 6212 OperandMatchResultTy Res; 6213 SMLoc Loc = getLoc(); 6214 6215 // Parse legacy format syntax. 6216 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 6217 if (Res == MatchOperand_ParseFail) 6218 return Res; 6219 6220 bool FormatFound = (Res == MatchOperand_Success); 6221 6222 Operands.push_back( 6223 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 6224 6225 if (FormatFound) 6226 trySkipToken(AsmToken::Comma); 6227 6228 if (isToken(AsmToken::EndOfStatement)) { 6229 // We are expecting an soffset operand, 6230 // but let matcher handle the error. 6231 return MatchOperand_Success; 6232 } 6233 6234 // Parse soffset. 
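// For example (illustrative): in 'tbuffer_load_format_x v0, off, s[0:3],
// dfmt:4, nfmt:2, s4' the format was already parsed above and 's4' is the
// soffset parsed below; a 'format:...' specifier may instead follow soffset.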
6235 Res = parseRegOrImm(Operands); 6236 if (Res != MatchOperand_Success) 6237 return Res; 6238 6239 trySkipToken(AsmToken::Comma); 6240 6241 if (!FormatFound) { 6242 Res = parseSymbolicOrNumericFormat(Format); 6243 if (Res == MatchOperand_ParseFail) 6244 return Res; 6245 if (Res == MatchOperand_Success) { 6246 auto Size = Operands.size(); 6247 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 6248 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 6249 Op.setImm(Format); 6250 } 6251 return MatchOperand_Success; 6252 } 6253 6254 if (isId("format") && peekToken().is(AsmToken::Colon)) { 6255 Error(getLoc(), "duplicate format"); 6256 return MatchOperand_ParseFail; 6257 } 6258 return MatchOperand_Success; 6259 } 6260 6261 //===----------------------------------------------------------------------===// 6262 // ds 6263 //===----------------------------------------------------------------------===// 6264 6265 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 6266 const OperandVector &Operands) { 6267 OptionalImmIndexMap OptionalIdx; 6268 6269 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6270 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6271 6272 // Add the register arguments 6273 if (Op.isReg()) { 6274 Op.addRegOperands(Inst, 1); 6275 continue; 6276 } 6277 6278 // Handle optional arguments 6279 OptionalIdx[Op.getImmTy()] = i; 6280 } 6281 6282 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 6283 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 6284 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6285 6286 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6287 } 6288 6289 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 6290 bool IsGdsHardcoded) { 6291 OptionalImmIndexMap OptionalIdx; 6292 AMDGPUOperand::ImmTy OffsetType = AMDGPUOperand::ImmTyOffset; 6293 6294 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6295 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6296 6297 // Add the register arguments 6298 if (Op.isReg()) { 6299 Op.addRegOperands(Inst, 1); 6300 continue; 6301 } 6302 6303 if (Op.isToken() && Op.getToken() == "gds") { 6304 IsGdsHardcoded = true; 6305 continue; 6306 } 6307 6308 // Handle optional arguments 6309 OptionalIdx[Op.getImmTy()] = i; 6310 6311 if (Op.getImmTy() == AMDGPUOperand::ImmTySwizzle) 6312 OffsetType = AMDGPUOperand::ImmTySwizzle; 6313 } 6314 6315 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 6316 6317 if (!IsGdsHardcoded) { 6318 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6319 } 6320 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6321 } 6322 6323 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 6324 OptionalImmIndexMap OptionalIdx; 6325 6326 unsigned OperandIdx[4]; 6327 unsigned EnMask = 0; 6328 int SrcIdx = 0; 6329 6330 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6331 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6332 6333 // Add the register arguments 6334 if (Op.isReg()) { 6335 assert(SrcIdx < 4); 6336 OperandIdx[SrcIdx] = Inst.size(); 6337 Op.addRegOperands(Inst, 1); 6338 ++SrcIdx; 6339 continue; 6340 } 6341 6342 if (Op.isOff()) { 6343 assert(SrcIdx < 4); 6344 OperandIdx[SrcIdx] = Inst.size(); 6345 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 6346 ++SrcIdx; 6347 continue; 6348 } 6349 6350 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) 
{ 6351 Op.addImmOperands(Inst, 1); 6352 continue; 6353 } 6354 6355 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en")) 6356 continue; 6357 6358 // Handle optional arguments 6359 OptionalIdx[Op.getImmTy()] = i; 6360 } 6361 6362 assert(SrcIdx == 4); 6363 6364 bool Compr = false; 6365 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 6366 Compr = true; 6367 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 6368 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 6369 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 6370 } 6371 6372 for (auto i = 0; i < SrcIdx; ++i) { 6373 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 6374 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i); 6375 } 6376 } 6377 6378 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6379 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6380 6381 Inst.addOperand(MCOperand::createImm(EnMask)); 6382 } 6383 6384 //===----------------------------------------------------------------------===// 6385 // s_waitcnt 6386 //===----------------------------------------------------------------------===// 6387 6388 static bool 6389 encodeCnt( 6390 const AMDGPU::IsaVersion ISA, 6391 int64_t &IntVal, 6392 int64_t CntVal, 6393 bool Saturate, 6394 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6395 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6396 { 6397 bool Failed = false; 6398 6399 IntVal = encode(ISA, IntVal, CntVal); 6400 if (CntVal != decode(ISA, IntVal)) { 6401 if (Saturate) { 6402 IntVal = encode(ISA, IntVal, -1); 6403 } else { 6404 Failed = true; 6405 } 6406 } 6407 return Failed; 6408 } 6409 6410 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6411 6412 SMLoc CntLoc = getLoc(); 6413 StringRef CntName = getTokenStr(); 6414 6415 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6416 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6417 return false; 6418 6419 int64_t CntVal; 6420 SMLoc ValLoc = getLoc(); 6421 if (!parseExpr(CntVal)) 6422 return false; 6423 6424 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6425 6426 bool Failed = true; 6427 bool Sat = CntName.endswith("_sat"); 6428 6429 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6430 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6431 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6432 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6433 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6434 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6435 } else { 6436 Error(CntLoc, "invalid counter name " + CntName); 6437 return false; 6438 } 6439 6440 if (Failed) { 6441 Error(ValLoc, "too large value for " + CntName); 6442 return false; 6443 } 6444 6445 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6446 return false; 6447 6448 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6449 if (isToken(AsmToken::EndOfStatement)) { 6450 Error(getLoc(), "expected a counter name"); 6451 return false; 6452 } 6453 } 6454 6455 return true; 6456 } 6457 6458 OperandMatchResultTy 6459 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 6460 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6461 int64_t Waitcnt = getWaitcntBitMask(ISA); 6462 SMLoc S = getLoc(); 6463 6464 if (isToken(AsmToken::Identifier) 
&& peekToken().is(AsmToken::LParen)) { 6465 while (!isToken(AsmToken::EndOfStatement)) { 6466 if (!parseCnt(Waitcnt)) 6467 return MatchOperand_ParseFail; 6468 } 6469 } else { 6470 if (!parseExpr(Waitcnt)) 6471 return MatchOperand_ParseFail; 6472 } 6473 6474 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6475 return MatchOperand_Success; 6476 } 6477 6478 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) { 6479 SMLoc FieldLoc = getLoc(); 6480 StringRef FieldName = getTokenStr(); 6481 if (!skipToken(AsmToken::Identifier, "expected a field name") || 6482 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6483 return false; 6484 6485 SMLoc ValueLoc = getLoc(); 6486 StringRef ValueName = getTokenStr(); 6487 if (!skipToken(AsmToken::Identifier, "expected a value name") || 6488 !skipToken(AsmToken::RParen, "expected a right parenthesis")) 6489 return false; 6490 6491 unsigned Shift; 6492 if (FieldName == "instid0") { 6493 Shift = 0; 6494 } else if (FieldName == "instskip") { 6495 Shift = 4; 6496 } else if (FieldName == "instid1") { 6497 Shift = 7; 6498 } else { 6499 Error(FieldLoc, "invalid field name " + FieldName); 6500 return false; 6501 } 6502 6503 int Value; 6504 if (Shift == 4) { 6505 // Parse values for instskip. 6506 Value = StringSwitch<int>(ValueName) 6507 .Case("SAME", 0) 6508 .Case("NEXT", 1) 6509 .Case("SKIP_1", 2) 6510 .Case("SKIP_2", 3) 6511 .Case("SKIP_3", 4) 6512 .Case("SKIP_4", 5) 6513 .Default(-1); 6514 } else { 6515 // Parse values for instid0 and instid1. 6516 Value = StringSwitch<int>(ValueName) 6517 .Case("NO_DEP", 0) 6518 .Case("VALU_DEP_1", 1) 6519 .Case("VALU_DEP_2", 2) 6520 .Case("VALU_DEP_3", 3) 6521 .Case("VALU_DEP_4", 4) 6522 .Case("TRANS32_DEP_1", 5) 6523 .Case("TRANS32_DEP_2", 6) 6524 .Case("TRANS32_DEP_3", 7) 6525 .Case("FMA_ACCUM_CYCLE_1", 8) 6526 .Case("SALU_CYCLE_1", 9) 6527 .Case("SALU_CYCLE_2", 10) 6528 .Case("SALU_CYCLE_3", 11) 6529 .Default(-1); 6530 } 6531 if (Value < 0) { 6532 Error(ValueLoc, "invalid value name " + ValueName); 6533 return false; 6534 } 6535 6536 Delay |= Value << Shift; 6537 return true; 6538 } 6539 6540 OperandMatchResultTy 6541 AMDGPUAsmParser::parseSDelayAluOps(OperandVector &Operands) { 6542 int64_t Delay = 0; 6543 SMLoc S = getLoc(); 6544 6545 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6546 do { 6547 if (!parseDelay(Delay)) 6548 return MatchOperand_ParseFail; 6549 } while (trySkipToken(AsmToken::Pipe)); 6550 } else { 6551 if (!parseExpr(Delay)) 6552 return MatchOperand_ParseFail; 6553 } 6554 6555 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S)); 6556 return MatchOperand_Success; 6557 } 6558 6559 bool 6560 AMDGPUOperand::isSWaitCnt() const { 6561 return isImm(); 6562 } 6563 6564 bool AMDGPUOperand::isSDelayAlu() const { return isImm(); } 6565 6566 //===----------------------------------------------------------------------===// 6567 // DepCtr 6568 //===----------------------------------------------------------------------===// 6569 6570 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId, 6571 StringRef DepCtrName) { 6572 switch (ErrorId) { 6573 case OPR_ID_UNKNOWN: 6574 Error(Loc, Twine("invalid counter name ", DepCtrName)); 6575 return; 6576 case OPR_ID_UNSUPPORTED: 6577 Error(Loc, Twine(DepCtrName, " is not supported on this GPU")); 6578 return; 6579 case OPR_ID_DUPLICATE: 6580 Error(Loc, Twine("duplicate counter name ", DepCtrName)); 6581 return; 6582 case OPR_VAL_INVALID: 6583 Error(Loc, Twine("invalid value for ", DepCtrName)); 6584 return; 6585 default: 
6586 assert(false); 6587 } 6588 } 6589 6590 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) { 6591 6592 using namespace llvm::AMDGPU::DepCtr; 6593 6594 SMLoc DepCtrLoc = getLoc(); 6595 StringRef DepCtrName = getTokenStr(); 6596 6597 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6598 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6599 return false; 6600 6601 int64_t ExprVal; 6602 if (!parseExpr(ExprVal)) 6603 return false; 6604 6605 unsigned PrevOprMask = UsedOprMask; 6606 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI()); 6607 6608 if (CntVal < 0) { 6609 depCtrError(DepCtrLoc, CntVal, DepCtrName); 6610 return false; 6611 } 6612 6613 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6614 return false; 6615 6616 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6617 if (isToken(AsmToken::EndOfStatement)) { 6618 Error(getLoc(), "expected a counter name"); 6619 return false; 6620 } 6621 } 6622 6623 unsigned CntValMask = PrevOprMask ^ UsedOprMask; 6624 DepCtr = (DepCtr & ~CntValMask) | CntVal; 6625 return true; 6626 } 6627 6628 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) { 6629 using namespace llvm::AMDGPU::DepCtr; 6630 6631 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI()); 6632 SMLoc Loc = getLoc(); 6633 6634 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6635 unsigned UsedOprMask = 0; 6636 while (!isToken(AsmToken::EndOfStatement)) { 6637 if (!parseDepCtr(DepCtr, UsedOprMask)) 6638 return MatchOperand_ParseFail; 6639 } 6640 } else { 6641 if (!parseExpr(DepCtr)) 6642 return MatchOperand_ParseFail; 6643 } 6644 6645 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc)); 6646 return MatchOperand_Success; 6647 } 6648 6649 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); } 6650 6651 //===----------------------------------------------------------------------===// 6652 // hwreg 6653 //===----------------------------------------------------------------------===// 6654 6655 bool 6656 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6657 OperandInfoTy &Offset, 6658 OperandInfoTy &Width) { 6659 using namespace llvm::AMDGPU::Hwreg; 6660 6661 // The register may be specified by name or using a numeric code 6662 HwReg.Loc = getLoc(); 6663 if (isToken(AsmToken::Identifier) && 6664 (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6665 HwReg.IsSymbolic = true; 6666 lex(); // skip register name 6667 } else if (!parseExpr(HwReg.Id, "a register name")) { 6668 return false; 6669 } 6670 6671 if (trySkipToken(AsmToken::RParen)) 6672 return true; 6673 6674 // parse optional params 6675 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6676 return false; 6677 6678 Offset.Loc = getLoc(); 6679 if (!parseExpr(Offset.Id)) 6680 return false; 6681 6682 if (!skipToken(AsmToken::Comma, "expected a comma")) 6683 return false; 6684 6685 Width.Loc = getLoc(); 6686 return parseExpr(Width.Id) && 6687 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6688 } 6689 6690 bool 6691 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6692 const OperandInfoTy &Offset, 6693 const OperandInfoTy &Width) { 6694 6695 using namespace llvm::AMDGPU::Hwreg; 6696 6697 if (HwReg.IsSymbolic) { 6698 if (HwReg.Id == OPR_ID_UNSUPPORTED) { 6699 Error(HwReg.Loc, 6700 "specified hardware register is not supported on this GPU"); 6701 return false; 6702 } 6703 } else { 6704 if 
(!isValidHwreg(HwReg.Id)) { 6705 Error(HwReg.Loc, 6706 "invalid code of hardware register: only 6-bit values are legal"); 6707 return false; 6708 } 6709 } 6710 if (!isValidHwregOffset(Offset.Id)) { 6711 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6712 return false; 6713 } 6714 if (!isValidHwregWidth(Width.Id)) { 6715 Error(Width.Loc, 6716 "invalid bitfield width: only values from 1 to 32 are legal"); 6717 return false; 6718 } 6719 return true; 6720 } 6721 6722 OperandMatchResultTy 6723 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6724 using namespace llvm::AMDGPU::Hwreg; 6725 6726 int64_t ImmVal = 0; 6727 SMLoc Loc = getLoc(); 6728 6729 if (trySkipId("hwreg", AsmToken::LParen)) { 6730 OperandInfoTy HwReg(OPR_ID_UNKNOWN); 6731 OperandInfoTy Offset(OFFSET_DEFAULT_); 6732 OperandInfoTy Width(WIDTH_DEFAULT_); 6733 if (parseHwregBody(HwReg, Offset, Width) && 6734 validateHwreg(HwReg, Offset, Width)) { 6735 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6736 } else { 6737 return MatchOperand_ParseFail; 6738 } 6739 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6740 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6741 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6742 return MatchOperand_ParseFail; 6743 } 6744 } else { 6745 return MatchOperand_ParseFail; 6746 } 6747 6748 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6749 return MatchOperand_Success; 6750 } 6751 6752 bool AMDGPUOperand::isHwreg() const { 6753 return isImmTy(ImmTyHwreg); 6754 } 6755 6756 //===----------------------------------------------------------------------===// 6757 // sendmsg 6758 //===----------------------------------------------------------------------===// 6759 6760 bool 6761 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6762 OperandInfoTy &Op, 6763 OperandInfoTy &Stream) { 6764 using namespace llvm::AMDGPU::SendMsg; 6765 6766 Msg.Loc = getLoc(); 6767 if (isToken(AsmToken::Identifier) && 6768 (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6769 Msg.IsSymbolic = true; 6770 lex(); // skip message name 6771 } else if (!parseExpr(Msg.Id, "a message name")) { 6772 return false; 6773 } 6774 6775 if (trySkipToken(AsmToken::Comma)) { 6776 Op.IsDefined = true; 6777 Op.Loc = getLoc(); 6778 if (isToken(AsmToken::Identifier) && 6779 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6780 lex(); // skip operation name 6781 } else if (!parseExpr(Op.Id, "an operation name")) { 6782 return false; 6783 } 6784 6785 if (trySkipToken(AsmToken::Comma)) { 6786 Stream.IsDefined = true; 6787 Stream.Loc = getLoc(); 6788 if (!parseExpr(Stream.Id)) 6789 return false; 6790 } 6791 } 6792 6793 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6794 } 6795 6796 bool 6797 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6798 const OperandInfoTy &Op, 6799 const OperandInfoTy &Stream) { 6800 using namespace llvm::AMDGPU::SendMsg; 6801 6802 // Validation strictness depends on whether message is specified 6803 // in a symbolic or in a numeric form. In the latter case 6804 // only encoding possibility is checked. 
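// For example (illustrative): 'sendmsg(MSG_GS_DONE, GS_OP_NOP)' is checked
// strictly below, while a numeric form such as 'sendmsg(3)' only has to fit
// the encoding.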
6805 bool Strict = Msg.IsSymbolic; 6806 6807 if (Strict) { 6808 if (Msg.Id == OPR_ID_UNSUPPORTED) { 6809 Error(Msg.Loc, "specified message id is not supported on this GPU"); 6810 return false; 6811 } 6812 } else { 6813 if (!isValidMsgId(Msg.Id, getSTI())) { 6814 Error(Msg.Loc, "invalid message id"); 6815 return false; 6816 } 6817 } 6818 if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) { 6819 if (Op.IsDefined) { 6820 Error(Op.Loc, "message does not support operations"); 6821 } else { 6822 Error(Msg.Loc, "missing message operation"); 6823 } 6824 return false; 6825 } 6826 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6827 Error(Op.Loc, "invalid operation id"); 6828 return false; 6829 } 6830 if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) && 6831 Stream.IsDefined) { 6832 Error(Stream.Loc, "message operation does not support streams"); 6833 return false; 6834 } 6835 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6836 Error(Stream.Loc, "invalid message stream id"); 6837 return false; 6838 } 6839 return true; 6840 } 6841 6842 OperandMatchResultTy 6843 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6844 using namespace llvm::AMDGPU::SendMsg; 6845 6846 int64_t ImmVal = 0; 6847 SMLoc Loc = getLoc(); 6848 6849 if (trySkipId("sendmsg", AsmToken::LParen)) { 6850 OperandInfoTy Msg(OPR_ID_UNKNOWN); 6851 OperandInfoTy Op(OP_NONE_); 6852 OperandInfoTy Stream(STREAM_ID_NONE_); 6853 if (parseSendMsgBody(Msg, Op, Stream) && 6854 validateSendMsg(Msg, Op, Stream)) { 6855 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6856 } else { 6857 return MatchOperand_ParseFail; 6858 } 6859 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6860 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6861 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6862 return MatchOperand_ParseFail; 6863 } 6864 } else { 6865 return MatchOperand_ParseFail; 6866 } 6867 6868 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6869 return MatchOperand_Success; 6870 } 6871 6872 bool AMDGPUOperand::isSendMsg() const { 6873 return isImmTy(ImmTySendMsg); 6874 } 6875 6876 //===----------------------------------------------------------------------===// 6877 // v_interp 6878 //===----------------------------------------------------------------------===// 6879 6880 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6881 StringRef Str; 6882 SMLoc S = getLoc(); 6883 6884 if (!parseId(Str)) 6885 return MatchOperand_NoMatch; 6886 6887 int Slot = StringSwitch<int>(Str) 6888 .Case("p10", 0) 6889 .Case("p20", 1) 6890 .Case("p0", 2) 6891 .Default(-1); 6892 6893 if (Slot == -1) { 6894 Error(S, "invalid interpolation slot"); 6895 return MatchOperand_ParseFail; 6896 } 6897 6898 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6899 AMDGPUOperand::ImmTyInterpSlot)); 6900 return MatchOperand_Success; 6901 } 6902 6903 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6904 StringRef Str; 6905 SMLoc S = getLoc(); 6906 6907 if (!parseId(Str)) 6908 return MatchOperand_NoMatch; 6909 6910 if (!Str.startswith("attr")) { 6911 Error(S, "invalid interpolation attribute"); 6912 return MatchOperand_ParseFail; 6913 } 6914 6915 StringRef Chan = Str.take_back(2); 6916 int AttrChan = StringSwitch<int>(Chan) 6917 .Case(".x", 0) 6918 .Case(".y", 1) 6919 .Case(".z", 2) 6920 .Case(".w", 3) 6921 .Default(-1); 6922 if (AttrChan == -1) { 6923 Error(S, "invalid or missing interpolation attribute channel"); 
6924 return MatchOperand_ParseFail; 6925 } 6926 6927 Str = Str.drop_back(2).drop_front(4); 6928 6929 uint8_t Attr; 6930 if (Str.getAsInteger(10, Attr)) { 6931 Error(S, "invalid or missing interpolation attribute number"); 6932 return MatchOperand_ParseFail; 6933 } 6934 6935 if (Attr > 63) { 6936 Error(S, "out of bounds interpolation attribute number"); 6937 return MatchOperand_ParseFail; 6938 } 6939 6940 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6941 6942 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6943 AMDGPUOperand::ImmTyInterpAttr)); 6944 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6945 AMDGPUOperand::ImmTyAttrChan)); 6946 return MatchOperand_Success; 6947 } 6948 6949 //===----------------------------------------------------------------------===// 6950 // exp 6951 //===----------------------------------------------------------------------===// 6952 6953 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6954 using namespace llvm::AMDGPU::Exp; 6955 6956 StringRef Str; 6957 SMLoc S = getLoc(); 6958 6959 if (!parseId(Str)) 6960 return MatchOperand_NoMatch; 6961 6962 unsigned Id = getTgtId(Str); 6963 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6964 Error(S, (Id == ET_INVALID) ? 6965 "invalid exp target" : 6966 "exp target is not supported on this GPU"); 6967 return MatchOperand_ParseFail; 6968 } 6969 6970 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6971 AMDGPUOperand::ImmTyExpTgt)); 6972 return MatchOperand_Success; 6973 } 6974 6975 //===----------------------------------------------------------------------===// 6976 // parser helpers 6977 //===----------------------------------------------------------------------===// 6978 6979 bool 6980 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6981 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6982 } 6983 6984 bool 6985 AMDGPUAsmParser::isId(const StringRef Id) const { 6986 return isId(getToken(), Id); 6987 } 6988 6989 bool 6990 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6991 return getTokenKind() == Kind; 6992 } 6993 6994 bool 6995 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6996 if (isId(Id)) { 6997 lex(); 6998 return true; 6999 } 7000 return false; 7001 } 7002 7003 bool 7004 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 7005 if (isToken(AsmToken::Identifier)) { 7006 StringRef Tok = getTokenStr(); 7007 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 7008 lex(); 7009 return true; 7010 } 7011 } 7012 return false; 7013 } 7014 7015 bool 7016 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 7017 if (isId(Id) && peekToken().is(Kind)) { 7018 lex(); 7019 lex(); 7020 return true; 7021 } 7022 return false; 7023 } 7024 7025 bool 7026 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 7027 if (isToken(Kind)) { 7028 lex(); 7029 return true; 7030 } 7031 return false; 7032 } 7033 7034 bool 7035 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 7036 const StringRef ErrMsg) { 7037 if (!trySkipToken(Kind)) { 7038 Error(getLoc(), ErrMsg); 7039 return false; 7040 } 7041 return true; 7042 } 7043 7044 bool 7045 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 7046 SMLoc S = getLoc(); 7047 7048 const MCExpr *Expr; 7049 if (Parser.parseExpression(Expr)) 7050 return false; 7051 7052 if (Expr->evaluateAsAbsolute(Imm)) 7053 return true; 7054 7055 if (Expected.empty()) { 7056 Error(S, "expected 
absolute expression"); 7057 } else { 7058 Error(S, Twine("expected ", Expected) + 7059 Twine(" or an absolute expression")); 7060 } 7061 return false; 7062 } 7063 7064 bool 7065 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 7066 SMLoc S = getLoc(); 7067 7068 const MCExpr *Expr; 7069 if (Parser.parseExpression(Expr)) 7070 return false; 7071 7072 int64_t IntVal; 7073 if (Expr->evaluateAsAbsolute(IntVal)) { 7074 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 7075 } else { 7076 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 7077 } 7078 return true; 7079 } 7080 7081 bool 7082 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 7083 if (isToken(AsmToken::String)) { 7084 Val = getToken().getStringContents(); 7085 lex(); 7086 return true; 7087 } else { 7088 Error(getLoc(), ErrMsg); 7089 return false; 7090 } 7091 } 7092 7093 bool 7094 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 7095 if (isToken(AsmToken::Identifier)) { 7096 Val = getTokenStr(); 7097 lex(); 7098 return true; 7099 } else { 7100 if (!ErrMsg.empty()) 7101 Error(getLoc(), ErrMsg); 7102 return false; 7103 } 7104 } 7105 7106 AsmToken 7107 AMDGPUAsmParser::getToken() const { 7108 return Parser.getTok(); 7109 } 7110 7111 AsmToken 7112 AMDGPUAsmParser::peekToken() { 7113 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 7114 } 7115 7116 void 7117 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 7118 auto TokCount = getLexer().peekTokens(Tokens); 7119 7120 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 7121 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 7122 } 7123 7124 AsmToken::TokenKind 7125 AMDGPUAsmParser::getTokenKind() const { 7126 return getLexer().getKind(); 7127 } 7128 7129 SMLoc 7130 AMDGPUAsmParser::getLoc() const { 7131 return getToken().getLoc(); 7132 } 7133 7134 StringRef 7135 AMDGPUAsmParser::getTokenStr() const { 7136 return getToken().getString(); 7137 } 7138 7139 void 7140 AMDGPUAsmParser::lex() { 7141 Parser.Lex(); 7142 } 7143 7144 SMLoc 7145 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 7146 const OperandVector &Operands) const { 7147 for (unsigned i = Operands.size() - 1; i > 0; --i) { 7148 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7149 if (Test(Op)) 7150 return Op.getStartLoc(); 7151 } 7152 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 7153 } 7154 7155 SMLoc 7156 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 7157 const OperandVector &Operands) const { 7158 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 7159 return getOperandLoc(Test, Operands); 7160 } 7161 7162 SMLoc 7163 AMDGPUAsmParser::getRegLoc(unsigned Reg, 7164 const OperandVector &Operands) const { 7165 auto Test = [=](const AMDGPUOperand& Op) { 7166 return Op.isRegKind() && Op.getReg() == Reg; 7167 }; 7168 return getOperandLoc(Test, Operands); 7169 } 7170 7171 SMLoc 7172 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 7173 auto Test = [](const AMDGPUOperand& Op) { 7174 return Op.IsImmKindLiteral() || Op.isExpr(); 7175 }; 7176 return getOperandLoc(Test, Operands); 7177 } 7178 7179 SMLoc 7180 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 7181 auto Test = [](const AMDGPUOperand& Op) { 7182 return Op.isImmKindConst(); 7183 }; 7184 return getOperandLoc(Test, Operands); 7185 } 7186 7187 //===----------------------------------------------------------------------===// 7188 // swizzle 7189 
//===----------------------------------------------------------------------===// 7190 7191 LLVM_READNONE 7192 static unsigned 7193 encodeBitmaskPerm(const unsigned AndMask, 7194 const unsigned OrMask, 7195 const unsigned XorMask) { 7196 using namespace llvm::AMDGPU::Swizzle; 7197 7198 return BITMASK_PERM_ENC | 7199 (AndMask << BITMASK_AND_SHIFT) | 7200 (OrMask << BITMASK_OR_SHIFT) | 7201 (XorMask << BITMASK_XOR_SHIFT); 7202 } 7203 7204 bool 7205 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 7206 const unsigned MinVal, 7207 const unsigned MaxVal, 7208 const StringRef ErrMsg, 7209 SMLoc &Loc) { 7210 if (!skipToken(AsmToken::Comma, "expected a comma")) { 7211 return false; 7212 } 7213 Loc = getLoc(); 7214 if (!parseExpr(Op)) { 7215 return false; 7216 } 7217 if (Op < MinVal || Op > MaxVal) { 7218 Error(Loc, ErrMsg); 7219 return false; 7220 } 7221 7222 return true; 7223 } 7224 7225 bool 7226 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 7227 const unsigned MinVal, 7228 const unsigned MaxVal, 7229 const StringRef ErrMsg) { 7230 SMLoc Loc; 7231 for (unsigned i = 0; i < OpNum; ++i) { 7232 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 7233 return false; 7234 } 7235 7236 return true; 7237 } 7238 7239 bool 7240 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 7241 using namespace llvm::AMDGPU::Swizzle; 7242 7243 int64_t Lane[LANE_NUM]; 7244 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 7245 "expected a 2-bit lane id")) { 7246 Imm = QUAD_PERM_ENC; 7247 for (unsigned I = 0; I < LANE_NUM; ++I) { 7248 Imm |= Lane[I] << (LANE_SHIFT * I); 7249 } 7250 return true; 7251 } 7252 return false; 7253 } 7254 7255 bool 7256 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 7257 using namespace llvm::AMDGPU::Swizzle; 7258 7259 SMLoc Loc; 7260 int64_t GroupSize; 7261 int64_t LaneIdx; 7262 7263 if (!parseSwizzleOperand(GroupSize, 7264 2, 32, 7265 "group size must be in the interval [2,32]", 7266 Loc)) { 7267 return false; 7268 } 7269 if (!isPowerOf2_64(GroupSize)) { 7270 Error(Loc, "group size must be a power of two"); 7271 return false; 7272 } 7273 if (parseSwizzleOperand(LaneIdx, 7274 0, GroupSize - 1, 7275 "lane id must be in the interval [0,group size - 1]", 7276 Loc)) { 7277 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 7278 return true; 7279 } 7280 return false; 7281 } 7282 7283 bool 7284 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 7285 using namespace llvm::AMDGPU::Swizzle; 7286 7287 SMLoc Loc; 7288 int64_t GroupSize; 7289 7290 if (!parseSwizzleOperand(GroupSize, 7291 2, 32, 7292 "group size must be in the interval [2,32]", 7293 Loc)) { 7294 return false; 7295 } 7296 if (!isPowerOf2_64(GroupSize)) { 7297 Error(Loc, "group size must be a power of two"); 7298 return false; 7299 } 7300 7301 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 7302 return true; 7303 } 7304 7305 bool 7306 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 7307 using namespace llvm::AMDGPU::Swizzle; 7308 7309 SMLoc Loc; 7310 int64_t GroupSize; 7311 7312 if (!parseSwizzleOperand(GroupSize, 7313 1, 16, 7314 "group size must be in the interval [1,16]", 7315 Loc)) { 7316 return false; 7317 } 7318 if (!isPowerOf2_64(GroupSize)) { 7319 Error(Loc, "group size must be a power of two"); 7320 return false; 7321 } 7322 7323 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 7324 return true; 7325 } 7326 7327 bool 7328 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 7329 using namespace llvm::AMDGPU::Swizzle; 7330 7331 if 
(!skipToken(AsmToken::Comma, "expected a comma")) { 7332 return false; 7333 } 7334 7335 StringRef Ctl; 7336 SMLoc StrLoc = getLoc(); 7337 if (!parseString(Ctl)) { 7338 return false; 7339 } 7340 if (Ctl.size() != BITMASK_WIDTH) { 7341 Error(StrLoc, "expected a 5-character mask"); 7342 return false; 7343 } 7344 7345 unsigned AndMask = 0; 7346 unsigned OrMask = 0; 7347 unsigned XorMask = 0; 7348 7349 for (size_t i = 0; i < Ctl.size(); ++i) { 7350 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 7351 switch(Ctl[i]) { 7352 default: 7353 Error(StrLoc, "invalid mask"); 7354 return false; 7355 case '0': 7356 break; 7357 case '1': 7358 OrMask |= Mask; 7359 break; 7360 case 'p': 7361 AndMask |= Mask; 7362 break; 7363 case 'i': 7364 AndMask |= Mask; 7365 XorMask |= Mask; 7366 break; 7367 } 7368 } 7369 7370 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 7371 return true; 7372 } 7373 7374 bool 7375 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 7376 7377 SMLoc OffsetLoc = getLoc(); 7378 7379 if (!parseExpr(Imm, "a swizzle macro")) { 7380 return false; 7381 } 7382 if (!isUInt<16>(Imm)) { 7383 Error(OffsetLoc, "expected a 16-bit offset"); 7384 return false; 7385 } 7386 return true; 7387 } 7388 7389 bool 7390 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 7391 using namespace llvm::AMDGPU::Swizzle; 7392 7393 if (skipToken(AsmToken::LParen, "expected a left parenthesis")) { 7394 7395 SMLoc ModeLoc = getLoc(); 7396 bool Ok = false; 7397 7398 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 7399 Ok = parseSwizzleQuadPerm(Imm); 7400 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 7401 Ok = parseSwizzleBitmaskPerm(Imm); 7402 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 7403 Ok = parseSwizzleBroadcast(Imm); 7404 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 7405 Ok = parseSwizzleSwap(Imm); 7406 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 7407 Ok = parseSwizzleReverse(Imm); 7408 } else { 7409 Error(ModeLoc, "expected a swizzle mode"); 7410 } 7411 7412 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis"); 7413 } 7414 7415 return false; 7416 } 7417 7418 OperandMatchResultTy 7419 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 7420 SMLoc S = getLoc(); 7421 int64_t Imm = 0; 7422 7423 if (trySkipId("offset")) { 7424 7425 bool Ok = false; 7426 if (skipToken(AsmToken::Colon, "expected a colon")) { 7427 if (trySkipId("swizzle")) { 7428 Ok = parseSwizzleMacro(Imm); 7429 } else { 7430 Ok = parseSwizzleOffset(Imm); 7431 } 7432 } 7433 7434 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 7435 7436 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 7437 } else { 7438 // Swizzle "offset" operand is optional. 7439 // If it is omitted, try parsing other optional operands.
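    // (For reference: the branch above accepts either a plain 16-bit value
    // such as "offset:0xABCD" or a macro form such as
    // "offset:swizzle(SWAP,16)"; both produce an ImmTySwizzle operand.
    // These examples are illustrative.)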
7440 return parseOptionalOpr(Operands); 7441 } 7442 } 7443 7444 bool 7445 AMDGPUOperand::isSwizzle() const { 7446 return isImmTy(ImmTySwizzle); 7447 } 7448 7449 //===----------------------------------------------------------------------===// 7450 // VGPR Index Mode 7451 //===----------------------------------------------------------------------===// 7452 7453 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 7454 7455 using namespace llvm::AMDGPU::VGPRIndexMode; 7456 7457 if (trySkipToken(AsmToken::RParen)) { 7458 return OFF; 7459 } 7460 7461 int64_t Imm = 0; 7462 7463 while (true) { 7464 unsigned Mode = 0; 7465 SMLoc S = getLoc(); 7466 7467 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 7468 if (trySkipId(IdSymbolic[ModeId])) { 7469 Mode = 1 << ModeId; 7470 break; 7471 } 7472 } 7473 7474 if (Mode == 0) { 7475 Error(S, (Imm == 0)? 7476 "expected a VGPR index mode or a closing parenthesis" : 7477 "expected a VGPR index mode"); 7478 return UNDEF; 7479 } 7480 7481 if (Imm & Mode) { 7482 Error(S, "duplicate VGPR index mode"); 7483 return UNDEF; 7484 } 7485 Imm |= Mode; 7486 7487 if (trySkipToken(AsmToken::RParen)) 7488 break; 7489 if (!skipToken(AsmToken::Comma, 7490 "expected a comma or a closing parenthesis")) 7491 return UNDEF; 7492 } 7493 7494 return Imm; 7495 } 7496 7497 OperandMatchResultTy 7498 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 7499 7500 using namespace llvm::AMDGPU::VGPRIndexMode; 7501 7502 int64_t Imm = 0; 7503 SMLoc S = getLoc(); 7504 7505 if (trySkipId("gpr_idx", AsmToken::LParen)) { 7506 Imm = parseGPRIdxMacro(); 7507 if (Imm == UNDEF) 7508 return MatchOperand_ParseFail; 7509 } else { 7510 if (getParser().parseAbsoluteExpression(Imm)) 7511 return MatchOperand_ParseFail; 7512 if (Imm < 0 || !isUInt<4>(Imm)) { 7513 Error(S, "invalid immediate: only 4-bit values are legal"); 7514 return MatchOperand_ParseFail; 7515 } 7516 } 7517 7518 Operands.push_back( 7519 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 7520 return MatchOperand_Success; 7521 } 7522 7523 bool AMDGPUOperand::isGPRIdxMode() const { 7524 return isImmTy(ImmTyGprIdxMode); 7525 } 7526 7527 //===----------------------------------------------------------------------===// 7528 // sopp branch targets 7529 //===----------------------------------------------------------------------===// 7530 7531 OperandMatchResultTy 7532 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 7533 7534 // Make sure we are not parsing something 7535 // that looks like a label or an expression but is not. 7536 // This will improve error messages. 7537 if (isRegister() || isModifier()) 7538 return MatchOperand_NoMatch; 7539 7540 if (!parseExpr(Operands)) 7541 return MatchOperand_ParseFail; 7542 7543 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 7544 assert(Opr.isImm() || Opr.isExpr()); 7545 SMLoc Loc = Opr.getStartLoc(); 7546 7547 // Currently we do not support arbitrary expressions as branch targets. 7548 // Only labels and absolute expressions are accepted. 
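  // For example (illustrative): "s_branch loop_end" and "s_branch 8" are
  // accepted, while a compound expression such as "loop_end+4" is rejected
  // below because it is neither a plain label nor an absolute value.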
7549 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 7550 Error(Loc, "expected an absolute expression or a label"); 7551 } else if (Opr.isImm() && !Opr.isS16Imm()) { 7552 Error(Loc, "expected a 16-bit signed jump offset"); 7553 } 7554 7555 return MatchOperand_Success; 7556 } 7557 7558 //===----------------------------------------------------------------------===// 7559 // Boolean holding registers 7560 //===----------------------------------------------------------------------===// 7561 7562 OperandMatchResultTy 7563 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 7564 return parseReg(Operands); 7565 } 7566 7567 //===----------------------------------------------------------------------===// 7568 // mubuf 7569 //===----------------------------------------------------------------------===// 7570 7571 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 7572 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 7573 } 7574 7575 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 7576 const OperandVector &Operands, 7577 bool IsAtomic, 7578 bool IsLds) { 7579 OptionalImmIndexMap OptionalIdx; 7580 unsigned FirstOperandIdx = 1; 7581 bool IsAtomicReturn = false; 7582 7583 if (IsAtomic) { 7584 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7585 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7586 if (!Op.isCPol()) 7587 continue; 7588 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7589 break; 7590 } 7591 7592 if (!IsAtomicReturn) { 7593 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7594 if (NewOpc != -1) 7595 Inst.setOpcode(NewOpc); 7596 } 7597 7598 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7599 SIInstrFlags::IsAtomicRet; 7600 } 7601 7602 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7603 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7604 7605 // Add the register arguments 7606 if (Op.isReg()) { 7607 Op.addRegOperands(Inst, 1); 7608 // Insert a tied src for atomic return dst. 7609 // This cannot be postponed as subsequent calls to 7610 // addImmOperands rely on correct number of MC operands. 7611 if (IsAtomicReturn && i == FirstOperandIdx) 7612 Op.addRegOperands(Inst, 1); 7613 continue; 7614 } 7615 7616 // Handle the case where soffset is an immediate 7617 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7618 Op.addImmOperands(Inst, 1); 7619 continue; 7620 } 7621 7622 // Handle tokens like 'offen' which are sometimes hard-coded into the 7623 // asm string. There are no MCInst operands for these. 
7624 if (Op.isToken()) { 7625 continue; 7626 } 7627 assert(Op.isImm()); 7628 7629 // Handle optional arguments 7630 OptionalIdx[Op.getImmTy()] = i; 7631 } 7632 7633 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7634 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7635 7636 if (!IsLds) { // tfe is not legal with lds opcodes 7637 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7638 } 7639 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7640 } 7641 7642 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7643 OptionalImmIndexMap OptionalIdx; 7644 7645 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7646 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7647 7648 // Add the register arguments 7649 if (Op.isReg()) { 7650 Op.addRegOperands(Inst, 1); 7651 continue; 7652 } 7653 7654 // Handle the case where soffset is an immediate 7655 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7656 Op.addImmOperands(Inst, 1); 7657 continue; 7658 } 7659 7660 // Handle tokens like 'offen' which are sometimes hard-coded into the 7661 // asm string. There are no MCInst operands for these. 7662 if (Op.isToken()) { 7663 continue; 7664 } 7665 assert(Op.isImm()); 7666 7667 // Handle optional arguments 7668 OptionalIdx[Op.getImmTy()] = i; 7669 } 7670 7671 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7672 AMDGPUOperand::ImmTyOffset); 7673 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7674 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7675 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7676 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7677 } 7678 7679 //===----------------------------------------------------------------------===// 7680 // mimg 7681 //===----------------------------------------------------------------------===// 7682 7683 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7684 bool IsAtomic) { 7685 unsigned I = 1; 7686 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7687 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7688 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7689 } 7690 7691 if (IsAtomic) { 7692 // Add src, same as dst 7693 assert(Desc.getNumDefs() == 1); 7694 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7695 } 7696 7697 OptionalImmIndexMap OptionalIdx; 7698 7699 for (unsigned E = Operands.size(); I != E; ++I) { 7700 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7701 7702 // Add the register arguments 7703 if (Op.isReg()) { 7704 Op.addRegOperands(Inst, 1); 7705 } else if (Op.isImmModifier()) { 7706 OptionalIdx[Op.getImmTy()] = I; 7707 } else if (!Op.isToken()) { 7708 llvm_unreachable("unexpected operand type"); 7709 } 7710 } 7711 7712 bool IsGFX10Plus = isGFX10Plus(); 7713 7714 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7715 if (IsGFX10Plus) 7716 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7717 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7718 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7719 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7720 if (IsGFX10Plus) 7721 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7722 
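  // Add 'tfe' only when the selected opcode actually defines a tfe operand.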
if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 7723 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7724 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7725 if (!IsGFX10Plus) 7726 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7727 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7728 } 7729 7730 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7731 cvtMIMG(Inst, Operands, true); 7732 } 7733 7734 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7735 OptionalImmIndexMap OptionalIdx; 7736 bool IsAtomicReturn = false; 7737 7738 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7739 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7740 if (!Op.isCPol()) 7741 continue; 7742 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7743 break; 7744 } 7745 7746 if (!IsAtomicReturn) { 7747 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7748 if (NewOpc != -1) 7749 Inst.setOpcode(NewOpc); 7750 } 7751 7752 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7753 SIInstrFlags::IsAtomicRet; 7754 7755 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7756 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7757 7758 // Add the register arguments 7759 if (Op.isReg()) { 7760 Op.addRegOperands(Inst, 1); 7761 if (IsAtomicReturn && i == 1) 7762 Op.addRegOperands(Inst, 1); 7763 continue; 7764 } 7765 7766 // Handle the case where soffset is an immediate 7767 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7768 Op.addImmOperands(Inst, 1); 7769 continue; 7770 } 7771 7772 // Handle tokens like 'offen' which are sometimes hard-coded into the 7773 // asm string. There are no MCInst operands for these. 7774 if (Op.isToken()) { 7775 continue; 7776 } 7777 assert(Op.isImm()); 7778 7779 // Handle optional arguments 7780 OptionalIdx[Op.getImmTy()] = i; 7781 } 7782 7783 if ((int)Inst.getNumOperands() <= 7784 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7785 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7786 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7787 } 7788 7789 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7790 const OperandVector &Operands) { 7791 for (unsigned I = 1; I < Operands.size(); ++I) { 7792 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7793 if (Operand.isReg()) 7794 Operand.addRegOperands(Inst, 1); 7795 } 7796 7797 Inst.addOperand(MCOperand::createImm(1)); // a16 7798 } 7799 7800 //===----------------------------------------------------------------------===// 7801 // smrd 7802 //===----------------------------------------------------------------------===// 7803 7804 bool AMDGPUOperand::isSMRDOffset8() const { 7805 return isImm() && isUInt<8>(getImm()); 7806 } 7807 7808 bool AMDGPUOperand::isSMEMOffset() const { 7809 return isImmTy(ImmTyNone) || 7810 isImmTy(ImmTyOffset); // Offset range is checked later by validator. 7811 } 7812 7813 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7814 // 32-bit literals are only supported on CI and we only want to use them 7815 // when the offset is > 8-bits. 
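  // For example, an offset of 0x1ff can only be encoded as a 32-bit literal,
  // while 0xff still fits the 8-bit form (values are illustrative).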
7816 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7817 } 7818 7819 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7820 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7821 } 7822 7823 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7824 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7825 } 7826 7827 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7828 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7829 } 7830 7831 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7832 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7833 } 7834 7835 //===----------------------------------------------------------------------===// 7836 // vop3 7837 //===----------------------------------------------------------------------===// 7838 7839 static bool ConvertOmodMul(int64_t &Mul) { 7840 if (Mul != 1 && Mul != 2 && Mul != 4) 7841 return false; 7842 7843 Mul >>= 1; 7844 return true; 7845 } 7846 7847 static bool ConvertOmodDiv(int64_t &Div) { 7848 if (Div == 1) { 7849 Div = 0; 7850 return true; 7851 } 7852 7853 if (Div == 2) { 7854 Div = 3; 7855 return true; 7856 } 7857 7858 return false; 7859 } 7860 7861 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7862 // This is intentional and ensures compatibility with sp3. 7863 // See bug 35397 for details. 7864 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7865 if (BoundCtrl == 0 || BoundCtrl == 1) { 7866 BoundCtrl = 1; 7867 return true; 7868 } 7869 return false; 7870 } 7871 7872 // Note: the order in this table matches the order of operands in AsmString. 7873 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7874 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7875 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7876 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7877 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7878 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7879 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7880 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7881 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7882 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7883 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7884 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7885 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7886 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7887 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7888 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7889 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7890 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7891 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7892 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7893 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7894 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7895 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7896 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7897 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7898 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7899 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 7900 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 7901 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 7902 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 7903 {"vm", AMDGPUOperand::ImmTyExpVM, 
true, nullptr}, 7904 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 7905 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 7906 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 7907 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 7908 {"dpp8", AMDGPUOperand::ImmTyDPP8, false, nullptr}, 7909 {"dpp_ctrl", AMDGPUOperand::ImmTyDppCtrl, false, nullptr}, 7910 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7911 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7912 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7913 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7914 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 7915 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 7916 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}, 7917 {"wait_vdst", AMDGPUOperand::ImmTyWaitVDST, false, nullptr}, 7918 {"wait_exp", AMDGPUOperand::ImmTyWaitEXP, false, nullptr} 7919 }; 7920 7921 void AMDGPUAsmParser::onBeginOfFile() { 7922 if (!getParser().getStreamer().getTargetStreamer() || 7923 getSTI().getTargetTriple().getArch() == Triple::r600) 7924 return; 7925 7926 if (!getTargetStreamer().getTargetID()) 7927 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString()); 7928 7929 if (isHsaAbiVersion3AndAbove(&getSTI())) 7930 getTargetStreamer().EmitDirectiveAMDGCNTarget(); 7931 } 7932 7933 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 7934 7935 OperandMatchResultTy res = parseOptionalOpr(Operands); 7936 7937 // This is a hack to enable hardcoded mandatory operands which follow 7938 // optional operands. 7939 // 7940 // The current design assumes that all operands after the first optional operand 7941 // are also optional. However, the implementation of some instructions violates 7942 // this rule (see e.g. flat/global atomics, which have hardcoded 'glc' operands). 7943 // 7944 // To alleviate this problem, we have to (implicitly) parse extra operands 7945 // to make sure the autogenerated parser of custom operands never hits hardcoded 7946 // mandatory operands.
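  // For example (illustrative), an asm string ending in "... offset:$offset glc"
  // places the mandatory 'glc' token right after an optional operand; the
  // lookahead below consumes further optional operands so that the
  // custom-operand parser is not invoked on 'glc' itself.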
7947 7948 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7949 if (res != MatchOperand_Success || 7950 isToken(AsmToken::EndOfStatement)) 7951 break; 7952 7953 trySkipToken(AsmToken::Comma); 7954 res = parseOptionalOpr(Operands); 7955 } 7956 7957 return res; 7958 } 7959 7960 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7961 OperandMatchResultTy res; 7962 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7963 // try to parse any optional operand here 7964 if (Op.IsBit) { 7965 res = parseNamedBit(Op.Name, Operands, Op.Type); 7966 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7967 res = parseOModOperand(Operands); 7968 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7969 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7970 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7971 res = parseSDWASel(Operands, Op.Name, Op.Type); 7972 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7973 res = parseSDWADstUnused(Operands); 7974 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7975 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7976 Op.Type == AMDGPUOperand::ImmTyNegLo || 7977 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7978 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7979 Op.ConvertResult); 7980 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7981 res = parseDim(Operands); 7982 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7983 res = parseCPol(Operands); 7984 } else if (Op.Type == AMDGPUOperand::ImmTyDPP8) { 7985 res = parseDPP8(Operands); 7986 } else if (Op.Type == AMDGPUOperand::ImmTyDppCtrl) { 7987 res = parseDPPCtrl(Operands); 7988 } else { 7989 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7990 if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) { 7991 res = parseOperandArrayWithPrefix("neg", Operands, 7992 AMDGPUOperand::ImmTyBLGP, 7993 nullptr); 7994 } 7995 } 7996 if (res != MatchOperand_NoMatch) { 7997 return res; 7998 } 7999 } 8000 return MatchOperand_NoMatch; 8001 } 8002 8003 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 8004 StringRef Name = getTokenStr(); 8005 if (Name == "mul") { 8006 return parseIntWithPrefix("mul", Operands, 8007 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 8008 } 8009 8010 if (Name == "div") { 8011 return parseIntWithPrefix("div", Operands, 8012 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 8013 } 8014 8015 return MatchOperand_NoMatch; 8016 } 8017 8018 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 8019 cvtVOP3P(Inst, Operands); 8020 8021 int Opc = Inst.getOpcode(); 8022 8023 int SrcNum; 8024 const int Ops[] = { AMDGPU::OpName::src0, 8025 AMDGPU::OpName::src1, 8026 AMDGPU::OpName::src2 }; 8027 for (SrcNum = 0; 8028 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 8029 ++SrcNum); 8030 assert(SrcNum > 0); 8031 8032 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8033 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 8034 8035 if ((OpSel & (1 << SrcNum)) != 0) { 8036 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 8037 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 8038 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 8039 } 8040 } 8041 8042 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 8043 // 1. This operand is input modifiers 8044 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 8045 // 2. 
This is not last operand 8046 && Desc.NumOperands > (OpNum + 1) 8047 // 3. Next operand is register class 8048 && Desc.OpInfo[OpNum + 1].RegClass != -1 8049 // 4. Next register is not tied to any other operand 8050 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 8051 } 8052 8053 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 8054 { 8055 OptionalImmIndexMap OptionalIdx; 8056 unsigned Opc = Inst.getOpcode(); 8057 8058 unsigned I = 1; 8059 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8060 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8061 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8062 } 8063 8064 for (unsigned E = Operands.size(); I != E; ++I) { 8065 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8066 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8067 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8068 } else if (Op.isInterpSlot() || 8069 Op.isInterpAttr() || 8070 Op.isAttrChan()) { 8071 Inst.addOperand(MCOperand::createImm(Op.getImm())); 8072 } else if (Op.isImmModifier()) { 8073 OptionalIdx[Op.getImmTy()] = I; 8074 } else { 8075 llvm_unreachable("unhandled operand type"); 8076 } 8077 } 8078 8079 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 8080 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 8081 } 8082 8083 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8084 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8085 } 8086 8087 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8088 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8089 } 8090 } 8091 8092 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands) 8093 { 8094 OptionalImmIndexMap OptionalIdx; 8095 unsigned Opc = Inst.getOpcode(); 8096 8097 unsigned I = 1; 8098 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8099 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8100 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8101 } 8102 8103 for (unsigned E = Operands.size(); I != E; ++I) { 8104 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8105 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8106 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8107 } else if (Op.isImmModifier()) { 8108 OptionalIdx[Op.getImmTy()] = I; 8109 } else { 8110 llvm_unreachable("unhandled operand type"); 8111 } 8112 } 8113 8114 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8115 8116 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8117 if (OpSelIdx != -1) 8118 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); 8119 8120 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP); 8121 8122 if (OpSelIdx == -1) 8123 return; 8124 8125 const int Ops[] = { AMDGPU::OpName::src0, 8126 AMDGPU::OpName::src1, 8127 AMDGPU::OpName::src2 }; 8128 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8129 AMDGPU::OpName::src1_modifiers, 8130 AMDGPU::OpName::src2_modifiers }; 8131 8132 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 8133 8134 for (int J = 0; J < 3; ++J) { 8135 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8136 if (OpIdx == -1) 8137 break; 8138 8139 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8140 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 8141 8142 if ((OpSel & (1 << J)) != 0) 8143 ModVal |= 
SISrcMods::OP_SEL_0; 8144 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && 8145 (OpSel & (1 << 3)) != 0) 8146 ModVal |= SISrcMods::DST_OP_SEL; 8147 8148 Inst.getOperand(ModIdx).setImm(ModVal); 8149 } 8150 } 8151 8152 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 8153 OptionalImmIndexMap &OptionalIdx) { 8154 unsigned Opc = Inst.getOpcode(); 8155 8156 unsigned I = 1; 8157 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8158 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8159 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8160 } 8161 8162 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 8163 // This instruction has src modifiers 8164 for (unsigned E = Operands.size(); I != E; ++I) { 8165 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8166 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8167 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8168 } else if (Op.isImmModifier()) { 8169 OptionalIdx[Op.getImmTy()] = I; 8170 } else if (Op.isRegOrImm()) { 8171 Op.addRegOrImmOperands(Inst, 1); 8172 } else { 8173 llvm_unreachable("unhandled operand type"); 8174 } 8175 } 8176 } else { 8177 // No src modifiers 8178 for (unsigned E = Operands.size(); I != E; ++I) { 8179 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8180 if (Op.isMod()) { 8181 OptionalIdx[Op.getImmTy()] = I; 8182 } else { 8183 Op.addRegOrImmOperands(Inst, 1); 8184 } 8185 } 8186 } 8187 8188 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8189 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8190 } 8191 8192 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8193 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8194 } 8195 8196 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 8197 // it has src2 register operand that is tied to dst operand 8198 // we don't allow modifiers for this operand in assembler so src2_modifiers 8199 // should be 0. 8200 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 8201 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 8202 Opc == AMDGPU::V_MAC_F32_e64_vi || 8203 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 8204 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 8205 Opc == AMDGPU::V_MAC_F16_e64_vi || 8206 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 8207 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 8208 Opc == AMDGPU::V_FMAC_F32_e64_gfx11 || 8209 Opc == AMDGPU::V_FMAC_F32_e64_vi || 8210 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 8211 Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 || 8212 Opc == AMDGPU::V_FMAC_F16_e64_gfx10 || 8213 Opc == AMDGPU::V_FMAC_F16_e64_gfx11) { 8214 auto it = Inst.begin(); 8215 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 8216 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 8217 ++it; 8218 // Copy the operand to ensure it's not invalidated when Inst grows. 
8219 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 8220 } 8221 } 8222 8223 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 8224 OptionalImmIndexMap OptionalIdx; 8225 cvtVOP3(Inst, Operands, OptionalIdx); 8226 } 8227 8228 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 8229 OptionalImmIndexMap &OptIdx) { 8230 const int Opc = Inst.getOpcode(); 8231 const MCInstrDesc &Desc = MII.get(Opc); 8232 8233 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 8234 8235 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 8236 assert(!IsPacked); 8237 Inst.addOperand(Inst.getOperand(0)); 8238 } 8239 8240 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 8241 // instruction, and then figure out where to actually put the modifiers 8242 8243 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8244 if (OpSelIdx != -1) { 8245 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 8246 } 8247 8248 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 8249 if (OpSelHiIdx != -1) { 8250 int DefaultVal = IsPacked ? -1 : 0; 8251 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 8252 DefaultVal); 8253 } 8254 8255 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 8256 if (NegLoIdx != -1) { 8257 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 8258 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 8259 } 8260 8261 const int Ops[] = { AMDGPU::OpName::src0, 8262 AMDGPU::OpName::src1, 8263 AMDGPU::OpName::src2 }; 8264 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8265 AMDGPU::OpName::src1_modifiers, 8266 AMDGPU::OpName::src2_modifiers }; 8267 8268 unsigned OpSel = 0; 8269 unsigned OpSelHi = 0; 8270 unsigned NegLo = 0; 8271 unsigned NegHi = 0; 8272 8273 if (OpSelIdx != -1) 8274 OpSel = Inst.getOperand(OpSelIdx).getImm(); 8275 8276 if (OpSelHiIdx != -1) 8277 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 8278 8279 if (NegLoIdx != -1) { 8280 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 8281 NegLo = Inst.getOperand(NegLoIdx).getImm(); 8282 NegHi = Inst.getOperand(NegHiIdx).getImm(); 8283 } 8284 8285 for (int J = 0; J < 3; ++J) { 8286 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8287 if (OpIdx == -1) 8288 break; 8289 8290 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8291 8292 if (ModIdx == -1) 8293 continue; 8294 8295 uint32_t ModVal = 0; 8296 8297 if ((OpSel & (1 << J)) != 0) 8298 ModVal |= SISrcMods::OP_SEL_0; 8299 8300 if ((OpSelHi & (1 << J)) != 0) 8301 ModVal |= SISrcMods::OP_SEL_1; 8302 8303 if ((NegLo & (1 << J)) != 0) 8304 ModVal |= SISrcMods::NEG; 8305 8306 if ((NegHi & (1 << J)) != 0) 8307 ModVal |= SISrcMods::NEG_HI; 8308 8309 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 8310 } 8311 } 8312 8313 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 8314 OptionalImmIndexMap OptIdx; 8315 cvtVOP3(Inst, Operands, OptIdx); 8316 cvtVOP3P(Inst, Operands, OptIdx); 8317 } 8318 8319 //===----------------------------------------------------------------------===// 8320 // dpp 8321 //===----------------------------------------------------------------------===// 8322 8323 bool AMDGPUOperand::isDPP8() const { 8324 return isImmTy(ImmTyDPP8); 8325 } 8326 8327 bool AMDGPUOperand::isDPPCtrl() const { 8328 using namespace AMDGPU::DPP; 8329 8330 
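  // A dpp_ctrl operand is kept as a 9-bit immediate; only values falling in
  // the enumerated ranges below are accepted, other (reserved or unsupported)
  // encodings are rejected.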
bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 8331 if (result) { 8332 int64_t Imm = getImm(); 8333 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 8334 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 8335 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 8336 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 8337 (Imm == DppCtrl::WAVE_SHL1) || 8338 (Imm == DppCtrl::WAVE_ROL1) || 8339 (Imm == DppCtrl::WAVE_SHR1) || 8340 (Imm == DppCtrl::WAVE_ROR1) || 8341 (Imm == DppCtrl::ROW_MIRROR) || 8342 (Imm == DppCtrl::ROW_HALF_MIRROR) || 8343 (Imm == DppCtrl::BCAST15) || 8344 (Imm == DppCtrl::BCAST31) || 8345 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 8346 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 8347 } 8348 return false; 8349 } 8350 8351 //===----------------------------------------------------------------------===// 8352 // mAI 8353 //===----------------------------------------------------------------------===// 8354 8355 bool AMDGPUOperand::isBLGP() const { 8356 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 8357 } 8358 8359 bool AMDGPUOperand::isCBSZ() const { 8360 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 8361 } 8362 8363 bool AMDGPUOperand::isABID() const { 8364 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 8365 } 8366 8367 bool AMDGPUOperand::isS16Imm() const { 8368 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 8369 } 8370 8371 bool AMDGPUOperand::isU16Imm() const { 8372 return isImm() && isUInt<16>(getImm()); 8373 } 8374 8375 //===----------------------------------------------------------------------===// 8376 // dim 8377 //===----------------------------------------------------------------------===// 8378 8379 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 8380 // We want to allow "dim:1D" etc., 8381 // but the initial 1 is tokenized as an integer. 
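  // For example, "dim:2D" is lexed as the integer token "2" followed by the
  // identifier "D"; the two pieces are glued back together below, provided
  // the token locations show there is no whitespace between them.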
8382 std::string Token; 8383 if (isToken(AsmToken::Integer)) { 8384 SMLoc Loc = getToken().getEndLoc(); 8385 Token = std::string(getTokenStr()); 8386 lex(); 8387 if (getLoc() != Loc) 8388 return false; 8389 } 8390 8391 StringRef Suffix; 8392 if (!parseId(Suffix)) 8393 return false; 8394 Token += Suffix; 8395 8396 StringRef DimId = Token; 8397 if (DimId.startswith("SQ_RSRC_IMG_")) 8398 DimId = DimId.drop_front(12); 8399 8400 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 8401 if (!DimInfo) 8402 return false; 8403 8404 Encoding = DimInfo->Encoding; 8405 return true; 8406 } 8407 8408 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 8409 if (!isGFX10Plus()) 8410 return MatchOperand_NoMatch; 8411 8412 SMLoc S = getLoc(); 8413 8414 if (!trySkipId("dim", AsmToken::Colon)) 8415 return MatchOperand_NoMatch; 8416 8417 unsigned Encoding; 8418 SMLoc Loc = getLoc(); 8419 if (!parseDimId(Encoding)) { 8420 Error(Loc, "invalid dim value"); 8421 return MatchOperand_ParseFail; 8422 } 8423 8424 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 8425 AMDGPUOperand::ImmTyDim)); 8426 return MatchOperand_Success; 8427 } 8428 8429 //===----------------------------------------------------------------------===// 8430 // dpp 8431 //===----------------------------------------------------------------------===// 8432 8433 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 8434 SMLoc S = getLoc(); 8435 8436 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 8437 return MatchOperand_NoMatch; 8438 8439 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 8440 8441 int64_t Sels[8]; 8442 8443 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8444 return MatchOperand_ParseFail; 8445 8446 for (size_t i = 0; i < 8; ++i) { 8447 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8448 return MatchOperand_ParseFail; 8449 8450 SMLoc Loc = getLoc(); 8451 if (getParser().parseAbsoluteExpression(Sels[i])) 8452 return MatchOperand_ParseFail; 8453 if (0 > Sels[i] || 7 < Sels[i]) { 8454 Error(Loc, "expected a 3-bit value"); 8455 return MatchOperand_ParseFail; 8456 } 8457 } 8458 8459 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8460 return MatchOperand_ParseFail; 8461 8462 unsigned DPP8 = 0; 8463 for (size_t i = 0; i < 8; ++i) 8464 DPP8 |= (Sels[i] << (i * 3)); 8465 8466 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 8467 return MatchOperand_Success; 8468 } 8469 8470 bool 8471 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 8472 const OperandVector &Operands) { 8473 if (Ctrl == "row_newbcast") 8474 return isGFX90A(); 8475 8476 if (Ctrl == "row_share" || 8477 Ctrl == "row_xmask") 8478 return isGFX10Plus(); 8479 8480 if (Ctrl == "wave_shl" || 8481 Ctrl == "wave_shr" || 8482 Ctrl == "wave_rol" || 8483 Ctrl == "wave_ror" || 8484 Ctrl == "row_bcast") 8485 return isVI() || isGFX9(); 8486 8487 return Ctrl == "row_mirror" || 8488 Ctrl == "row_half_mirror" || 8489 Ctrl == "quad_perm" || 8490 Ctrl == "row_shl" || 8491 Ctrl == "row_shr" || 8492 Ctrl == "row_ror"; 8493 } 8494 8495 int64_t 8496 AMDGPUAsmParser::parseDPPCtrlPerm() { 8497 // quad_perm:[%d,%d,%d,%d] 8498 8499 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8500 return -1; 8501 8502 int64_t Val = 0; 8503 for (int i = 0; i < 4; ++i) { 8504 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8505 return -1; 8506 8507 int64_t Temp; 8508 SMLoc Loc = getLoc(); 8509 if 
(getParser().parseAbsoluteExpression(Temp)) 8510 return -1; 8511 if (Temp < 0 || Temp > 3) { 8512 Error(Loc, "expected a 2-bit value"); 8513 return -1; 8514 } 8515 8516 Val += (Temp << i * 2); 8517 } 8518 8519 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8520 return -1; 8521 8522 return Val; 8523 } 8524 8525 int64_t 8526 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 8527 using namespace AMDGPU::DPP; 8528 8529 // sel:%d 8530 8531 int64_t Val; 8532 SMLoc Loc = getLoc(); 8533 8534 if (getParser().parseAbsoluteExpression(Val)) 8535 return -1; 8536 8537 struct DppCtrlCheck { 8538 int64_t Ctrl; 8539 int Lo; 8540 int Hi; 8541 }; 8542 8543 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 8544 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 8545 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 8546 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 8547 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 8548 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 8549 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 8550 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 8551 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 8552 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 8553 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 8554 .Default({-1, 0, 0}); 8555 8556 bool Valid; 8557 if (Check.Ctrl == -1) { 8558 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 8559 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 8560 } else { 8561 Valid = Check.Lo <= Val && Val <= Check.Hi; 8562 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val); 8563 } 8564 8565 if (!Valid) { 8566 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 8567 return -1; 8568 } 8569 8570 return Val; 8571 } 8572 8573 OperandMatchResultTy 8574 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 8575 using namespace AMDGPU::DPP; 8576 8577 if (!isToken(AsmToken::Identifier) || 8578 !isSupportedDPPCtrl(getTokenStr(), Operands)) 8579 return MatchOperand_NoMatch; 8580 8581 SMLoc S = getLoc(); 8582 int64_t Val = -1; 8583 StringRef Ctrl; 8584 8585 parseId(Ctrl); 8586 8587 if (Ctrl == "row_mirror") { 8588 Val = DppCtrl::ROW_MIRROR; 8589 } else if (Ctrl == "row_half_mirror") { 8590 Val = DppCtrl::ROW_HALF_MIRROR; 8591 } else { 8592 if (skipToken(AsmToken::Colon, "expected a colon")) { 8593 if (Ctrl == "quad_perm") { 8594 Val = parseDPPCtrlPerm(); 8595 } else { 8596 Val = parseDPPCtrlSel(Ctrl); 8597 } 8598 } 8599 } 8600 8601 if (Val == -1) 8602 return MatchOperand_ParseFail; 8603 8604 Operands.push_back( 8605 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 8606 return MatchOperand_Success; 8607 } 8608 8609 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 8610 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 8611 } 8612 8613 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 8614 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 8615 } 8616 8617 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 8618 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 8619 } 8620 8621 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 8622 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 8623 } 8624 8625 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 8626 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 8627 } 8628 8629 // Add dummy $old operand 8630 void 
AMDGPUAsmParser::cvtVOPC64NoDstDPP(MCInst &Inst, 8631 const OperandVector &Operands, 8632 bool IsDPP8) { 8633 Inst.addOperand(MCOperand::createReg(0)); 8634 cvtVOP3DPP(Inst, Operands, IsDPP8); 8635 } 8636 8637 void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8638 OptionalImmIndexMap OptionalIdx; 8639 unsigned Opc = Inst.getOpcode(); 8640 bool HasModifiers = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8641 unsigned I = 1; 8642 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8643 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8644 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8645 } 8646 8647 int Fi = 0; 8648 for (unsigned E = Operands.size(); I != E; ++I) { 8649 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8650 MCOI::TIED_TO); 8651 if (TiedTo != -1) { 8652 assert((unsigned)TiedTo < Inst.getNumOperands()); 8653 // handle tied old or src2 for MAC instructions 8654 Inst.addOperand(Inst.getOperand(TiedTo)); 8655 } 8656 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8657 // Add the register arguments 8658 if (IsDPP8 && Op.isFI()) { 8659 Fi = Op.getImm(); 8660 } else if (HasModifiers && 8661 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8662 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8663 } else if (Op.isReg()) { 8664 Op.addRegOperands(Inst, 1); 8665 } else if (Op.isImm() && 8666 Desc.OpInfo[Inst.getNumOperands()].RegClass != -1) { 8667 assert(!HasModifiers && "Case should be unreachable with modifiers"); 8668 assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP"); 8669 Op.addImmOperands(Inst, 1); 8670 } else if (Op.isImm()) { 8671 OptionalIdx[Op.getImmTy()] = I; 8672 } else { 8673 llvm_unreachable("unhandled operand type"); 8674 } 8675 } 8676 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8677 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8678 } 8679 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8680 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8681 } 8682 if (Desc.TSFlags & SIInstrFlags::VOP3P) 8683 cvtVOP3P(Inst, Operands, OptionalIdx); 8684 else if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) { 8685 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); 8686 } 8687 8688 if (IsDPP8) { 8689 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8); 8690 using namespace llvm::AMDGPU::DPP; 8691 Inst.addOperand(MCOperand::createImm(Fi? 
DPP8_FI_1 : DPP8_FI_0)); 8692 } else { 8693 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4); 8694 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8695 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8696 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8697 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8698 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8699 } 8700 } 8701 } 8702 8703 // Add dummy $old operand 8704 void AMDGPUAsmParser::cvtVOPCNoDstDPP(MCInst &Inst, 8705 const OperandVector &Operands, 8706 bool IsDPP8) { 8707 Inst.addOperand(MCOperand::createReg(0)); 8708 cvtDPP(Inst, Operands, IsDPP8); 8709 } 8710 8711 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8712 OptionalImmIndexMap OptionalIdx; 8713 8714 unsigned Opc = Inst.getOpcode(); 8715 bool HasModifiers = 8716 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8717 unsigned I = 1; 8718 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8719 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8720 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8721 } 8722 8723 int Fi = 0; 8724 for (unsigned E = Operands.size(); I != E; ++I) { 8725 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8726 MCOI::TIED_TO); 8727 if (TiedTo != -1) { 8728 assert((unsigned)TiedTo < Inst.getNumOperands()); 8729 // handle tied old or src2 for MAC instructions 8730 Inst.addOperand(Inst.getOperand(TiedTo)); 8731 } 8732 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8733 // Add the register arguments 8734 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8735 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8736 // Skip it. 8737 continue; 8738 } 8739 8740 if (IsDPP8) { 8741 if (Op.isDPP8()) { 8742 Op.addImmOperands(Inst, 1); 8743 } else if (HasModifiers && 8744 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8745 Op.addRegWithFPInputModsOperands(Inst, 2); 8746 } else if (Op.isFI()) { 8747 Fi = Op.getImm(); 8748 } else if (Op.isReg()) { 8749 Op.addRegOperands(Inst, 1); 8750 } else { 8751 llvm_unreachable("Invalid operand type"); 8752 } 8753 } else { 8754 if (HasModifiers && 8755 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8756 Op.addRegWithFPInputModsOperands(Inst, 2); 8757 } else if (Op.isReg()) { 8758 Op.addRegOperands(Inst, 1); 8759 } else if (Op.isDPPCtrl()) { 8760 Op.addImmOperands(Inst, 1); 8761 } else if (Op.isImm()) { 8762 // Handle optional arguments 8763 OptionalIdx[Op.getImmTy()] = I; 8764 } else { 8765 llvm_unreachable("Invalid operand type"); 8766 } 8767 } 8768 } 8769 8770 if (IsDPP8) { 8771 using namespace llvm::AMDGPU::DPP; 8772 Inst.addOperand(MCOperand::createImm(Fi? 
DPP8_FI_1 : DPP8_FI_0)); 8773 } else { 8774 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8775 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8776 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8777 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8778 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8779 } 8780 } 8781 } 8782 8783 //===----------------------------------------------------------------------===// 8784 // sdwa 8785 //===----------------------------------------------------------------------===// 8786 8787 OperandMatchResultTy 8788 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 8789 AMDGPUOperand::ImmTy Type) { 8790 using namespace llvm::AMDGPU::SDWA; 8791 8792 SMLoc S = getLoc(); 8793 StringRef Value; 8794 OperandMatchResultTy res; 8795 8796 SMLoc StringLoc; 8797 res = parseStringWithPrefix(Prefix, Value, StringLoc); 8798 if (res != MatchOperand_Success) { 8799 return res; 8800 } 8801 8802 int64_t Int; 8803 Int = StringSwitch<int64_t>(Value) 8804 .Case("BYTE_0", SdwaSel::BYTE_0) 8805 .Case("BYTE_1", SdwaSel::BYTE_1) 8806 .Case("BYTE_2", SdwaSel::BYTE_2) 8807 .Case("BYTE_3", SdwaSel::BYTE_3) 8808 .Case("WORD_0", SdwaSel::WORD_0) 8809 .Case("WORD_1", SdwaSel::WORD_1) 8810 .Case("DWORD", SdwaSel::DWORD) 8811 .Default(0xffffffff); 8812 8813 if (Int == 0xffffffff) { 8814 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8815 return MatchOperand_ParseFail; 8816 } 8817 8818 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8819 return MatchOperand_Success; 8820 } 8821 8822 OperandMatchResultTy 8823 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8824 using namespace llvm::AMDGPU::SDWA; 8825 8826 SMLoc S = getLoc(); 8827 StringRef Value; 8828 OperandMatchResultTy res; 8829 8830 SMLoc StringLoc; 8831 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8832 if (res != MatchOperand_Success) { 8833 return res; 8834 } 8835 8836 int64_t Int; 8837 Int = StringSwitch<int64_t>(Value) 8838 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8839 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8840 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8841 .Default(0xffffffff); 8842 8843 if (Int == 0xffffffff) { 8844 Error(StringLoc, "invalid dst_unused value"); 8845 return MatchOperand_ParseFail; 8846 } 8847 8848 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 8849 return MatchOperand_Success; 8850 } 8851 8852 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8853 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8854 } 8855 8856 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8857 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8858 } 8859 8860 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8861 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8862 } 8863 8864 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8865 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8866 } 8867 8868 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8869 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8870 } 8871 8872 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8873 uint64_t BasicInstType, 8874 bool SkipDstVcc, 8875 bool 
SkipSrcVcc) { 8876 using namespace llvm::AMDGPU::SDWA; 8877 8878 OptionalImmIndexMap OptionalIdx; 8879 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 8880 bool SkippedVcc = false; 8881 8882 unsigned I = 1; 8883 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8884 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8885 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8886 } 8887 8888 for (unsigned E = Operands.size(); I != E; ++I) { 8889 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8890 if (SkipVcc && !SkippedVcc && Op.isReg() && 8891 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 8892 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 8893 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 8894 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 8895 // Skip VCC only if we didn't skip it on previous iteration. 8896 // Note that src0 and src1 occupy 2 slots each because of modifiers. 8897 if (BasicInstType == SIInstrFlags::VOP2 && 8898 ((SkipDstVcc && Inst.getNumOperands() == 1) || 8899 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 8900 SkippedVcc = true; 8901 continue; 8902 } else if (BasicInstType == SIInstrFlags::VOPC && 8903 Inst.getNumOperands() == 0) { 8904 SkippedVcc = true; 8905 continue; 8906 } 8907 } 8908 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8909 Op.addRegOrImmWithInputModsOperands(Inst, 2); 8910 } else if (Op.isImm()) { 8911 // Handle optional arguments 8912 OptionalIdx[Op.getImmTy()] = I; 8913 } else { 8914 llvm_unreachable("Invalid operand type"); 8915 } 8916 SkippedVcc = false; 8917 } 8918 8919 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 8920 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 8921 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 8922 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 8923 switch (BasicInstType) { 8924 case SIInstrFlags::VOP1: 8925 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8926 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8927 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8928 } 8929 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8930 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8931 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8932 break; 8933 8934 case SIInstrFlags::VOP2: 8935 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8936 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8937 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8938 } 8939 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8940 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8941 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8942 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8943 break; 8944 8945 case SIInstrFlags::VOPC: 8946 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 8947 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8948 addOptionalImmOperand(Inst, Operands, OptionalIdx, 
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that
// the MatchClassKind enum is available.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet a token and fails to validate
  // the operand. This method checks if we were given an immediate operand but
  // expected to get the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but
    // it should also be enabled with 64-bit operands.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//
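
// s_endpgm may be followed by an optional immediate; both "s_endpgm" and,
// e.g., "s_endpgm 3" are accepted by the parser below. An omitted immediate
// defaults to 0, and the value must fit in 16 bits.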
OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }

//===----------------------------------------------------------------------===//
// LDSDIR
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitVDST() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitVDST);
}

bool AMDGPUOperand::isWaitVDST() const {
  return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
}

//===----------------------------------------------------------------------===//
// VINTERP
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitEXP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitEXP);
}

bool AMDGPUOperand::isWaitEXP() const {
  return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
}
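
// Note: the LDSDIR wait-vdst and VINTERP wait-exp operands above are small
// unsigned wait counts (4-bit and 3-bit respectively, per isWaitVDST() and
// isWaitEXP()); when omitted in the source they default to 0.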