//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetParser.h"

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
    ImmTyWaitVDST,
    ImmTyWaitEXP,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
210 return isSymbolRefExpr(); 211 } 212 213 bool isSymbolRefExpr() const { 214 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 215 } 216 217 bool isImm() const override { 218 return Kind == Immediate; 219 } 220 221 void setImmKindNone() const { 222 assert(isImm()); 223 Imm.Kind = ImmKindTyNone; 224 } 225 226 void setImmKindLiteral() const { 227 assert(isImm()); 228 Imm.Kind = ImmKindTyLiteral; 229 } 230 231 void setImmKindConst() const { 232 assert(isImm()); 233 Imm.Kind = ImmKindTyConst; 234 } 235 236 bool IsImmKindLiteral() const { 237 return isImm() && Imm.Kind == ImmKindTyLiteral; 238 } 239 240 bool isImmKindConst() const { 241 return isImm() && Imm.Kind == ImmKindTyConst; 242 } 243 244 bool isInlinableImm(MVT type) const; 245 bool isLiteralImm(MVT type) const; 246 247 bool isRegKind() const { 248 return Kind == Register; 249 } 250 251 bool isReg() const override { 252 return isRegKind() && !hasModifiers(); 253 } 254 255 bool isRegOrInline(unsigned RCID, MVT type) const { 256 return isRegClass(RCID) || isInlinableImm(type); 257 } 258 259 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 260 return isRegOrInline(RCID, type) || isLiteralImm(type); 261 } 262 263 bool isRegOrImmWithInt16InputMods() const { 264 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 265 } 266 267 bool isRegOrImmWithInt32InputMods() const { 268 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 269 } 270 271 bool isRegOrInlineImmWithInt16InputMods() const { 272 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16); 273 } 274 275 bool isRegOrInlineImmWithInt32InputMods() const { 276 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32); 277 } 278 279 bool isRegOrImmWithInt64InputMods() const { 280 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 281 } 282 283 bool isRegOrImmWithFP16InputMods() const { 284 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 285 } 286 287 bool isRegOrImmWithFP32InputMods() const { 288 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 289 } 290 291 bool isRegOrImmWithFP64InputMods() const { 292 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 293 } 294 295 bool isRegOrInlineImmWithFP16InputMods() const { 296 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16); 297 } 298 299 bool isRegOrInlineImmWithFP32InputMods() const { 300 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32); 301 } 302 303 304 bool isVReg() const { 305 return isRegClass(AMDGPU::VGPR_32RegClassID) || 306 isRegClass(AMDGPU::VReg_64RegClassID) || 307 isRegClass(AMDGPU::VReg_96RegClassID) || 308 isRegClass(AMDGPU::VReg_128RegClassID) || 309 isRegClass(AMDGPU::VReg_160RegClassID) || 310 isRegClass(AMDGPU::VReg_192RegClassID) || 311 isRegClass(AMDGPU::VReg_256RegClassID) || 312 isRegClass(AMDGPU::VReg_512RegClassID) || 313 isRegClass(AMDGPU::VReg_1024RegClassID); 314 } 315 316 bool isVReg32() const { 317 return isRegClass(AMDGPU::VGPR_32RegClassID); 318 } 319 320 bool isVReg32OrOff() const { 321 return isOff() || isVReg32(); 322 } 323 324 bool isNull() const { 325 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 326 } 327 328 bool isVRegWithInputMods() const; 329 330 bool isSDWAOperand(MVT type) const; 331 bool isSDWAFP16Operand() const; 332 bool isSDWAFP32Operand() const; 333 bool isSDWAInt16Operand() const; 334 bool isSDWAInt32Operand() const; 335 336 bool isImmTy(ImmTy ImmT) const { 337 return isImm() && Imm.Type == ImmT; 338 } 339 340 bool isImmModifier() const { 341 return isImm() && 
Imm.Type != ImmTyNone; 342 } 343 344 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 345 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 346 bool isDMask() const { return isImmTy(ImmTyDMask); } 347 bool isDim() const { return isImmTy(ImmTyDim); } 348 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 349 bool isDA() const { return isImmTy(ImmTyDA); } 350 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 351 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 352 bool isLWE() const { return isImmTy(ImmTyLWE); } 353 bool isOff() const { return isImmTy(ImmTyOff); } 354 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 355 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 356 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 357 bool isOffen() const { return isImmTy(ImmTyOffen); } 358 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 359 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 360 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 361 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 362 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 363 364 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 365 bool isGDS() const { return isImmTy(ImmTyGDS); } 366 bool isLDS() const { return isImmTy(ImmTyLDS); } 367 bool isCPol() const { return isImmTy(ImmTyCPol); } 368 bool isSWZ() const { return isImmTy(ImmTySWZ); } 369 bool isTFE() const { return isImmTy(ImmTyTFE); } 370 bool isD16() const { return isImmTy(ImmTyD16); } 371 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 372 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 373 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 374 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 375 bool isFI() const { return isImmTy(ImmTyDppFi); } 376 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 377 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 378 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 379 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 380 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 381 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 382 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 383 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 384 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 385 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 386 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 387 bool isHigh() const { return isImmTy(ImmTyHigh); } 388 389 bool isMod() const { 390 return isClampSI() || isOModSI(); 391 } 392 393 bool isRegOrImm() const { 394 return isReg() || isImm(); 395 } 396 397 bool isRegClass(unsigned RCID) const; 398 399 bool isInlineValue() const; 400 401 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 402 return isRegOrInline(RCID, type) && !hasModifiers(); 403 } 404 405 bool isSCSrcB16() const { 406 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 407 } 408 409 bool isSCSrcV2B16() const { 410 return isSCSrcB16(); 411 } 412 413 bool isSCSrcB32() const { 414 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 415 } 416 417 bool isSCSrcB64() const { 418 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 419 } 420 421 bool isBoolReg() const; 422 423 bool isSCSrcF16() const { 424 return 
isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 425 } 426 427 bool isSCSrcV2F16() const { 428 return isSCSrcF16(); 429 } 430 431 bool isSCSrcF32() const { 432 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 433 } 434 435 bool isSCSrcF64() const { 436 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 437 } 438 439 bool isSSrcB32() const { 440 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 441 } 442 443 bool isSSrcB16() const { 444 return isSCSrcB16() || isLiteralImm(MVT::i16); 445 } 446 447 bool isSSrcV2B16() const { 448 llvm_unreachable("cannot happen"); 449 return isSSrcB16(); 450 } 451 452 bool isSSrcB64() const { 453 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 454 // See isVSrc64(). 455 return isSCSrcB64() || isLiteralImm(MVT::i64); 456 } 457 458 bool isSSrcF32() const { 459 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 460 } 461 462 bool isSSrcF64() const { 463 return isSCSrcB64() || isLiteralImm(MVT::f64); 464 } 465 466 bool isSSrcF16() const { 467 return isSCSrcB16() || isLiteralImm(MVT::f16); 468 } 469 470 bool isSSrcV2F16() const { 471 llvm_unreachable("cannot happen"); 472 return isSSrcF16(); 473 } 474 475 bool isSSrcV2FP32() const { 476 llvm_unreachable("cannot happen"); 477 return isSSrcF32(); 478 } 479 480 bool isSCSrcV2FP32() const { 481 llvm_unreachable("cannot happen"); 482 return isSCSrcF32(); 483 } 484 485 bool isSSrcV2INT32() const { 486 llvm_unreachable("cannot happen"); 487 return isSSrcB32(); 488 } 489 490 bool isSCSrcV2INT32() const { 491 llvm_unreachable("cannot happen"); 492 return isSCSrcB32(); 493 } 494 495 bool isSSrcOrLdsB32() const { 496 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 497 isLiteralImm(MVT::i32) || isExpr(); 498 } 499 500 bool isVCSrcB32() const { 501 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 502 } 503 504 bool isVCSrcB64() const { 505 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 506 } 507 508 bool isVCSrcB16() const { 509 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 510 } 511 512 bool isVCSrcV2B16() const { 513 return isVCSrcB16(); 514 } 515 516 bool isVCSrcF32() const { 517 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 518 } 519 520 bool isVCSrcF64() const { 521 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 522 } 523 524 bool isVCSrcF16() const { 525 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 526 } 527 528 bool isVCSrcV2F16() const { 529 return isVCSrcF16(); 530 } 531 532 bool isVSrcB32() const { 533 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 534 } 535 536 bool isVSrcB64() const { 537 return isVCSrcF64() || isLiteralImm(MVT::i64); 538 } 539 540 bool isVSrcB16() const { 541 return isVCSrcB16() || isLiteralImm(MVT::i16); 542 } 543 544 bool isVSrcV2B16() const { 545 return isVSrcB16() || isLiteralImm(MVT::v2i16); 546 } 547 548 bool isVCSrcV2FP32() const { 549 return isVCSrcF64(); 550 } 551 552 bool isVSrcV2FP32() const { 553 return isVSrcF64() || isLiteralImm(MVT::v2f32); 554 } 555 556 bool isVCSrcV2INT32() const { 557 return isVCSrcB64(); 558 } 559 560 bool isVSrcV2INT32() const { 561 return isVSrcB64() || isLiteralImm(MVT::v2i32); 562 } 563 564 bool isVSrcF32() const { 565 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 566 } 567 568 bool isVSrcF64() const { 569 return isVCSrcF64() || isLiteralImm(MVT::f64); 570 } 571 572 bool isVSrcF16() const { 573 return isVCSrcF16() || 
isLiteralImm(MVT::f16); 574 } 575 576 bool isVSrcV2F16() const { 577 return isVSrcF16() || isLiteralImm(MVT::v2f16); 578 } 579 580 bool isVISrcB32() const { 581 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 582 } 583 584 bool isVISrcB16() const { 585 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 586 } 587 588 bool isVISrcV2B16() const { 589 return isVISrcB16(); 590 } 591 592 bool isVISrcF32() const { 593 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 594 } 595 596 bool isVISrcF16() const { 597 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 598 } 599 600 bool isVISrcV2F16() const { 601 return isVISrcF16() || isVISrcB32(); 602 } 603 604 bool isVISrc_64B64() const { 605 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); 606 } 607 608 bool isVISrc_64F64() const { 609 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); 610 } 611 612 bool isVISrc_64V2FP32() const { 613 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); 614 } 615 616 bool isVISrc_64V2INT32() const { 617 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); 618 } 619 620 bool isVISrc_256B64() const { 621 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); 622 } 623 624 bool isVISrc_256F64() const { 625 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); 626 } 627 628 bool isVISrc_128B16() const { 629 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); 630 } 631 632 bool isVISrc_128V2B16() const { 633 return isVISrc_128B16(); 634 } 635 636 bool isVISrc_128B32() const { 637 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); 638 } 639 640 bool isVISrc_128F32() const { 641 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); 642 } 643 644 bool isVISrc_256V2FP32() const { 645 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); 646 } 647 648 bool isVISrc_256V2INT32() const { 649 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); 650 } 651 652 bool isVISrc_512B32() const { 653 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); 654 } 655 656 bool isVISrc_512B16() const { 657 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); 658 } 659 660 bool isVISrc_512V2B16() const { 661 return isVISrc_512B16(); 662 } 663 664 bool isVISrc_512F32() const { 665 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); 666 } 667 668 bool isVISrc_512F16() const { 669 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); 670 } 671 672 bool isVISrc_512V2F16() const { 673 return isVISrc_512F16() || isVISrc_512B32(); 674 } 675 676 bool isVISrc_1024B32() const { 677 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); 678 } 679 680 bool isVISrc_1024B16() const { 681 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); 682 } 683 684 bool isVISrc_1024V2B16() const { 685 return isVISrc_1024B16(); 686 } 687 688 bool isVISrc_1024F32() const { 689 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); 690 } 691 692 bool isVISrc_1024F16() const { 693 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); 694 } 695 696 bool isVISrc_1024V2F16() const { 697 return isVISrc_1024F16() || isVISrc_1024B32(); 698 } 699 700 bool isAISrcB32() const { 701 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 702 } 703 704 bool isAISrcB16() const { 705 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 706 } 707 708 bool isAISrcV2B16() 
const { 709 return isAISrcB16(); 710 } 711 712 bool isAISrcF32() const { 713 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 714 } 715 716 bool isAISrcF16() const { 717 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 718 } 719 720 bool isAISrcV2F16() const { 721 return isAISrcF16() || isAISrcB32(); 722 } 723 724 bool isAISrc_64B64() const { 725 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); 726 } 727 728 bool isAISrc_64F64() const { 729 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); 730 } 731 732 bool isAISrc_128B32() const { 733 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 734 } 735 736 bool isAISrc_128B16() const { 737 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 738 } 739 740 bool isAISrc_128V2B16() const { 741 return isAISrc_128B16(); 742 } 743 744 bool isAISrc_128F32() const { 745 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 746 } 747 748 bool isAISrc_128F16() const { 749 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 750 } 751 752 bool isAISrc_128V2F16() const { 753 return isAISrc_128F16() || isAISrc_128B32(); 754 } 755 756 bool isVISrc_128F16() const { 757 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); 758 } 759 760 bool isVISrc_128V2F16() const { 761 return isVISrc_128F16() || isVISrc_128B32(); 762 } 763 764 bool isAISrc_256B64() const { 765 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); 766 } 767 768 bool isAISrc_256F64() const { 769 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); 770 } 771 772 bool isAISrc_512B32() const { 773 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 774 } 775 776 bool isAISrc_512B16() const { 777 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 778 } 779 780 bool isAISrc_512V2B16() const { 781 return isAISrc_512B16(); 782 } 783 784 bool isAISrc_512F32() const { 785 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 786 } 787 788 bool isAISrc_512F16() const { 789 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 790 } 791 792 bool isAISrc_512V2F16() const { 793 return isAISrc_512F16() || isAISrc_512B32(); 794 } 795 796 bool isAISrc_1024B32() const { 797 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 798 } 799 800 bool isAISrc_1024B16() const { 801 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 802 } 803 804 bool isAISrc_1024V2B16() const { 805 return isAISrc_1024B16(); 806 } 807 808 bool isAISrc_1024F32() const { 809 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 810 } 811 812 bool isAISrc_1024F16() const { 813 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 814 } 815 816 bool isAISrc_1024V2F16() const { 817 return isAISrc_1024F16() || isAISrc_1024B32(); 818 } 819 820 bool isKImmFP32() const { 821 return isLiteralImm(MVT::f32); 822 } 823 824 bool isKImmFP16() const { 825 return isLiteralImm(MVT::f16); 826 } 827 828 bool isMem() const override { 829 return false; 830 } 831 832 bool isExpr() const { 833 return Kind == Expression; 834 } 835 836 bool isSoppBrTarget() const { 837 return isExpr() || isImm(); 838 } 839 840 bool isSWaitCnt() const; 841 bool isDepCtr() const; 842 bool isSDelayAlu() const; 843 bool isHwreg() const; 844 bool isSendMsg() const; 845 bool isSwizzle() const; 846 bool isSMRDOffset8() const; 847 bool isSMEMOffset() const; 848 bool isSMRDLiteralOffset() const; 849 bool isDPP8() const; 850 bool 
isDPPCtrl() const; 851 bool isBLGP() const; 852 bool isCBSZ() const; 853 bool isABID() const; 854 bool isGPRIdxMode() const; 855 bool isS16Imm() const; 856 bool isU16Imm() const; 857 bool isEndpgm() const; 858 bool isWaitVDST() const; 859 bool isWaitEXP() const; 860 861 StringRef getExpressionAsToken() const { 862 assert(isExpr()); 863 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 864 return S->getSymbol().getName(); 865 } 866 867 StringRef getToken() const { 868 assert(isToken()); 869 870 if (Kind == Expression) 871 return getExpressionAsToken(); 872 873 return StringRef(Tok.Data, Tok.Length); 874 } 875 876 int64_t getImm() const { 877 assert(isImm()); 878 return Imm.Val; 879 } 880 881 void setImm(int64_t Val) { 882 assert(isImm()); 883 Imm.Val = Val; 884 } 885 886 ImmTy getImmTy() const { 887 assert(isImm()); 888 return Imm.Type; 889 } 890 891 unsigned getReg() const override { 892 assert(isRegKind()); 893 return Reg.RegNo; 894 } 895 896 SMLoc getStartLoc() const override { 897 return StartLoc; 898 } 899 900 SMLoc getEndLoc() const override { 901 return EndLoc; 902 } 903 904 SMRange getLocRange() const { 905 return SMRange(StartLoc, EndLoc); 906 } 907 908 Modifiers getModifiers() const { 909 assert(isRegKind() || isImmTy(ImmTyNone)); 910 return isRegKind() ? Reg.Mods : Imm.Mods; 911 } 912 913 void setModifiers(Modifiers Mods) { 914 assert(isRegKind() || isImmTy(ImmTyNone)); 915 if (isRegKind()) 916 Reg.Mods = Mods; 917 else 918 Imm.Mods = Mods; 919 } 920 921 bool hasModifiers() const { 922 return getModifiers().hasModifiers(); 923 } 924 925 bool hasFPModifiers() const { 926 return getModifiers().hasFPModifiers(); 927 } 928 929 bool hasIntModifiers() const { 930 return getModifiers().hasIntModifiers(); 931 } 932 933 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 934 935 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 936 937 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 938 939 template <unsigned Bitwidth> 940 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 941 942 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 943 addKImmFPOperands<16>(Inst, N); 944 } 945 946 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 947 addKImmFPOperands<32>(Inst, N); 948 } 949 950 void addRegOperands(MCInst &Inst, unsigned N) const; 951 952 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 953 addRegOperands(Inst, N); 954 } 955 956 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 957 if (isRegKind()) 958 addRegOperands(Inst, N); 959 else if (isExpr()) 960 Inst.addOperand(MCOperand::createExpr(Expr)); 961 else 962 addImmOperands(Inst, N); 963 } 964 965 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 966 Modifiers Mods = getModifiers(); 967 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 968 if (isRegKind()) { 969 addRegOperands(Inst, N); 970 } else { 971 addImmOperands(Inst, N, false); 972 } 973 } 974 975 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 976 assert(!hasIntModifiers()); 977 addRegOrImmWithInputModsOperands(Inst, N); 978 } 979 980 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 981 assert(!hasFPModifiers()); 982 addRegOrImmWithInputModsOperands(Inst, N); 983 } 984 985 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 986 Modifiers Mods = getModifiers(); 987 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 988 
assert(isRegKind()); 989 addRegOperands(Inst, N); 990 } 991 992 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 993 assert(!hasIntModifiers()); 994 addRegWithInputModsOperands(Inst, N); 995 } 996 997 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 998 assert(!hasFPModifiers()); 999 addRegWithInputModsOperands(Inst, N); 1000 } 1001 1002 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 1003 if (isImm()) 1004 addImmOperands(Inst, N); 1005 else { 1006 assert(isExpr()); 1007 Inst.addOperand(MCOperand::createExpr(Expr)); 1008 } 1009 } 1010 1011 static void printImmTy(raw_ostream& OS, ImmTy Type) { 1012 switch (Type) { 1013 case ImmTyNone: OS << "None"; break; 1014 case ImmTyGDS: OS << "GDS"; break; 1015 case ImmTyLDS: OS << "LDS"; break; 1016 case ImmTyOffen: OS << "Offen"; break; 1017 case ImmTyIdxen: OS << "Idxen"; break; 1018 case ImmTyAddr64: OS << "Addr64"; break; 1019 case ImmTyOffset: OS << "Offset"; break; 1020 case ImmTyInstOffset: OS << "InstOffset"; break; 1021 case ImmTyOffset0: OS << "Offset0"; break; 1022 case ImmTyOffset1: OS << "Offset1"; break; 1023 case ImmTyCPol: OS << "CPol"; break; 1024 case ImmTySWZ: OS << "SWZ"; break; 1025 case ImmTyTFE: OS << "TFE"; break; 1026 case ImmTyD16: OS << "D16"; break; 1027 case ImmTyFORMAT: OS << "FORMAT"; break; 1028 case ImmTyClampSI: OS << "ClampSI"; break; 1029 case ImmTyOModSI: OS << "OModSI"; break; 1030 case ImmTyDPP8: OS << "DPP8"; break; 1031 case ImmTyDppCtrl: OS << "DppCtrl"; break; 1032 case ImmTyDppRowMask: OS << "DppRowMask"; break; 1033 case ImmTyDppBankMask: OS << "DppBankMask"; break; 1034 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 1035 case ImmTyDppFi: OS << "FI"; break; 1036 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 1037 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 1038 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 1039 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 1040 case ImmTyDMask: OS << "DMask"; break; 1041 case ImmTyDim: OS << "Dim"; break; 1042 case ImmTyUNorm: OS << "UNorm"; break; 1043 case ImmTyDA: OS << "DA"; break; 1044 case ImmTyR128A16: OS << "R128A16"; break; 1045 case ImmTyA16: OS << "A16"; break; 1046 case ImmTyLWE: OS << "LWE"; break; 1047 case ImmTyOff: OS << "Off"; break; 1048 case ImmTyExpTgt: OS << "ExpTgt"; break; 1049 case ImmTyExpCompr: OS << "ExpCompr"; break; 1050 case ImmTyExpVM: OS << "ExpVM"; break; 1051 case ImmTyHwreg: OS << "Hwreg"; break; 1052 case ImmTySendMsg: OS << "SendMsg"; break; 1053 case ImmTyInterpSlot: OS << "InterpSlot"; break; 1054 case ImmTyInterpAttr: OS << "InterpAttr"; break; 1055 case ImmTyAttrChan: OS << "AttrChan"; break; 1056 case ImmTyOpSel: OS << "OpSel"; break; 1057 case ImmTyOpSelHi: OS << "OpSelHi"; break; 1058 case ImmTyNegLo: OS << "NegLo"; break; 1059 case ImmTyNegHi: OS << "NegHi"; break; 1060 case ImmTySwizzle: OS << "Swizzle"; break; 1061 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 1062 case ImmTyHigh: OS << "High"; break; 1063 case ImmTyBLGP: OS << "BLGP"; break; 1064 case ImmTyCBSZ: OS << "CBSZ"; break; 1065 case ImmTyABID: OS << "ABID"; break; 1066 case ImmTyEndpgm: OS << "Endpgm"; break; 1067 case ImmTyWaitVDST: OS << "WaitVDST"; break; 1068 case ImmTyWaitEXP: OS << "WaitEXP"; break; 1069 } 1070 } 1071 1072 void print(raw_ostream &OS) const override { 1073 switch (Kind) { 1074 case Register: 1075 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 1076 break; 1077 case Immediate: 1078 OS << '<' << getImm(); 1079 if (getImmTy() != ImmTyNone) { 1080 
OS << " type: "; printImmTy(OS, getImmTy()); 1081 } 1082 OS << " mods: " << Imm.Mods << '>'; 1083 break; 1084 case Token: 1085 OS << '\'' << getToken() << '\''; 1086 break; 1087 case Expression: 1088 OS << "<expr " << *Expr << '>'; 1089 break; 1090 } 1091 } 1092 1093 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 1094 int64_t Val, SMLoc Loc, 1095 ImmTy Type = ImmTyNone, 1096 bool IsFPImm = false) { 1097 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 1098 Op->Imm.Val = Val; 1099 Op->Imm.IsFPImm = IsFPImm; 1100 Op->Imm.Kind = ImmKindTyNone; 1101 Op->Imm.Type = Type; 1102 Op->Imm.Mods = Modifiers(); 1103 Op->StartLoc = Loc; 1104 Op->EndLoc = Loc; 1105 return Op; 1106 } 1107 1108 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 1109 StringRef Str, SMLoc Loc, 1110 bool HasExplicitEncodingSize = true) { 1111 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 1112 Res->Tok.Data = Str.data(); 1113 Res->Tok.Length = Str.size(); 1114 Res->StartLoc = Loc; 1115 Res->EndLoc = Loc; 1116 return Res; 1117 } 1118 1119 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 1120 unsigned RegNo, SMLoc S, 1121 SMLoc E) { 1122 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 1123 Op->Reg.RegNo = RegNo; 1124 Op->Reg.Mods = Modifiers(); 1125 Op->StartLoc = S; 1126 Op->EndLoc = E; 1127 return Op; 1128 } 1129 1130 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 1131 const class MCExpr *Expr, SMLoc S) { 1132 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 1133 Op->Expr = Expr; 1134 Op->StartLoc = S; 1135 Op->EndLoc = S; 1136 return Op; 1137 } 1138 }; 1139 1140 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 1141 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 1142 return OS; 1143 } 1144 1145 //===----------------------------------------------------------------------===// 1146 // AsmParser 1147 //===----------------------------------------------------------------------===// 1148 1149 // Holds info related to the current kernel, e.g. count of SGPRs used. 1150 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 1151 // .amdgpu_hsa_kernel or at EOF. 
1152 class KernelScopeInfo { 1153 int SgprIndexUnusedMin = -1; 1154 int VgprIndexUnusedMin = -1; 1155 int AgprIndexUnusedMin = -1; 1156 MCContext *Ctx = nullptr; 1157 MCSubtargetInfo const *MSTI = nullptr; 1158 1159 void usesSgprAt(int i) { 1160 if (i >= SgprIndexUnusedMin) { 1161 SgprIndexUnusedMin = ++i; 1162 if (Ctx) { 1163 MCSymbol* const Sym = 1164 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1165 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1166 } 1167 } 1168 } 1169 1170 void usesVgprAt(int i) { 1171 if (i >= VgprIndexUnusedMin) { 1172 VgprIndexUnusedMin = ++i; 1173 if (Ctx) { 1174 MCSymbol* const Sym = 1175 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1176 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1177 VgprIndexUnusedMin); 1178 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1179 } 1180 } 1181 } 1182 1183 void usesAgprAt(int i) { 1184 // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction 1185 if (!hasMAIInsts(*MSTI)) 1186 return; 1187 1188 if (i >= AgprIndexUnusedMin) { 1189 AgprIndexUnusedMin = ++i; 1190 if (Ctx) { 1191 MCSymbol* const Sym = 1192 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count")); 1193 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx)); 1194 1195 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a) 1196 MCSymbol* const vSym = 1197 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1198 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1199 VgprIndexUnusedMin); 1200 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1201 } 1202 } 1203 } 1204 1205 public: 1206 KernelScopeInfo() = default; 1207 1208 void initialize(MCContext &Context) { 1209 Ctx = &Context; 1210 MSTI = Ctx->getSubtargetInfo(); 1211 1212 usesSgprAt(SgprIndexUnusedMin = -1); 1213 usesVgprAt(VgprIndexUnusedMin = -1); 1214 if (hasMAIInsts(*MSTI)) { 1215 usesAgprAt(AgprIndexUnusedMin = -1); 1216 } 1217 } 1218 1219 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, 1220 unsigned RegWidth) { 1221 switch (RegKind) { 1222 case IS_SGPR: 1223 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1224 break; 1225 case IS_AGPR: 1226 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1227 break; 1228 case IS_VGPR: 1229 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1230 break; 1231 default: 1232 break; 1233 } 1234 } 1235 }; 1236 1237 class AMDGPUAsmParser : public MCTargetAsmParser { 1238 MCAsmParser &Parser; 1239 1240 // Number of extra operands parsed after the first optional operand. 1241 // This may be necessary to skip hardcoded mandatory operands. 1242 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1243 1244 unsigned ForcedEncodingSize = 0; 1245 bool ForcedDPP = false; 1246 bool ForcedSDWA = false; 1247 KernelScopeInfo KernelScope; 1248 unsigned CPolSeen; 1249 1250 /// @name Auto-generated Match Functions 1251 /// { 1252 1253 #define GET_ASSEMBLER_HEADER 1254 #include "AMDGPUGenAsmMatcher.inc" 1255 1256 /// } 1257 1258 private: 1259 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1260 bool OutOfRangeError(SMRange Range); 1261 /// Calculate VGPR/SGPR blocks required for given target, reserved 1262 /// registers, and user-specified NextFreeXGPR values. 1263 /// 1264 /// \param Features [in] Target features, used for bug corrections. 1265 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1266 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 
1267 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1268 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1269 /// descriptor field, if valid. 1270 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1271 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1272 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1273 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1274 /// \param VGPRBlocks [out] Result VGPR block count. 1275 /// \param SGPRBlocks [out] Result SGPR block count. 1276 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1277 bool FlatScrUsed, bool XNACKUsed, 1278 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1279 SMRange VGPRRange, unsigned NextFreeSGPR, 1280 SMRange SGPRRange, unsigned &VGPRBlocks, 1281 unsigned &SGPRBlocks); 1282 bool ParseDirectiveAMDGCNTarget(); 1283 bool ParseDirectiveAMDHSAKernel(); 1284 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1285 bool ParseDirectiveHSACodeObjectVersion(); 1286 bool ParseDirectiveHSACodeObjectISA(); 1287 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1288 bool ParseDirectiveAMDKernelCodeT(); 1289 // TODO: Possibly make subtargetHasRegister const. 1290 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo); 1291 bool ParseDirectiveAMDGPUHsaKernel(); 1292 1293 bool ParseDirectiveISAVersion(); 1294 bool ParseDirectiveHSAMetadata(); 1295 bool ParseDirectivePALMetadataBegin(); 1296 bool ParseDirectivePALMetadata(); 1297 bool ParseDirectiveAMDGPULDS(); 1298 1299 /// Common code to parse out a block of text (typically YAML) between start and 1300 /// end directives. 1301 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1302 const char *AssemblerDirectiveEnd, 1303 std::string &CollectString); 1304 1305 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1306 RegisterKind RegKind, unsigned Reg1, SMLoc Loc); 1307 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1308 unsigned &RegNum, unsigned &RegWidth, 1309 bool RestoreOnFailure = false); 1310 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1311 unsigned &RegNum, unsigned &RegWidth, 1312 SmallVectorImpl<AsmToken> &Tokens); 1313 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, 1314 unsigned &RegWidth, 1315 SmallVectorImpl<AsmToken> &Tokens); 1316 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, 1317 unsigned &RegWidth, 1318 SmallVectorImpl<AsmToken> &Tokens); 1319 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 1320 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); 1321 bool ParseRegRange(unsigned& Num, unsigned& Width); 1322 unsigned getRegularReg(RegisterKind RegKind, 1323 unsigned RegNum, 1324 unsigned RegWidth, 1325 SMLoc Loc); 1326 1327 bool isRegister(); 1328 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1329 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1330 void initializeGprCountSymbol(RegisterKind RegKind); 1331 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1332 unsigned RegWidth); 1333 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1334 bool IsAtomic, bool IsLds = false); 1335 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 1336 bool IsGdsHardcoded); 1337 1338 public: 1339 enum AMDGPUMatchResultTy { 1340 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 1341 }; 1342 enum 
OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  // TODO: isGFX90A is also true for GFX940. We need to clean this up.
1425 bool isGFX90A() const { 1426 return AMDGPU::isGFX90A(getSTI()); 1427 } 1428 1429 bool isGFX940() const { 1430 return AMDGPU::isGFX940(getSTI()); 1431 } 1432 1433 bool isGFX9Plus() const { 1434 return AMDGPU::isGFX9Plus(getSTI()); 1435 } 1436 1437 bool isGFX10() const { 1438 return AMDGPU::isGFX10(getSTI()); 1439 } 1440 1441 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1442 1443 bool isGFX11() const { 1444 return AMDGPU::isGFX11(getSTI()); 1445 } 1446 1447 bool isGFX11Plus() const { 1448 return AMDGPU::isGFX11Plus(getSTI()); 1449 } 1450 1451 bool isGFX10_BEncoding() const { 1452 return AMDGPU::isGFX10_BEncoding(getSTI()); 1453 } 1454 1455 bool hasInv2PiInlineImm() const { 1456 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1457 } 1458 1459 bool hasFlatOffsets() const { 1460 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1461 } 1462 1463 bool hasArchitectedFlatScratch() const { 1464 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; 1465 } 1466 1467 bool hasSGPR102_SGPR103() const { 1468 return !isVI() && !isGFX9(); 1469 } 1470 1471 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1472 1473 bool hasIntClamp() const { 1474 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1475 } 1476 1477 AMDGPUTargetStreamer &getTargetStreamer() { 1478 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1479 return static_cast<AMDGPUTargetStreamer &>(TS); 1480 } 1481 1482 const MCRegisterInfo *getMRI() const { 1483 // We need this const_cast because for some reason getContext() is not const 1484 // in MCAsmParser. 1485 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1486 } 1487 1488 const MCInstrInfo *getMII() const { 1489 return &MII; 1490 } 1491 1492 const FeatureBitset &getFeatureBits() const { 1493 return getSTI().getFeatureBits(); 1494 } 1495 1496 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1497 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1498 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1499 1500 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1501 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1502 bool isForcedDPP() const { return ForcedDPP; } 1503 bool isForcedSDWA() const { return ForcedSDWA; } 1504 ArrayRef<unsigned> getMatchedVariants() const; 1505 StringRef getMatchedVariantName() const; 1506 1507 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1508 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1509 bool RestoreOnFailure); 1510 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1511 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1512 SMLoc &EndLoc) override; 1513 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1514 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1515 unsigned Kind) override; 1516 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1517 OperandVector &Operands, MCStreamer &Out, 1518 uint64_t &ErrorInfo, 1519 bool MatchingInlineAsm) override; 1520 bool ParseDirective(AsmToken DirectiveID) override; 1521 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1522 OperandMode Mode = OperandMode_Default); 1523 StringRef parseMnemonicSuffix(StringRef Name); 1524 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1525 SMLoc NameLoc, OperandVector &Operands) override; 1526 //bool 
ProcessInstruction(MCInst &Inst); 1527 1528 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1529 1530 OperandMatchResultTy 1531 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1532 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1533 bool (*ConvertResult)(int64_t &) = nullptr); 1534 1535 OperandMatchResultTy 1536 parseOperandArrayWithPrefix(const char *Prefix, 1537 OperandVector &Operands, 1538 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1539 bool (*ConvertResult)(int64_t&) = nullptr); 1540 1541 OperandMatchResultTy 1542 parseNamedBit(StringRef Name, OperandVector &Operands, 1543 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1544 OperandMatchResultTy parseCPol(OperandVector &Operands); 1545 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1546 StringRef &Value, 1547 SMLoc &StringLoc); 1548 1549 bool isModifier(); 1550 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1551 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1552 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1553 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1554 bool parseSP3NegModifier(); 1555 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1556 OperandMatchResultTy parseReg(OperandVector &Operands); 1557 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1558 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1559 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1560 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1561 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1562 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1563 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1564 OperandMatchResultTy parseUfmt(int64_t &Format); 1565 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1566 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1567 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1568 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1569 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1570 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1571 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1572 1573 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1574 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1575 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1576 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1577 1578 bool parseCnt(int64_t &IntVal); 1579 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1580 1581 bool parseDepCtr(int64_t &IntVal, unsigned &Mask); 1582 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName); 1583 OperandMatchResultTy parseDepCtrOps(OperandVector &Operands); 1584 1585 bool parseDelay(int64_t &Delay); 1586 OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands); 1587 1588 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1589 1590 private: 1591 struct OperandInfoTy { 1592 SMLoc Loc; 1593 int64_t Id; 
1594 bool IsSymbolic = false; 1595 bool IsDefined = false; 1596 1597 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1598 }; 1599 1600 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1601 bool validateSendMsg(const OperandInfoTy &Msg, 1602 const OperandInfoTy &Op, 1603 const OperandInfoTy &Stream); 1604 1605 bool parseHwregBody(OperandInfoTy &HwReg, 1606 OperandInfoTy &Offset, 1607 OperandInfoTy &Width); 1608 bool validateHwreg(const OperandInfoTy &HwReg, 1609 const OperandInfoTy &Offset, 1610 const OperandInfoTy &Width); 1611 1612 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1613 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1614 SMLoc getBLGPLoc(const OperandVector &Operands) const; 1615 1616 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1617 const OperandVector &Operands) const; 1618 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1619 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1620 SMLoc getLitLoc(const OperandVector &Operands) const; 1621 SMLoc getConstLoc(const OperandVector &Operands) const; 1622 1623 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1624 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1625 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1626 bool validateSOPLiteral(const MCInst &Inst) const; 1627 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1628 bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands); 1629 bool validateIntClampSupported(const MCInst &Inst); 1630 bool validateMIMGAtomicDMask(const MCInst &Inst); 1631 bool validateMIMGGatherDMask(const MCInst &Inst); 1632 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1633 Optional<StringRef> validateMIMGDataSize(const MCInst &Inst); 1634 bool validateMIMGAddrSize(const MCInst &Inst); 1635 bool validateMIMGD16(const MCInst &Inst); 1636 bool validateMIMGDim(const MCInst &Inst); 1637 bool validateMIMGMSAA(const MCInst &Inst); 1638 bool validateOpSel(const MCInst &Inst); 1639 bool validateDPP(const MCInst &Inst, const OperandVector &Operands); 1640 bool validateVccOperand(unsigned Reg) const; 1641 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands); 1642 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1643 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands); 1644 bool validateAGPRLdSt(const MCInst &Inst) const; 1645 bool validateVGPRAlign(const MCInst &Inst) const; 1646 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands); 1647 bool validateGWS(const MCInst &Inst, const OperandVector &Operands); 1648 bool validateDivScale(const MCInst &Inst); 1649 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1650 const SMLoc &IDLoc); 1651 bool validateFlatLdsDMA(const MCInst &Inst, const OperandVector &Operands, 1652 const SMLoc &IDLoc); 1653 bool validateExeczVcczOperands(const OperandVector &Operands); 1654 Optional<StringRef> validateLdsDirect(const MCInst &Inst); 1655 unsigned getConstantBusLimit(unsigned Opcode) const; 1656 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1657 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1658 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1659 1660 bool isSupportedMnemo(StringRef Mnemo, 1661 const 
FeatureBitset &FBS); 1662 bool isSupportedMnemo(StringRef Mnemo, 1663 const FeatureBitset &FBS, 1664 ArrayRef<unsigned> Variants); 1665 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1666 1667 bool isId(const StringRef Id) const; 1668 bool isId(const AsmToken &Token, const StringRef Id) const; 1669 bool isToken(const AsmToken::TokenKind Kind) const; 1670 bool trySkipId(const StringRef Id); 1671 bool trySkipId(const StringRef Pref, const StringRef Id); 1672 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1673 bool trySkipToken(const AsmToken::TokenKind Kind); 1674 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1675 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1676 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1677 1678 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1679 AsmToken::TokenKind getTokenKind() const; 1680 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1681 bool parseExpr(OperandVector &Operands); 1682 StringRef getTokenStr() const; 1683 AsmToken peekToken(bool ShouldSkipSpace = true); 1684 AsmToken getToken() const; 1685 SMLoc getLoc() const; 1686 void lex(); 1687 1688 public: 1689 void onBeginOfFile() override; 1690 1691 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1692 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1693 1694 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1695 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1696 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1697 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1698 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1699 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1700 1701 bool parseSwizzleOperand(int64_t &Op, 1702 const unsigned MinVal, 1703 const unsigned MaxVal, 1704 const StringRef ErrMsg, 1705 SMLoc &Loc); 1706 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1707 const unsigned MinVal, 1708 const unsigned MaxVal, 1709 const StringRef ErrMsg); 1710 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1711 bool parseSwizzleOffset(int64_t &Imm); 1712 bool parseSwizzleMacro(int64_t &Imm); 1713 bool parseSwizzleQuadPerm(int64_t &Imm); 1714 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1715 bool parseSwizzleBroadcast(int64_t &Imm); 1716 bool parseSwizzleSwap(int64_t &Imm); 1717 bool parseSwizzleReverse(int64_t &Imm); 1718 1719 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1720 int64_t parseGPRIdxMacro(); 1721 1722 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 1723 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1724 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); } 1725 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1726 1727 AMDGPUOperand::Ptr defaultCPol() const; 1728 1729 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1730 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1731 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1732 AMDGPUOperand::Ptr defaultFlatOffset() const; 1733 1734 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1735 1736 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1737 OptionalImmIndexMap &OptionalIdx); 1738 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1739 void 
cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1740 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1741 void cvtVOPD(MCInst &Inst, const OperandVector &Operands); 1742 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands, 1743 OptionalImmIndexMap &OptionalIdx); 1744 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 1745 OptionalImmIndexMap &OptionalIdx); 1746 1747 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1748 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands); 1749 1750 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1751 bool IsAtomic = false); 1752 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1753 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1754 1755 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands); 1756 1757 bool parseDimId(unsigned &Encoding); 1758 OperandMatchResultTy parseDim(OperandVector &Operands); 1759 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1760 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1761 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1762 int64_t parseDPPCtrlSel(StringRef Ctrl); 1763 int64_t parseDPPCtrlPerm(); 1764 AMDGPUOperand::Ptr defaultRowMask() const; 1765 AMDGPUOperand::Ptr defaultBankMask() const; 1766 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1767 AMDGPUOperand::Ptr defaultFI() const; 1768 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1769 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { 1770 cvtDPP(Inst, Operands, true); 1771 } 1772 void cvtVOPCNoDstDPP(MCInst &Inst, const OperandVector &Operands, 1773 bool IsDPP8 = false); 1774 void cvtVOPCNoDstDPP8(MCInst &Inst, const OperandVector &Operands) { 1775 cvtVOPCNoDstDPP(Inst, Operands, true); 1776 } 1777 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, 1778 bool IsDPP8 = false); 1779 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) { 1780 cvtVOP3DPP(Inst, Operands, true); 1781 } 1782 void cvtVOPC64NoDstDPP(MCInst &Inst, const OperandVector &Operands, 1783 bool IsDPP8 = false); 1784 void cvtVOPC64NoDstDPP8(MCInst &Inst, const OperandVector &Operands) { 1785 cvtVOPC64NoDstDPP(Inst, Operands, true); 1786 } 1787 1788 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1789 AMDGPUOperand::ImmTy Type); 1790 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1791 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1792 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1793 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1794 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1795 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1796 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1797 uint64_t BasicInstType, 1798 bool SkipDstVcc = false, 1799 bool SkipSrcVcc = false); 1800 1801 AMDGPUOperand::Ptr defaultBLGP() const; 1802 AMDGPUOperand::Ptr defaultCBSZ() const; 1803 AMDGPUOperand::Ptr defaultABID() const; 1804 1805 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1806 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1807 1808 AMDGPUOperand::Ptr defaultWaitVDST() const; 1809 AMDGPUOperand::Ptr defaultWaitEXP() const; 1810 OperandMatchResultTy parseVOPD(OperandVector &Operands); 1811 }; 1812 1813 struct OptionalOperand { 1814 const char *Name; 1815 AMDGPUOperand::ImmTy Type; 1816 bool IsBit; 1817 bool (*ConvertResult)(int64_t&); 
1818 }; 1819 1820 } // end anonymous namespace 1821 1822 // May be called with integer type with equivalent bitwidth. 1823 static const fltSemantics *getFltSemantics(unsigned Size) { 1824 switch (Size) { 1825 case 4: 1826 return &APFloat::IEEEsingle(); 1827 case 8: 1828 return &APFloat::IEEEdouble(); 1829 case 2: 1830 return &APFloat::IEEEhalf(); 1831 default: 1832 llvm_unreachable("unsupported fp type"); 1833 } 1834 } 1835 1836 static const fltSemantics *getFltSemantics(MVT VT) { 1837 return getFltSemantics(VT.getSizeInBits() / 8); 1838 } 1839 1840 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1841 switch (OperandType) { 1842 case AMDGPU::OPERAND_REG_IMM_INT32: 1843 case AMDGPU::OPERAND_REG_IMM_FP32: 1844 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1845 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1846 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1847 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1848 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1849 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1850 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1851 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1852 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1853 case AMDGPU::OPERAND_KIMM32: 1854 return &APFloat::IEEEsingle(); 1855 case AMDGPU::OPERAND_REG_IMM_INT64: 1856 case AMDGPU::OPERAND_REG_IMM_FP64: 1857 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1858 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1859 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1860 return &APFloat::IEEEdouble(); 1861 case AMDGPU::OPERAND_REG_IMM_INT16: 1862 case AMDGPU::OPERAND_REG_IMM_FP16: 1863 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1864 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1865 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1866 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1867 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1868 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1869 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1870 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1871 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1872 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1873 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1874 case AMDGPU::OPERAND_KIMM16: 1875 return &APFloat::IEEEhalf(); 1876 default: 1877 llvm_unreachable("unsupported fp type"); 1878 } 1879 } 1880 1881 //===----------------------------------------------------------------------===// 1882 // Operand 1883 //===----------------------------------------------------------------------===// 1884 1885 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1886 bool Lost; 1887 1888 // Convert literal to single precision 1889 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1890 APFloat::rmNearestTiesToEven, 1891 &Lost); 1892 // We allow precision lost but not overflow or underflow 1893 if (Status != APFloat::opOK && 1894 Lost && 1895 ((Status & APFloat::opOverflow) != 0 || 1896 (Status & APFloat::opUnderflow) != 0)) { 1897 return false; 1898 } 1899 1900 return true; 1901 } 1902 1903 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1904 return isUIntN(Size, Val) || isIntN(Size, Val); 1905 } 1906 1907 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1908 if (VT.getScalarType() == MVT::i16) { 1909 // FP immediate values are broken. 1910 return isInlinableIntLiteral(Val); 1911 } 1912 1913 // f16/v2f16 operands work correctly for all values. 
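  // For example (illustrative, not exhaustive): the integers -16..64 and the
  // f16 encodings of +/-0.5, +/-1.0, +/-2.0 and +/-4.0 are inlinable here;
  // 0x3118 (1/(2*pi)) is accepted only when HasInv2Pi is set.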
1914 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1915 } 1916 1917 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1918 1919 // This is a hack to enable named inline values like 1920 // shared_base with both 32-bit and 64-bit operands. 1921 // Note that these values are defined as 1922 // 32-bit operands only. 1923 if (isInlineValue()) { 1924 return true; 1925 } 1926 1927 if (!isImmTy(ImmTyNone)) { 1928 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1929 return false; 1930 } 1931 // TODO: We should avoid using host float here. It would be better to 1932 // check the float bit values which is what a few other places do. 1933 // We've had bot failures before due to weird NaN support on mips hosts. 1934 1935 APInt Literal(64, Imm.Val); 1936 1937 if (Imm.IsFPImm) { // We got fp literal token 1938 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1939 return AMDGPU::isInlinableLiteral64(Imm.Val, 1940 AsmParser->hasInv2PiInlineImm()); 1941 } 1942 1943 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1944 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1945 return false; 1946 1947 if (type.getScalarSizeInBits() == 16) { 1948 return isInlineableLiteralOp16( 1949 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1950 type, AsmParser->hasInv2PiInlineImm()); 1951 } 1952 1953 // Check if single precision literal is inlinable 1954 return AMDGPU::isInlinableLiteral32( 1955 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1956 AsmParser->hasInv2PiInlineImm()); 1957 } 1958 1959 // We got int literal token. 1960 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1961 return AMDGPU::isInlinableLiteral64(Imm.Val, 1962 AsmParser->hasInv2PiInlineImm()); 1963 } 1964 1965 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1966 return false; 1967 } 1968 1969 if (type.getScalarSizeInBits() == 16) { 1970 return isInlineableLiteralOp16( 1971 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1972 type, AsmParser->hasInv2PiInlineImm()); 1973 } 1974 1975 return AMDGPU::isInlinableLiteral32( 1976 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1977 AsmParser->hasInv2PiInlineImm()); 1978 } 1979 1980 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1981 // Check that this immediate can be added as literal 1982 if (!isImmTy(ImmTyNone)) { 1983 return false; 1984 } 1985 1986 if (!Imm.IsFPImm) { 1987 // We got int literal token. 1988 1989 if (type == MVT::f64 && hasFPModifiers()) { 1990 // Cannot apply fp modifiers to int literals preserving the same semantics 1991 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1992 // disable these cases. 1993 return false; 1994 } 1995 1996 unsigned Size = type.getSizeInBits(); 1997 if (Size == 64) 1998 Size = 32; 1999 2000 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 2001 // types. 2002 return isSafeTruncation(Imm.Val, Size); 2003 } 2004 2005 // We got fp literal token 2006 if (type == MVT::f64) { // Expected 64-bit fp operand 2007 // We would set low 64-bits of literal to zeroes but we accept this literals 2008 return true; 2009 } 2010 2011 if (type == MVT::i64) { // Expected 64-bit int operand 2012 // We don't allow fp literals in 64-bit integer instructions. It is 2013 // unclear how we should encode them. 
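    // For example, with a 64-bit integer operand it is ambiguous whether the
    // single 32-bit hardware literal should hold the high half of the IEEE
    // double bit pattern (as is done for f64 operands) or a truncated
    // integer value, so such literals are rejected.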
2014 return false; 2015 } 2016 2017 // We allow fp literals with f16x2 operands assuming that the specified 2018 // literal goes into the lower half and the upper half is zero. We also 2019 // require that the literal may be losslessly converted to f16. 2020 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 2021 (type == MVT::v2i16)? MVT::i16 : 2022 (type == MVT::v2f32)? MVT::f32 : type; 2023 2024 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 2025 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 2026 } 2027 2028 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 2029 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 2030 } 2031 2032 bool AMDGPUOperand::isVRegWithInputMods() const { 2033 return isRegClass(AMDGPU::VGPR_32RegClassID) || 2034 // GFX90A allows DPP on 64-bit operands. 2035 (isRegClass(AMDGPU::VReg_64RegClassID) && 2036 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 2037 } 2038 2039 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 2040 if (AsmParser->isVI()) 2041 return isVReg32(); 2042 else if (AsmParser->isGFX9Plus()) 2043 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 2044 else 2045 return false; 2046 } 2047 2048 bool AMDGPUOperand::isSDWAFP16Operand() const { 2049 return isSDWAOperand(MVT::f16); 2050 } 2051 2052 bool AMDGPUOperand::isSDWAFP32Operand() const { 2053 return isSDWAOperand(MVT::f32); 2054 } 2055 2056 bool AMDGPUOperand::isSDWAInt16Operand() const { 2057 return isSDWAOperand(MVT::i16); 2058 } 2059 2060 bool AMDGPUOperand::isSDWAInt32Operand() const { 2061 return isSDWAOperand(MVT::i32); 2062 } 2063 2064 bool AMDGPUOperand::isBoolReg() const { 2065 auto FB = AsmParser->getFeatureBits(); 2066 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 2067 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 2068 } 2069 2070 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 2071 { 2072 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2073 assert(Size == 2 || Size == 4 || Size == 8); 2074 2075 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 2076 2077 if (Imm.Mods.Abs) { 2078 Val &= ~FpSignMask; 2079 } 2080 if (Imm.Mods.Neg) { 2081 Val ^= FpSignMask; 2082 } 2083 2084 return Val; 2085 } 2086 2087 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 2088 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 2089 Inst.getNumOperands())) { 2090 addLiteralImmOperand(Inst, Imm.Val, 2091 ApplyModifiers & 2092 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2093 } else { 2094 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 2095 Inst.addOperand(MCOperand::createImm(Imm.Val)); 2096 setImmKindNone(); 2097 } 2098 } 2099 2100 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 2101 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 2102 auto OpNum = Inst.getNumOperands(); 2103 // Check that this operand accepts literals 2104 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 2105 2106 if (ApplyModifiers) { 2107 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 2108 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 2109 Val = applyInputFPModifiers(Val, Size); 2110 } 2111 2112 APInt Literal(64, Val); 2113 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 2114 2115 if (Imm.IsFPImm) { // We got fp literal token 2116 switch (OpTy) { 2117 case AMDGPU::OPERAND_REG_IMM_INT64: 2118 case AMDGPU::OPERAND_REG_IMM_FP64: 2119 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2120 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2121 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2122 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2123 AsmParser->hasInv2PiInlineImm())) { 2124 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2125 setImmKindConst(); 2126 return; 2127 } 2128 2129 // Non-inlineable 2130 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2131 // For fp operands we check if low 32 bits are zeros 2132 if (Literal.getLoBits(32) != 0) { 2133 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2134 "Can't encode literal as exact 64-bit floating-point operand. " 2135 "Low 32-bits will be set to zero"); 2136 } 2137 2138 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2139 setImmKindLiteral(); 2140 return; 2141 } 2142 2143 // We don't allow fp literals in 64-bit integer instructions. It is 2144 // unclear how we should encode them. This case should be checked earlier 2145 // in predicate methods (isLiteralImm()) 2146 llvm_unreachable("fp literal in 64-bit integer instruction."); 2147 2148 case AMDGPU::OPERAND_REG_IMM_INT32: 2149 case AMDGPU::OPERAND_REG_IMM_FP32: 2150 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2151 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2152 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2153 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2154 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2155 case AMDGPU::OPERAND_REG_IMM_INT16: 2156 case AMDGPU::OPERAND_REG_IMM_FP16: 2157 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2158 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2159 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2160 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2161 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2162 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2163 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2164 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2165 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2166 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2167 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2168 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2169 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2170 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2171 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2172 case AMDGPU::OPERAND_KIMM32: 2173 case AMDGPU::OPERAND_KIMM16: { 2174 bool lost; 2175 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2176 // Convert literal to single precision 2177 FPLiteral.convert(*getOpFltSemantics(OpTy), 2178 APFloat::rmNearestTiesToEven, &lost); 2179 // We allow precision lost but not overflow or underflow. This should be 2180 // checked earlier in isLiteralImm() 2181 2182 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2183 Inst.addOperand(MCOperand::createImm(ImmVal)); 2184 setImmKindLiteral(); 2185 return; 2186 } 2187 default: 2188 llvm_unreachable("invalid operand size"); 2189 } 2190 2191 return; 2192 } 2193 2194 // We got int literal token. 2195 // Only sign extend inline immediates. 
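  // For example, with a 32-bit operand, -1 is an inline constant and is
  // added as-is, while -256 is not inlinable, so only its low 32 bits
  // (0xffffff00) are emitted as a literal.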
2196 switch (OpTy) { 2197 case AMDGPU::OPERAND_REG_IMM_INT32: 2198 case AMDGPU::OPERAND_REG_IMM_FP32: 2199 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2200 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2201 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2202 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2203 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2204 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2205 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2206 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2207 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2208 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2209 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2210 if (isSafeTruncation(Val, 32) && 2211 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2212 AsmParser->hasInv2PiInlineImm())) { 2213 Inst.addOperand(MCOperand::createImm(Val)); 2214 setImmKindConst(); 2215 return; 2216 } 2217 2218 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2219 setImmKindLiteral(); 2220 return; 2221 2222 case AMDGPU::OPERAND_REG_IMM_INT64: 2223 case AMDGPU::OPERAND_REG_IMM_FP64: 2224 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2225 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2226 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2227 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2228 Inst.addOperand(MCOperand::createImm(Val)); 2229 setImmKindConst(); 2230 return; 2231 } 2232 2233 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2234 setImmKindLiteral(); 2235 return; 2236 2237 case AMDGPU::OPERAND_REG_IMM_INT16: 2238 case AMDGPU::OPERAND_REG_IMM_FP16: 2239 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2240 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2241 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2242 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2243 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2244 if (isSafeTruncation(Val, 16) && 2245 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2246 AsmParser->hasInv2PiInlineImm())) { 2247 Inst.addOperand(MCOperand::createImm(Val)); 2248 setImmKindConst(); 2249 return; 2250 } 2251 2252 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2253 setImmKindLiteral(); 2254 return; 2255 2256 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2257 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2258 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2259 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2260 assert(isSafeTruncation(Val, 16)); 2261 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2262 AsmParser->hasInv2PiInlineImm())); 2263 2264 Inst.addOperand(MCOperand::createImm(Val)); 2265 return; 2266 } 2267 case AMDGPU::OPERAND_KIMM32: 2268 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2269 setImmKindNone(); 2270 return; 2271 case AMDGPU::OPERAND_KIMM16: 2272 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2273 setImmKindNone(); 2274 return; 2275 default: 2276 llvm_unreachable("invalid operand size"); 2277 } 2278 } 2279 2280 template <unsigned Bitwidth> 2281 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2282 APInt Literal(64, Imm.Val); 2283 setImmKindNone(); 2284 2285 if (!Imm.IsFPImm) { 2286 // We got int literal token. 
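    // Keep only the low Bitwidth bits of the integer literal. For example,
    // a 16-bit KIMM operand given 0x12345 would be encoded as 0x2345.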
2287 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2288 return; 2289 } 2290 2291 bool Lost; 2292 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2293 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2294 APFloat::rmNearestTiesToEven, &Lost); 2295 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2296 } 2297 2298 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2299 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2300 } 2301 2302 static bool isInlineValue(unsigned Reg) { 2303 switch (Reg) { 2304 case AMDGPU::SRC_SHARED_BASE: 2305 case AMDGPU::SRC_SHARED_LIMIT: 2306 case AMDGPU::SRC_PRIVATE_BASE: 2307 case AMDGPU::SRC_PRIVATE_LIMIT: 2308 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2309 return true; 2310 case AMDGPU::SRC_VCCZ: 2311 case AMDGPU::SRC_EXECZ: 2312 case AMDGPU::SRC_SCC: 2313 return true; 2314 case AMDGPU::SGPR_NULL: 2315 return true; 2316 default: 2317 return false; 2318 } 2319 } 2320 2321 bool AMDGPUOperand::isInlineValue() const { 2322 return isRegKind() && ::isInlineValue(getReg()); 2323 } 2324 2325 //===----------------------------------------------------------------------===// 2326 // AsmParser 2327 //===----------------------------------------------------------------------===// 2328 2329 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2330 if (Is == IS_VGPR) { 2331 switch (RegWidth) { 2332 default: return -1; 2333 case 32: 2334 return AMDGPU::VGPR_32RegClassID; 2335 case 64: 2336 return AMDGPU::VReg_64RegClassID; 2337 case 96: 2338 return AMDGPU::VReg_96RegClassID; 2339 case 128: 2340 return AMDGPU::VReg_128RegClassID; 2341 case 160: 2342 return AMDGPU::VReg_160RegClassID; 2343 case 192: 2344 return AMDGPU::VReg_192RegClassID; 2345 case 224: 2346 return AMDGPU::VReg_224RegClassID; 2347 case 256: 2348 return AMDGPU::VReg_256RegClassID; 2349 case 512: 2350 return AMDGPU::VReg_512RegClassID; 2351 case 1024: 2352 return AMDGPU::VReg_1024RegClassID; 2353 } 2354 } else if (Is == IS_TTMP) { 2355 switch (RegWidth) { 2356 default: return -1; 2357 case 32: 2358 return AMDGPU::TTMP_32RegClassID; 2359 case 64: 2360 return AMDGPU::TTMP_64RegClassID; 2361 case 128: 2362 return AMDGPU::TTMP_128RegClassID; 2363 case 256: 2364 return AMDGPU::TTMP_256RegClassID; 2365 case 512: 2366 return AMDGPU::TTMP_512RegClassID; 2367 } 2368 } else if (Is == IS_SGPR) { 2369 switch (RegWidth) { 2370 default: return -1; 2371 case 32: 2372 return AMDGPU::SGPR_32RegClassID; 2373 case 64: 2374 return AMDGPU::SGPR_64RegClassID; 2375 case 96: 2376 return AMDGPU::SGPR_96RegClassID; 2377 case 128: 2378 return AMDGPU::SGPR_128RegClassID; 2379 case 160: 2380 return AMDGPU::SGPR_160RegClassID; 2381 case 192: 2382 return AMDGPU::SGPR_192RegClassID; 2383 case 224: 2384 return AMDGPU::SGPR_224RegClassID; 2385 case 256: 2386 return AMDGPU::SGPR_256RegClassID; 2387 case 512: 2388 return AMDGPU::SGPR_512RegClassID; 2389 } 2390 } else if (Is == IS_AGPR) { 2391 switch (RegWidth) { 2392 default: return -1; 2393 case 32: 2394 return AMDGPU::AGPR_32RegClassID; 2395 case 64: 2396 return AMDGPU::AReg_64RegClassID; 2397 case 96: 2398 return AMDGPU::AReg_96RegClassID; 2399 case 128: 2400 return AMDGPU::AReg_128RegClassID; 2401 case 160: 2402 return AMDGPU::AReg_160RegClassID; 2403 case 192: 2404 return AMDGPU::AReg_192RegClassID; 2405 case 224: 2406 return AMDGPU::AReg_224RegClassID; 2407 case 256: 2408 return AMDGPU::AReg_256RegClassID; 2409 case 512: 2410 return AMDGPU::AReg_512RegClassID; 
2411 case 1024: 2412 return AMDGPU::AReg_1024RegClassID; 2413 } 2414 } 2415 return -1; 2416 } 2417 2418 static unsigned getSpecialRegForName(StringRef RegName) { 2419 return StringSwitch<unsigned>(RegName) 2420 .Case("exec", AMDGPU::EXEC) 2421 .Case("vcc", AMDGPU::VCC) 2422 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2423 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2424 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2425 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2426 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2427 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2428 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2429 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2430 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2431 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2432 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2433 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2434 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2435 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2436 .Case("m0", AMDGPU::M0) 2437 .Case("vccz", AMDGPU::SRC_VCCZ) 2438 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2439 .Case("execz", AMDGPU::SRC_EXECZ) 2440 .Case("src_execz", AMDGPU::SRC_EXECZ) 2441 .Case("scc", AMDGPU::SRC_SCC) 2442 .Case("src_scc", AMDGPU::SRC_SCC) 2443 .Case("tba", AMDGPU::TBA) 2444 .Case("tma", AMDGPU::TMA) 2445 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2446 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2447 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2448 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2449 .Case("vcc_lo", AMDGPU::VCC_LO) 2450 .Case("vcc_hi", AMDGPU::VCC_HI) 2451 .Case("exec_lo", AMDGPU::EXEC_LO) 2452 .Case("exec_hi", AMDGPU::EXEC_HI) 2453 .Case("tma_lo", AMDGPU::TMA_LO) 2454 .Case("tma_hi", AMDGPU::TMA_HI) 2455 .Case("tba_lo", AMDGPU::TBA_LO) 2456 .Case("tba_hi", AMDGPU::TBA_HI) 2457 .Case("pc", AMDGPU::PC_REG) 2458 .Case("null", AMDGPU::SGPR_NULL) 2459 .Default(AMDGPU::NoRegister); 2460 } 2461 2462 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2463 SMLoc &EndLoc, bool RestoreOnFailure) { 2464 auto R = parseRegister(); 2465 if (!R) return true; 2466 assert(R->isReg()); 2467 RegNo = R->getReg(); 2468 StartLoc = R->getStartLoc(); 2469 EndLoc = R->getEndLoc(); 2470 return false; 2471 } 2472 2473 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2474 SMLoc &EndLoc) { 2475 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2476 } 2477 2478 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2479 SMLoc &StartLoc, 2480 SMLoc &EndLoc) { 2481 bool Result = 2482 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2483 bool PendingErrors = getParser().hasPendingError(); 2484 getParser().clearPendingErrors(); 2485 if (PendingErrors) 2486 return MatchOperand_ParseFail; 2487 if (Result) 2488 return MatchOperand_NoMatch; 2489 return MatchOperand_Success; 2490 } 2491 2492 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2493 RegisterKind RegKind, unsigned Reg1, 2494 SMLoc Loc) { 2495 switch (RegKind) { 2496 case IS_SPECIAL: 2497 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2498 Reg = AMDGPU::EXEC; 2499 RegWidth = 64; 2500 return true; 2501 } 2502 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2503 Reg = AMDGPU::FLAT_SCR; 2504 RegWidth = 64; 2505 return true; 2506 } 2507 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2508 Reg = AMDGPU::XNACK_MASK; 2509 RegWidth = 64; 
2510 return true; 2511 } 2512 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2513 Reg = AMDGPU::VCC; 2514 RegWidth = 64; 2515 return true; 2516 } 2517 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2518 Reg = AMDGPU::TBA; 2519 RegWidth = 64; 2520 return true; 2521 } 2522 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2523 Reg = AMDGPU::TMA; 2524 RegWidth = 64; 2525 return true; 2526 } 2527 Error(Loc, "register does not fit in the list"); 2528 return false; 2529 case IS_VGPR: 2530 case IS_SGPR: 2531 case IS_AGPR: 2532 case IS_TTMP: 2533 if (Reg1 != Reg + RegWidth / 32) { 2534 Error(Loc, "registers in a list must have consecutive indices"); 2535 return false; 2536 } 2537 RegWidth += 32; 2538 return true; 2539 default: 2540 llvm_unreachable("unexpected register kind"); 2541 } 2542 } 2543 2544 struct RegInfo { 2545 StringLiteral Name; 2546 RegisterKind Kind; 2547 }; 2548 2549 static constexpr RegInfo RegularRegisters[] = { 2550 {{"v"}, IS_VGPR}, 2551 {{"s"}, IS_SGPR}, 2552 {{"ttmp"}, IS_TTMP}, 2553 {{"acc"}, IS_AGPR}, 2554 {{"a"}, IS_AGPR}, 2555 }; 2556 2557 static bool isRegularReg(RegisterKind Kind) { 2558 return Kind == IS_VGPR || 2559 Kind == IS_SGPR || 2560 Kind == IS_TTMP || 2561 Kind == IS_AGPR; 2562 } 2563 2564 static const RegInfo* getRegularRegInfo(StringRef Str) { 2565 for (const RegInfo &Reg : RegularRegisters) 2566 if (Str.startswith(Reg.Name)) 2567 return &Reg; 2568 return nullptr; 2569 } 2570 2571 static bool getRegNum(StringRef Str, unsigned& Num) { 2572 return !Str.getAsInteger(10, Num); 2573 } 2574 2575 bool 2576 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2577 const AsmToken &NextToken) const { 2578 2579 // A list of consecutive registers: [s0,s1,s2,s3] 2580 if (Token.is(AsmToken::LBrac)) 2581 return true; 2582 2583 if (!Token.is(AsmToken::Identifier)) 2584 return false; 2585 2586 // A single register like s0 or a range of registers like s[0:1] 2587 2588 StringRef Str = Token.getString(); 2589 const RegInfo *Reg = getRegularRegInfo(Str); 2590 if (Reg) { 2591 StringRef RegName = Reg->Name; 2592 StringRef RegSuffix = Str.substr(RegName.size()); 2593 if (!RegSuffix.empty()) { 2594 unsigned Num; 2595 // A single register with an index: rXX 2596 if (getRegNum(RegSuffix, Num)) 2597 return true; 2598 } else { 2599 // A range of registers: r[XX:YY]. 2600 if (NextToken.is(AsmToken::LBrac)) 2601 return true; 2602 } 2603 } 2604 2605 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2606 } 2607 2608 bool 2609 AMDGPUAsmParser::isRegister() 2610 { 2611 return isRegister(getToken(), peekToken()); 2612 } 2613 2614 unsigned 2615 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2616 unsigned RegNum, 2617 unsigned RegWidth, 2618 SMLoc Loc) { 2619 2620 assert(isRegularReg(RegKind)); 2621 2622 unsigned AlignSize = 1; 2623 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2624 // SGPR and TTMP registers must be aligned. 2625 // Max required alignment is 4 dwords. 
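    // For example, a 64-bit SGPR pair must start at an even index:
    // s[2:3] passes the alignment check below, while s[1:2] is rejected
    // with "invalid register alignment".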
2626 AlignSize = std::min(RegWidth / 32, 4u); 2627 } 2628 2629 if (RegNum % AlignSize != 0) { 2630 Error(Loc, "invalid register alignment"); 2631 return AMDGPU::NoRegister; 2632 } 2633 2634 unsigned RegIdx = RegNum / AlignSize; 2635 int RCID = getRegClass(RegKind, RegWidth); 2636 if (RCID == -1) { 2637 Error(Loc, "invalid or unsupported register size"); 2638 return AMDGPU::NoRegister; 2639 } 2640 2641 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2642 const MCRegisterClass RC = TRI->getRegClass(RCID); 2643 if (RegIdx >= RC.getNumRegs()) { 2644 Error(Loc, "register index is out of range"); 2645 return AMDGPU::NoRegister; 2646 } 2647 2648 return RC.getRegister(RegIdx); 2649 } 2650 2651 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) { 2652 int64_t RegLo, RegHi; 2653 if (!skipToken(AsmToken::LBrac, "missing register index")) 2654 return false; 2655 2656 SMLoc FirstIdxLoc = getLoc(); 2657 SMLoc SecondIdxLoc; 2658 2659 if (!parseExpr(RegLo)) 2660 return false; 2661 2662 if (trySkipToken(AsmToken::Colon)) { 2663 SecondIdxLoc = getLoc(); 2664 if (!parseExpr(RegHi)) 2665 return false; 2666 } else { 2667 RegHi = RegLo; 2668 } 2669 2670 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2671 return false; 2672 2673 if (!isUInt<32>(RegLo)) { 2674 Error(FirstIdxLoc, "invalid register index"); 2675 return false; 2676 } 2677 2678 if (!isUInt<32>(RegHi)) { 2679 Error(SecondIdxLoc, "invalid register index"); 2680 return false; 2681 } 2682 2683 if (RegLo > RegHi) { 2684 Error(FirstIdxLoc, "first register index should not exceed second index"); 2685 return false; 2686 } 2687 2688 Num = static_cast<unsigned>(RegLo); 2689 RegWidth = 32 * ((RegHi - RegLo) + 1); 2690 return true; 2691 } 2692 2693 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2694 unsigned &RegNum, unsigned &RegWidth, 2695 SmallVectorImpl<AsmToken> &Tokens) { 2696 assert(isToken(AsmToken::Identifier)); 2697 unsigned Reg = getSpecialRegForName(getTokenStr()); 2698 if (Reg) { 2699 RegNum = 0; 2700 RegWidth = 32; 2701 RegKind = IS_SPECIAL; 2702 Tokens.push_back(getToken()); 2703 lex(); // skip register name 2704 } 2705 return Reg; 2706 } 2707 2708 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2709 unsigned &RegNum, unsigned &RegWidth, 2710 SmallVectorImpl<AsmToken> &Tokens) { 2711 assert(isToken(AsmToken::Identifier)); 2712 StringRef RegName = getTokenStr(); 2713 auto Loc = getLoc(); 2714 2715 const RegInfo *RI = getRegularRegInfo(RegName); 2716 if (!RI) { 2717 Error(Loc, "invalid register name"); 2718 return AMDGPU::NoRegister; 2719 } 2720 2721 Tokens.push_back(getToken()); 2722 lex(); // skip register name 2723 2724 RegKind = RI->Kind; 2725 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2726 if (!RegSuffix.empty()) { 2727 // Single 32-bit register: vXX. 2728 if (!getRegNum(RegSuffix, RegNum)) { 2729 Error(Loc, "invalid register index"); 2730 return AMDGPU::NoRegister; 2731 } 2732 RegWidth = 32; 2733 } else { 2734 // Range of registers: v[XX:YY]. ":YY" is optional. 
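    // For example, "v[4:7]" yields RegNum = 4 and RegWidth = 128, while
    // "v[4]" (no ":YY" part) yields RegWidth = 32.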
2735 if (!ParseRegRange(RegNum, RegWidth)) 2736 return AMDGPU::NoRegister; 2737 } 2738 2739 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2740 } 2741 2742 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2743 unsigned &RegWidth, 2744 SmallVectorImpl<AsmToken> &Tokens) { 2745 unsigned Reg = AMDGPU::NoRegister; 2746 auto ListLoc = getLoc(); 2747 2748 if (!skipToken(AsmToken::LBrac, 2749 "expected a register or a list of registers")) { 2750 return AMDGPU::NoRegister; 2751 } 2752 2753 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2754 2755 auto Loc = getLoc(); 2756 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2757 return AMDGPU::NoRegister; 2758 if (RegWidth != 32) { 2759 Error(Loc, "expected a single 32-bit register"); 2760 return AMDGPU::NoRegister; 2761 } 2762 2763 for (; trySkipToken(AsmToken::Comma); ) { 2764 RegisterKind NextRegKind; 2765 unsigned NextReg, NextRegNum, NextRegWidth; 2766 Loc = getLoc(); 2767 2768 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2769 NextRegNum, NextRegWidth, 2770 Tokens)) { 2771 return AMDGPU::NoRegister; 2772 } 2773 if (NextRegWidth != 32) { 2774 Error(Loc, "expected a single 32-bit register"); 2775 return AMDGPU::NoRegister; 2776 } 2777 if (NextRegKind != RegKind) { 2778 Error(Loc, "registers in a list must be of the same kind"); 2779 return AMDGPU::NoRegister; 2780 } 2781 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2782 return AMDGPU::NoRegister; 2783 } 2784 2785 if (!skipToken(AsmToken::RBrac, 2786 "expected a comma or a closing square bracket")) { 2787 return AMDGPU::NoRegister; 2788 } 2789 2790 if (isRegularReg(RegKind)) 2791 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2792 2793 return Reg; 2794 } 2795 2796 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2797 unsigned &RegNum, unsigned &RegWidth, 2798 SmallVectorImpl<AsmToken> &Tokens) { 2799 auto Loc = getLoc(); 2800 Reg = AMDGPU::NoRegister; 2801 2802 if (isToken(AsmToken::Identifier)) { 2803 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2804 if (Reg == AMDGPU::NoRegister) 2805 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2806 } else { 2807 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2808 } 2809 2810 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2811 if (Reg == AMDGPU::NoRegister) { 2812 assert(Parser.hasPendingError()); 2813 return false; 2814 } 2815 2816 if (!subtargetHasRegister(*TRI, Reg)) { 2817 if (Reg == AMDGPU::SGPR_NULL) { 2818 Error(Loc, "'null' operand is not supported on this GPU"); 2819 } else { 2820 Error(Loc, "register not available on this GPU"); 2821 } 2822 return false; 2823 } 2824 2825 return true; 2826 } 2827 2828 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2829 unsigned &RegNum, unsigned &RegWidth, 2830 bool RestoreOnFailure /*=false*/) { 2831 Reg = AMDGPU::NoRegister; 2832 2833 SmallVector<AsmToken, 1> Tokens; 2834 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2835 if (RestoreOnFailure) { 2836 while (!Tokens.empty()) { 2837 getLexer().UnLex(Tokens.pop_back_val()); 2838 } 2839 } 2840 return true; 2841 } 2842 return false; 2843 } 2844 2845 Optional<StringRef> 2846 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2847 switch (RegKind) { 2848 case IS_VGPR: 2849 return StringRef(".amdgcn.next_free_vgpr"); 2850 case IS_SGPR: 2851 return StringRef(".amdgcn.next_free_sgpr"); 2852 default: 2853 return None; 2854 } 2855 } 2856 2857 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2858 auto SymbolName = getGprCountSymbolName(RegKind); 2859 assert(SymbolName && "initializing invalid register kind"); 2860 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2861 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2862 } 2863 2864 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2865 unsigned DwordRegIndex, 2866 unsigned RegWidth) { 2867 // Symbols are only defined for GCN targets 2868 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2869 return true; 2870 2871 auto SymbolName = getGprCountSymbolName(RegKind); 2872 if (!SymbolName) 2873 return true; 2874 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2875 2876 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1; 2877 int64_t OldCount; 2878 2879 if (!Sym->isVariable()) 2880 return !Error(getLoc(), 2881 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2882 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2883 return !Error( 2884 getLoc(), 2885 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2886 2887 if (OldCount <= NewMax) 2888 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2889 2890 return true; 2891 } 2892 2893 std::unique_ptr<AMDGPUOperand> 2894 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2895 const auto &Tok = getToken(); 2896 SMLoc StartLoc = Tok.getLoc(); 2897 SMLoc EndLoc = Tok.getEndLoc(); 2898 RegisterKind RegKind; 2899 unsigned Reg, RegNum, RegWidth; 2900 2901 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2902 return nullptr; 2903 } 2904 if (isHsaAbiVersion3AndAbove(&getSTI())) { 2905 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2906 return nullptr; 2907 } else 2908 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2909 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2910 } 2911 2912 OperandMatchResultTy 2913 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2914 // TODO: add syntactic sugar for 1/(2*PI) 2915 2916 if (isRegister()) 2917 return MatchOperand_NoMatch; 2918 assert(!isModifier()); 2919 2920 const auto& Tok = getToken(); 2921 const auto& NextTok = peekToken(); 2922 bool IsReal = Tok.is(AsmToken::Real); 2923 SMLoc S = getLoc(); 2924 bool Negate = false; 2925 2926 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2927 lex(); 2928 IsReal = true; 2929 Negate = true; 2930 } 2931 2932 if (IsReal) { 2933 // Floating-point expressions are not supported. 2934 // Can only allow floating-point literals with an 2935 // optional sign. 
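    // For example, "1.5" and "-0.5" are handled here, while something like
    // "1.5+x" is not; only integer expressions are evaluated below.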
2936 2937 StringRef Num = getTokenStr(); 2938 lex(); 2939 2940 APFloat RealVal(APFloat::IEEEdouble()); 2941 auto roundMode = APFloat::rmNearestTiesToEven; 2942 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2943 return MatchOperand_ParseFail; 2944 } 2945 if (Negate) 2946 RealVal.changeSign(); 2947 2948 Operands.push_back( 2949 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2950 AMDGPUOperand::ImmTyNone, true)); 2951 2952 return MatchOperand_Success; 2953 2954 } else { 2955 int64_t IntVal; 2956 const MCExpr *Expr; 2957 SMLoc S = getLoc(); 2958 2959 if (HasSP3AbsModifier) { 2960 // This is a workaround for handling expressions 2961 // as arguments of SP3 'abs' modifier, for example: 2962 // |1.0| 2963 // |-1| 2964 // |1+x| 2965 // This syntax is not compatible with syntax of standard 2966 // MC expressions (due to the trailing '|'). 2967 SMLoc EndLoc; 2968 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2969 return MatchOperand_ParseFail; 2970 } else { 2971 if (Parser.parseExpression(Expr)) 2972 return MatchOperand_ParseFail; 2973 } 2974 2975 if (Expr->evaluateAsAbsolute(IntVal)) { 2976 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2977 } else { 2978 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2979 } 2980 2981 return MatchOperand_Success; 2982 } 2983 2984 return MatchOperand_NoMatch; 2985 } 2986 2987 OperandMatchResultTy 2988 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2989 if (!isRegister()) 2990 return MatchOperand_NoMatch; 2991 2992 if (auto R = parseRegister()) { 2993 assert(R->isReg()); 2994 Operands.push_back(std::move(R)); 2995 return MatchOperand_Success; 2996 } 2997 return MatchOperand_ParseFail; 2998 } 2999 3000 OperandMatchResultTy 3001 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 3002 auto res = parseReg(Operands); 3003 if (res != MatchOperand_NoMatch) { 3004 return res; 3005 } else if (isModifier()) { 3006 return MatchOperand_NoMatch; 3007 } else { 3008 return parseImm(Operands, HasSP3AbsMod); 3009 } 3010 } 3011 3012 bool 3013 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3014 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 3015 const auto &str = Token.getString(); 3016 return str == "abs" || str == "neg" || str == "sext"; 3017 } 3018 return false; 3019 } 3020 3021 bool 3022 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 3023 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 3024 } 3025 3026 bool 3027 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3028 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 3029 } 3030 3031 bool 3032 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3033 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 3034 } 3035 3036 // Check if this is an operand modifier or an opcode modifier 3037 // which may look like an expression but it is not. We should 3038 // avoid parsing these modifiers as expressions. Currently 3039 // recognized sequences are: 3040 // |...| 3041 // abs(...) 3042 // neg(...) 3043 // sext(...) 3044 // -reg 3045 // -|...| 3046 // -abs(...) 3047 // name:... 3048 // Note that simple opcode modifiers like 'gds' may be parsed as 3049 // expressions; this is a special case. See getExpressionAsToken. 
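// For example, for the input "neg(v0)" the token pair (Identifier "neg",
// LParen) is recognized as a named operand modifier by the helpers above,
// so it is not handed to the expression parser.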
3050 // 3051 bool 3052 AMDGPUAsmParser::isModifier() { 3053 3054 AsmToken Tok = getToken(); 3055 AsmToken NextToken[2]; 3056 peekTokens(NextToken); 3057 3058 return isOperandModifier(Tok, NextToken[0]) || 3059 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 3060 isOpcodeModifierWithVal(Tok, NextToken[0]); 3061 } 3062 3063 // Check if the current token is an SP3 'neg' modifier. 3064 // Currently this modifier is allowed in the following context: 3065 // 3066 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 3067 // 2. Before an 'abs' modifier: -abs(...) 3068 // 3. Before an SP3 'abs' modifier: -|...| 3069 // 3070 // In all other cases "-" is handled as a part 3071 // of an expression that follows the sign. 3072 // 3073 // Note: When "-" is followed by an integer literal, 3074 // this is interpreted as integer negation rather 3075 // than a floating-point NEG modifier applied to N. 3076 // Beside being contr-intuitive, such use of floating-point 3077 // NEG modifier would have resulted in different meaning 3078 // of integer literals used with VOP1/2/C and VOP3, 3079 // for example: 3080 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 3081 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 3082 // Negative fp literals with preceding "-" are 3083 // handled likewise for uniformity 3084 // 3085 bool 3086 AMDGPUAsmParser::parseSP3NegModifier() { 3087 3088 AsmToken NextToken[2]; 3089 peekTokens(NextToken); 3090 3091 if (isToken(AsmToken::Minus) && 3092 (isRegister(NextToken[0], NextToken[1]) || 3093 NextToken[0].is(AsmToken::Pipe) || 3094 isId(NextToken[0], "abs"))) { 3095 lex(); 3096 return true; 3097 } 3098 3099 return false; 3100 } 3101 3102 OperandMatchResultTy 3103 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 3104 bool AllowImm) { 3105 bool Neg, SP3Neg; 3106 bool Abs, SP3Abs; 3107 SMLoc Loc; 3108 3109 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 3110 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 3111 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 3112 return MatchOperand_ParseFail; 3113 } 3114 3115 SP3Neg = parseSP3NegModifier(); 3116 3117 Loc = getLoc(); 3118 Neg = trySkipId("neg"); 3119 if (Neg && SP3Neg) { 3120 Error(Loc, "expected register or immediate"); 3121 return MatchOperand_ParseFail; 3122 } 3123 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 3124 return MatchOperand_ParseFail; 3125 3126 Abs = trySkipId("abs"); 3127 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 3128 return MatchOperand_ParseFail; 3129 3130 Loc = getLoc(); 3131 SP3Abs = trySkipToken(AsmToken::Pipe); 3132 if (Abs && SP3Abs) { 3133 Error(Loc, "expected register or immediate"); 3134 return MatchOperand_ParseFail; 3135 } 3136 3137 OperandMatchResultTy Res; 3138 if (AllowImm) { 3139 Res = parseRegOrImm(Operands, SP3Abs); 3140 } else { 3141 Res = parseReg(Operands); 3142 } 3143 if (Res != MatchOperand_Success) { 3144 return (SP3Neg || Neg || SP3Abs || Abs)? 
MatchOperand_ParseFail : Res; 3145 } 3146 3147 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 3148 return MatchOperand_ParseFail; 3149 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3150 return MatchOperand_ParseFail; 3151 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3152 return MatchOperand_ParseFail; 3153 3154 AMDGPUOperand::Modifiers Mods; 3155 Mods.Abs = Abs || SP3Abs; 3156 Mods.Neg = Neg || SP3Neg; 3157 3158 if (Mods.hasFPModifiers()) { 3159 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3160 if (Op.isExpr()) { 3161 Error(Op.getStartLoc(), "expected an absolute expression"); 3162 return MatchOperand_ParseFail; 3163 } 3164 Op.setModifiers(Mods); 3165 } 3166 return MatchOperand_Success; 3167 } 3168 3169 OperandMatchResultTy 3170 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3171 bool AllowImm) { 3172 bool Sext = trySkipId("sext"); 3173 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3174 return MatchOperand_ParseFail; 3175 3176 OperandMatchResultTy Res; 3177 if (AllowImm) { 3178 Res = parseRegOrImm(Operands); 3179 } else { 3180 Res = parseReg(Operands); 3181 } 3182 if (Res != MatchOperand_Success) { 3183 return Sext? MatchOperand_ParseFail : Res; 3184 } 3185 3186 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3187 return MatchOperand_ParseFail; 3188 3189 AMDGPUOperand::Modifiers Mods; 3190 Mods.Sext = Sext; 3191 3192 if (Mods.hasIntModifiers()) { 3193 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3194 if (Op.isExpr()) { 3195 Error(Op.getStartLoc(), "expected an absolute expression"); 3196 return MatchOperand_ParseFail; 3197 } 3198 Op.setModifiers(Mods); 3199 } 3200 3201 return MatchOperand_Success; 3202 } 3203 3204 OperandMatchResultTy 3205 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3206 return parseRegOrImmWithFPInputMods(Operands, false); 3207 } 3208 3209 OperandMatchResultTy 3210 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3211 return parseRegOrImmWithIntInputMods(Operands, false); 3212 } 3213 3214 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3215 auto Loc = getLoc(); 3216 if (trySkipId("off")) { 3217 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3218 AMDGPUOperand::ImmTyOff, false)); 3219 return MatchOperand_Success; 3220 } 3221 3222 if (!isRegister()) 3223 return MatchOperand_NoMatch; 3224 3225 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3226 if (Reg) { 3227 Operands.push_back(std::move(Reg)); 3228 return MatchOperand_Success; 3229 } 3230 3231 return MatchOperand_ParseFail; 3232 3233 } 3234 3235 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3236 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3237 3238 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3239 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3240 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3241 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3242 return Match_InvalidOperand; 3243 3244 if ((TSFlags & SIInstrFlags::VOP3) && 3245 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3246 getForcedEncodingSize() != 64) 3247 return Match_PreferE32; 3248 3249 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3250 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3251 // v_mac_f32/16 allow only dst_sel == DWORD; 3252 auto OpNum = 3253 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3254 const auto &Op = Inst.getOperand(OpNum); 3255 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3256 return Match_InvalidOperand; 3257 } 3258 } 3259 3260 return Match_Success; 3261 } 3262 3263 static ArrayRef<unsigned> getAllVariants() { 3264 static const unsigned Variants[] = { 3265 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3266 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, 3267 AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP 3268 }; 3269 3270 return makeArrayRef(Variants); 3271 } 3272 3273 // What asm variants we should check 3274 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3275 if (isForcedDPP() && isForcedVOP3()) { 3276 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP}; 3277 return makeArrayRef(Variants); 3278 } 3279 if (getForcedEncodingSize() == 32) { 3280 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3281 return makeArrayRef(Variants); 3282 } 3283 3284 if (isForcedVOP3()) { 3285 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3286 return makeArrayRef(Variants); 3287 } 3288 3289 if (isForcedSDWA()) { 3290 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3291 AMDGPUAsmVariants::SDWA9}; 3292 return makeArrayRef(Variants); 3293 } 3294 3295 if (isForcedDPP()) { 3296 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3297 return makeArrayRef(Variants); 3298 } 3299 3300 return getAllVariants(); 3301 } 3302 3303 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3304 if (isForcedDPP() && isForcedVOP3()) 3305 return "e64_dpp"; 3306 3307 if (getForcedEncodingSize() == 32) 3308 return "e32"; 3309 3310 if (isForcedVOP3()) 3311 return "e64"; 3312 3313 if (isForcedSDWA()) 3314 return "sdwa"; 3315 3316 if (isForcedDPP()) 3317 return "dpp"; 3318 3319 return ""; 3320 } 3321 3322 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3323 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3324 const unsigned Num = Desc.getNumImplicitUses(); 3325 for (unsigned i = 0; i < Num; ++i) { 3326 unsigned Reg = Desc.ImplicitUses[i]; 3327 switch (Reg) { 3328 case AMDGPU::FLAT_SCR: 3329 case AMDGPU::VCC: 3330 case AMDGPU::VCC_LO: 3331 case AMDGPU::VCC_HI: 3332 case AMDGPU::M0: 3333 return Reg; 3334 default: 3335 break; 3336 } 3337 } 3338 return AMDGPU::NoRegister; 3339 } 3340 3341 // NB: This code is correct only when used to check constant 3342 // bus limitations because GFX7 support no f16 inline constants. 3343 // Note that there are no cases when a GFX7 opcode violates 3344 // constant bus limitations due to the use of an f16 constant. 
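// As used below, an "inline constant" is a value the hardware encodes
// directly in a source operand field: the integers -16..64 and the
// floating-point constants +/-0.5, +/-1.0, +/-2.0 and +/-4.0 (plus
// 1/(2*pi) on subtargets that support it). Such operands consume neither
// a literal slot nor the constant bus.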
3345 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3346 unsigned OpIdx) const { 3347 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3348 3349 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3350 return false; 3351 } 3352 3353 const MCOperand &MO = Inst.getOperand(OpIdx); 3354 3355 int64_t Val = MO.getImm(); 3356 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3357 3358 switch (OpSize) { // expected operand size 3359 case 8: 3360 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3361 case 4: 3362 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3363 case 2: { 3364 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3365 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3366 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3367 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3368 return AMDGPU::isInlinableIntLiteral(Val); 3369 3370 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3371 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3372 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3373 return AMDGPU::isInlinableIntLiteralV216(Val); 3374 3375 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3376 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3377 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3378 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3379 3380 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3381 } 3382 default: 3383 llvm_unreachable("invalid operand size"); 3384 } 3385 } 3386 3387 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3388 if (!isGFX10Plus()) 3389 return 1; 3390 3391 switch (Opcode) { 3392 // 64-bit shift instructions can use only one scalar value input 3393 case AMDGPU::V_LSHLREV_B64_e64: 3394 case AMDGPU::V_LSHLREV_B64_gfx10: 3395 case AMDGPU::V_LSHLREV_B64_e64_gfx11: 3396 case AMDGPU::V_LSHRREV_B64_e64: 3397 case AMDGPU::V_LSHRREV_B64_gfx10: 3398 case AMDGPU::V_LSHRREV_B64_e64_gfx11: 3399 case AMDGPU::V_ASHRREV_I64_e64: 3400 case AMDGPU::V_ASHRREV_I64_gfx10: 3401 case AMDGPU::V_ASHRREV_I64_e64_gfx11: 3402 case AMDGPU::V_LSHL_B64_e64: 3403 case AMDGPU::V_LSHR_B64_e64: 3404 case AMDGPU::V_ASHR_I64_e64: 3405 return 1; 3406 default: 3407 return 2; 3408 } 3409 } 3410 3411 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3412 const MCOperand &MO = Inst.getOperand(OpIdx); 3413 if (MO.isImm()) { 3414 return !isInlineConstant(Inst, OpIdx); 3415 } else if (MO.isReg()) { 3416 auto Reg = MO.getReg(); 3417 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3418 auto PReg = mc2PseudoReg(Reg); 3419 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3420 } else { 3421 return true; 3422 } 3423 } 3424 3425 bool 3426 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3427 const OperandVector &Operands) { 3428 const unsigned Opcode = Inst.getOpcode(); 3429 const MCInstrDesc &Desc = MII.get(Opcode); 3430 unsigned LastSGPR = AMDGPU::NoRegister; 3431 unsigned ConstantBusUseCount = 0; 3432 unsigned NumLiterals = 0; 3433 unsigned LiteralSize; 3434 3435 if (Desc.TSFlags & 3436 (SIInstrFlags::VOPC | 3437 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3438 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3439 SIInstrFlags::SDWA)) { 3440 // Check special imm operands (used by madmk, etc) 3441 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3442 ++NumLiterals; 3443 LiteralSize = 4; 3444 } 3445 3446 SmallDenseSet<unsigned> SGPRsUsed; 3447 unsigned SGPRUsed = 
findImplicitSGPRReadInVOP(Inst); 3448 if (SGPRUsed != AMDGPU::NoRegister) { 3449 SGPRsUsed.insert(SGPRUsed); 3450 ++ConstantBusUseCount; 3451 } 3452 3453 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3454 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3455 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3456 3457 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3458 3459 for (int OpIdx : OpIndices) { 3460 if (OpIdx == -1) break; 3461 3462 const MCOperand &MO = Inst.getOperand(OpIdx); 3463 if (usesConstantBus(Inst, OpIdx)) { 3464 if (MO.isReg()) { 3465 LastSGPR = mc2PseudoReg(MO.getReg()); 3466 // Pairs of registers with a partial intersections like these 3467 // s0, s[0:1] 3468 // flat_scratch_lo, flat_scratch 3469 // flat_scratch_lo, flat_scratch_hi 3470 // are theoretically valid but they are disabled anyway. 3471 // Note that this code mimics SIInstrInfo::verifyInstruction 3472 if (SGPRsUsed.insert(LastSGPR).second) { 3473 ++ConstantBusUseCount; 3474 } 3475 } else { // Expression or a literal 3476 3477 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3478 continue; // special operand like VINTERP attr_chan 3479 3480 // An instruction may use only one literal. 3481 // This has been validated on the previous step. 3482 // See validateVOPLiteral. 3483 // This literal may be used as more than one operand. 3484 // If all these operands are of the same size, 3485 // this literal counts as one scalar value. 3486 // Otherwise it counts as 2 scalar values. 3487 // See "GFX10 Shader Programming", section 3.6.2.3. 3488 3489 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3490 if (Size < 4) Size = 4; 3491 3492 if (NumLiterals == 0) { 3493 NumLiterals = 1; 3494 LiteralSize = Size; 3495 } else if (LiteralSize != Size) { 3496 NumLiterals = 2; 3497 } 3498 } 3499 } 3500 } 3501 } 3502 ConstantBusUseCount += NumLiterals; 3503 3504 if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) 3505 return true; 3506 3507 SMLoc LitLoc = getLitLoc(Operands); 3508 SMLoc RegLoc = getRegLoc(LastSGPR, Operands); 3509 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? 
RegLoc : LitLoc; 3510 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3511 return false; 3512 } 3513 3514 bool 3515 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3516 const OperandVector &Operands) { 3517 const unsigned Opcode = Inst.getOpcode(); 3518 const MCInstrDesc &Desc = MII.get(Opcode); 3519 3520 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3521 if (DstIdx == -1 || 3522 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3523 return true; 3524 } 3525 3526 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3527 3528 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3529 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3530 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3531 3532 assert(DstIdx != -1); 3533 const MCOperand &Dst = Inst.getOperand(DstIdx); 3534 assert(Dst.isReg()); 3535 3536 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3537 3538 for (int SrcIdx : SrcIndices) { 3539 if (SrcIdx == -1) break; 3540 const MCOperand &Src = Inst.getOperand(SrcIdx); 3541 if (Src.isReg()) { 3542 if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) { 3543 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3544 Error(getRegLoc(SrcReg, Operands), 3545 "destination must be different than all sources"); 3546 return false; 3547 } 3548 } 3549 } 3550 3551 return true; 3552 } 3553 3554 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3555 3556 const unsigned Opc = Inst.getOpcode(); 3557 const MCInstrDesc &Desc = MII.get(Opc); 3558 3559 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3560 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3561 assert(ClampIdx != -1); 3562 return Inst.getOperand(ClampIdx).getImm() == 0; 3563 } 3564 3565 return true; 3566 } 3567 3568 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3569 3570 const unsigned Opc = Inst.getOpcode(); 3571 const MCInstrDesc &Desc = MII.get(Opc); 3572 3573 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3574 return None; 3575 3576 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3577 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3578 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3579 3580 assert(VDataIdx != -1); 3581 3582 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3583 return None; 3584 3585 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3586 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3587 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3588 if (DMask == 0) 3589 DMask = 1; 3590 3591 bool isPackedD16 = false; 3592 unsigned DataSize = 3593 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 3594 if (hasPackedD16()) { 3595 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3596 isPackedD16 = D16Idx >= 0; 3597 if (isPackedD16 && Inst.getOperand(D16Idx).getImm()) 3598 DataSize = (DataSize + 1) / 2; 3599 } 3600 3601 if ((VDataSize / 4) == DataSize + TFESize) 3602 return None; 3603 3604 return StringRef(isPackedD16 3605 ? 
"image data size does not match dmask, d16 and tfe" 3606 : "image data size does not match dmask and tfe"); 3607 } 3608 3609 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3610 const unsigned Opc = Inst.getOpcode(); 3611 const MCInstrDesc &Desc = MII.get(Opc); 3612 3613 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3614 return true; 3615 3616 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3617 3618 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3619 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3620 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3621 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3622 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3623 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3624 3625 assert(VAddr0Idx != -1); 3626 assert(SrsrcIdx != -1); 3627 assert(SrsrcIdx > VAddr0Idx); 3628 3629 if (DimIdx == -1) 3630 return true; // intersect_ray 3631 3632 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3633 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3634 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3635 unsigned ActualAddrSize = 3636 IsNSA ? SrsrcIdx - VAddr0Idx 3637 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3638 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3639 3640 unsigned ExpectedAddrSize = 3641 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3642 3643 if (!IsNSA) { 3644 if (ExpectedAddrSize > 8) 3645 ExpectedAddrSize = 16; 3646 3647 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3648 // This provides backward compatibility for assembly created 3649 // before 160b/192b/224b types were directly supported. 3650 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3651 return true; 3652 } 3653 3654 return ActualAddrSize == ExpectedAddrSize; 3655 } 3656 3657 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3658 3659 const unsigned Opc = Inst.getOpcode(); 3660 const MCInstrDesc &Desc = MII.get(Opc); 3661 3662 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3663 return true; 3664 if (!Desc.mayLoad() || !Desc.mayStore()) 3665 return true; // Not atomic 3666 3667 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3668 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3669 3670 // This is an incomplete check because image_atomic_cmpswap 3671 // may only use 0x3 and 0xf while other atomic operations 3672 // may use 0x1 and 0x3. However these limitations are 3673 // verified when we check that dmask matches dst size. 3674 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3675 } 3676 3677 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3678 3679 const unsigned Opc = Inst.getOpcode(); 3680 const MCInstrDesc &Desc = MII.get(Opc); 3681 3682 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3683 return true; 3684 3685 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3686 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3687 3688 // GATHER4 instructions use dmask in a different fashion compared to 3689 // other MIMG instructions. The only useful DMASK values are 3690 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3691 // (red,red,red,red) etc.) The ISA document doesn't mention 3692 // this. 
3693 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3694 } 3695 3696 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3697 const unsigned Opc = Inst.getOpcode(); 3698 const MCInstrDesc &Desc = MII.get(Opc); 3699 3700 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3701 return true; 3702 3703 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3704 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3705 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3706 3707 if (!BaseOpcode->MSAA) 3708 return true; 3709 3710 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3711 assert(DimIdx != -1); 3712 3713 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3714 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3715 3716 return DimInfo->MSAA; 3717 } 3718 3719 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3720 { 3721 switch (Opcode) { 3722 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3723 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3724 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3725 return true; 3726 default: 3727 return false; 3728 } 3729 } 3730 3731 // movrels* opcodes should only allow VGPRS as src0. 3732 // This is specified in .td description for vop1/vop3, 3733 // but sdwa is handled differently. See isSDWAOperand. 3734 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3735 const OperandVector &Operands) { 3736 3737 const unsigned Opc = Inst.getOpcode(); 3738 const MCInstrDesc &Desc = MII.get(Opc); 3739 3740 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3741 return true; 3742 3743 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3744 assert(Src0Idx != -1); 3745 3746 SMLoc ErrLoc; 3747 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3748 if (Src0.isReg()) { 3749 auto Reg = mc2PseudoReg(Src0.getReg()); 3750 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3751 if (!isSGPR(Reg, TRI)) 3752 return true; 3753 ErrLoc = getRegLoc(Reg, Operands); 3754 } else { 3755 ErrLoc = getConstLoc(Operands); 3756 } 3757 3758 Error(ErrLoc, "source operand must be a VGPR"); 3759 return false; 3760 } 3761 3762 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3763 const OperandVector &Operands) { 3764 3765 const unsigned Opc = Inst.getOpcode(); 3766 3767 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3768 return true; 3769 3770 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3771 assert(Src0Idx != -1); 3772 3773 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3774 if (!Src0.isReg()) 3775 return true; 3776 3777 auto Reg = mc2PseudoReg(Src0.getReg()); 3778 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3779 if (!isGFX90A() && isSGPR(Reg, TRI)) { 3780 Error(getRegLoc(Reg, Operands), 3781 "source operand must be either a VGPR or an inline constant"); 3782 return false; 3783 } 3784 3785 return true; 3786 } 3787 3788 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 3789 const OperandVector &Operands) { 3790 const unsigned Opc = Inst.getOpcode(); 3791 const MCInstrDesc &Desc = MII.get(Opc); 3792 3793 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 3794 return true; 3795 3796 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 3797 if (Src2Idx == -1) 3798 return true; 3799 3800 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 3801 if (!Src2.isReg()) 3802 return true; 3803 3804 MCRegister Src2Reg = Src2.getReg(); 3805 MCRegister DstReg = Inst.getOperand(0).getReg(); 3806 if (Src2Reg == DstReg) 3807 return 
true; 3808 3809 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3810 if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128) 3811 return true; 3812 3813 if (TRI->regsOverlap(Src2Reg, DstReg)) { 3814 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands), 3815 "source 2 operand must not partially overlap with dst"); 3816 return false; 3817 } 3818 3819 return true; 3820 } 3821 3822 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3823 switch (Inst.getOpcode()) { 3824 default: 3825 return true; 3826 case V_DIV_SCALE_F32_gfx6_gfx7: 3827 case V_DIV_SCALE_F32_vi: 3828 case V_DIV_SCALE_F32_gfx10: 3829 case V_DIV_SCALE_F64_gfx6_gfx7: 3830 case V_DIV_SCALE_F64_vi: 3831 case V_DIV_SCALE_F64_gfx10: 3832 break; 3833 } 3834 3835 // TODO: Check that src0 = src1 or src2. 3836 3837 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3838 AMDGPU::OpName::src1_modifiers, 3839 AMDGPU::OpName::src2_modifiers}) { 3840 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3841 .getImm() & 3842 SISrcMods::ABS) { 3843 return false; 3844 } 3845 } 3846 3847 return true; 3848 } 3849 3850 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3851 3852 const unsigned Opc = Inst.getOpcode(); 3853 const MCInstrDesc &Desc = MII.get(Opc); 3854 3855 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3856 return true; 3857 3858 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3859 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3860 if (isCI() || isSI()) 3861 return false; 3862 } 3863 3864 return true; 3865 } 3866 3867 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3868 const unsigned Opc = Inst.getOpcode(); 3869 const MCInstrDesc &Desc = MII.get(Opc); 3870 3871 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3872 return true; 3873 3874 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3875 if (DimIdx < 0) 3876 return true; 3877 3878 long Imm = Inst.getOperand(DimIdx).getImm(); 3879 if (Imm < 0 || Imm >= 8) 3880 return false; 3881 3882 return true; 3883 } 3884 3885 static bool IsRevOpcode(const unsigned Opcode) 3886 { 3887 switch (Opcode) { 3888 case AMDGPU::V_SUBREV_F32_e32: 3889 case AMDGPU::V_SUBREV_F32_e64: 3890 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3891 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3892 case AMDGPU::V_SUBREV_F32_e32_vi: 3893 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3894 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3895 case AMDGPU::V_SUBREV_F32_e64_vi: 3896 3897 case AMDGPU::V_SUBREV_CO_U32_e32: 3898 case AMDGPU::V_SUBREV_CO_U32_e64: 3899 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3900 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3901 3902 case AMDGPU::V_SUBBREV_U32_e32: 3903 case AMDGPU::V_SUBBREV_U32_e64: 3904 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3905 case AMDGPU::V_SUBBREV_U32_e32_vi: 3906 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3907 case AMDGPU::V_SUBBREV_U32_e64_vi: 3908 3909 case AMDGPU::V_SUBREV_U32_e32: 3910 case AMDGPU::V_SUBREV_U32_e64: 3911 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3912 case AMDGPU::V_SUBREV_U32_e32_vi: 3913 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3914 case AMDGPU::V_SUBREV_U32_e64_vi: 3915 3916 case AMDGPU::V_SUBREV_F16_e32: 3917 case AMDGPU::V_SUBREV_F16_e64: 3918 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3919 case AMDGPU::V_SUBREV_F16_e32_vi: 3920 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3921 case AMDGPU::V_SUBREV_F16_e64_vi: 3922 3923 case AMDGPU::V_SUBREV_U16_e32: 3924 case AMDGPU::V_SUBREV_U16_e64: 3925 case AMDGPU::V_SUBREV_U16_e32_vi: 3926 case AMDGPU::V_SUBREV_U16_e64_vi: 3927 3928 case
AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3929 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3930 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3931 3932 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3933 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3934 3935 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3936 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3937 3938 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3939 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3940 3941 case AMDGPU::V_LSHRREV_B32_e32: 3942 case AMDGPU::V_LSHRREV_B32_e64: 3943 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3944 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3945 case AMDGPU::V_LSHRREV_B32_e32_vi: 3946 case AMDGPU::V_LSHRREV_B32_e64_vi: 3947 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3948 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3949 3950 case AMDGPU::V_ASHRREV_I32_e32: 3951 case AMDGPU::V_ASHRREV_I32_e64: 3952 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3953 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3954 case AMDGPU::V_ASHRREV_I32_e32_vi: 3955 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3956 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3957 case AMDGPU::V_ASHRREV_I32_e64_vi: 3958 3959 case AMDGPU::V_LSHLREV_B32_e32: 3960 case AMDGPU::V_LSHLREV_B32_e64: 3961 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3962 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3963 case AMDGPU::V_LSHLREV_B32_e32_vi: 3964 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3965 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3966 case AMDGPU::V_LSHLREV_B32_e64_vi: 3967 3968 case AMDGPU::V_LSHLREV_B16_e32: 3969 case AMDGPU::V_LSHLREV_B16_e64: 3970 case AMDGPU::V_LSHLREV_B16_e32_vi: 3971 case AMDGPU::V_LSHLREV_B16_e64_vi: 3972 case AMDGPU::V_LSHLREV_B16_gfx10: 3973 3974 case AMDGPU::V_LSHRREV_B16_e32: 3975 case AMDGPU::V_LSHRREV_B16_e64: 3976 case AMDGPU::V_LSHRREV_B16_e32_vi: 3977 case AMDGPU::V_LSHRREV_B16_e64_vi: 3978 case AMDGPU::V_LSHRREV_B16_gfx10: 3979 3980 case AMDGPU::V_ASHRREV_I16_e32: 3981 case AMDGPU::V_ASHRREV_I16_e64: 3982 case AMDGPU::V_ASHRREV_I16_e32_vi: 3983 case AMDGPU::V_ASHRREV_I16_e64_vi: 3984 case AMDGPU::V_ASHRREV_I16_gfx10: 3985 3986 case AMDGPU::V_LSHLREV_B64_e64: 3987 case AMDGPU::V_LSHLREV_B64_gfx10: 3988 case AMDGPU::V_LSHLREV_B64_vi: 3989 3990 case AMDGPU::V_LSHRREV_B64_e64: 3991 case AMDGPU::V_LSHRREV_B64_gfx10: 3992 case AMDGPU::V_LSHRREV_B64_vi: 3993 3994 case AMDGPU::V_ASHRREV_I64_e64: 3995 case AMDGPU::V_ASHRREV_I64_gfx10: 3996 case AMDGPU::V_ASHRREV_I64_vi: 3997 3998 case AMDGPU::V_PK_LSHLREV_B16: 3999 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 4000 case AMDGPU::V_PK_LSHLREV_B16_vi: 4001 4002 case AMDGPU::V_PK_LSHRREV_B16: 4003 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 4004 case AMDGPU::V_PK_LSHRREV_B16_vi: 4005 case AMDGPU::V_PK_ASHRREV_I16: 4006 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 4007 case AMDGPU::V_PK_ASHRREV_I16_vi: 4008 return true; 4009 default: 4010 return false; 4011 } 4012 } 4013 4014 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 4015 4016 using namespace SIInstrFlags; 4017 const unsigned Opcode = Inst.getOpcode(); 4018 const MCInstrDesc &Desc = MII.get(Opcode); 4019 4020 // lds_direct register is defined so that it can be used 4021 // with 9-bit operands only. Ignore encodings which do not accept these. 
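// The checks below reject lds_direct on GPUs where it does not exist (GFX90A, GFX11+),
// in reversed-operand and SDWA encodings, and in any source position other than src0.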
4022 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 4023 if ((Desc.TSFlags & Enc) == 0) 4024 return None; 4025 4026 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 4027 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 4028 if (SrcIdx == -1) 4029 break; 4030 const auto &Src = Inst.getOperand(SrcIdx); 4031 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 4032 4033 if (isGFX90A() || isGFX11Plus()) 4034 return StringRef("lds_direct is not supported on this GPU"); 4035 4036 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 4037 return StringRef("lds_direct cannot be used with this instruction"); 4038 4039 if (SrcName != OpName::src0) 4040 return StringRef("lds_direct may be used as src0 only"); 4041 } 4042 } 4043 4044 return None; 4045 } 4046 4047 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 4048 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4049 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4050 if (Op.isFlatOffset()) 4051 return Op.getStartLoc(); 4052 } 4053 return getLoc(); 4054 } 4055 4056 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 4057 const OperandVector &Operands) { 4058 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4059 if ((TSFlags & SIInstrFlags::FLAT) == 0) 4060 return true; 4061 4062 auto Opcode = Inst.getOpcode(); 4063 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4064 assert(OpNum != -1); 4065 4066 const auto &Op = Inst.getOperand(OpNum); 4067 if (!hasFlatOffsets() && Op.getImm() != 0) { 4068 Error(getFlatOffsetLoc(Operands), 4069 "flat offset modifier is not supported on this GPU"); 4070 return false; 4071 } 4072 4073 // For FLAT segment the offset must be positive; 4074 // MSB is ignored and forced to zero. 4075 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 4076 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 4077 if (!isIntN(OffsetSize, Op.getImm())) { 4078 Error(getFlatOffsetLoc(Operands), 4079 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 4080 return false; 4081 } 4082 } else { 4083 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 4084 if (!isUIntN(OffsetSize, Op.getImm())) { 4085 Error(getFlatOffsetLoc(Operands), 4086 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 4087 return false; 4088 } 4089 } 4090 4091 return true; 4092 } 4093 4094 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 4095 // Start with second operand because SMEM Offset cannot be dst or src0. 
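// Operands[0] is the mnemonic token, so scanning from index 2 also skips the first
// parsed operand, which can never be the offset.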
4096 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 4097 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4098 if (Op.isSMEMOffset()) 4099 return Op.getStartLoc(); 4100 } 4101 return getLoc(); 4102 } 4103 4104 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 4105 const OperandVector &Operands) { 4106 if (isCI() || isSI()) 4107 return true; 4108 4109 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4110 if ((TSFlags & SIInstrFlags::SMRD) == 0) 4111 return true; 4112 4113 auto Opcode = Inst.getOpcode(); 4114 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4115 if (OpNum == -1) 4116 return true; 4117 4118 const auto &Op = Inst.getOperand(OpNum); 4119 if (!Op.isImm()) 4120 return true; 4121 4122 uint64_t Offset = Op.getImm(); 4123 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 4124 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 4125 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 4126 return true; 4127 4128 Error(getSMEMOffsetLoc(Operands), 4129 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 4130 "expected a 21-bit signed offset"); 4131 4132 return false; 4133 } 4134 4135 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 4136 unsigned Opcode = Inst.getOpcode(); 4137 const MCInstrDesc &Desc = MII.get(Opcode); 4138 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 4139 return true; 4140 4141 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4142 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4143 4144 const int OpIndices[] = { Src0Idx, Src1Idx }; 4145 4146 unsigned NumExprs = 0; 4147 unsigned NumLiterals = 0; 4148 uint32_t LiteralValue; 4149 4150 for (int OpIdx : OpIndices) { 4151 if (OpIdx == -1) break; 4152 4153 const MCOperand &MO = Inst.getOperand(OpIdx); 4154 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 4155 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 4156 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4157 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4158 if (NumLiterals == 0 || LiteralValue != Value) { 4159 LiteralValue = Value; 4160 ++NumLiterals; 4161 } 4162 } else if (MO.isExpr()) { 4163 ++NumExprs; 4164 } 4165 } 4166 } 4167 4168 return NumLiterals + NumExprs <= 1; 4169 } 4170 4171 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4172 const unsigned Opc = Inst.getOpcode(); 4173 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 4174 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 4175 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4176 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4177 4178 if (OpSel & ~3) 4179 return false; 4180 } 4181 4182 if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) { 4183 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4184 if (OpSelIdx != -1) { 4185 if (Inst.getOperand(OpSelIdx).getImm() != 0) 4186 return false; 4187 } 4188 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 4189 if (OpSelHiIdx != -1) { 4190 if (Inst.getOperand(OpSelHiIdx).getImm() != -1) 4191 return false; 4192 } 4193 } 4194 4195 return true; 4196 } 4197 4198 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4199 const OperandVector &Operands) { 4200 const unsigned Opc = Inst.getOpcode(); 4201 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4202 if (DppCtrlIdx < 0) 4203 return true; 4204 unsigned DppCtrl = 
Inst.getOperand(DppCtrlIdx).getImm(); 4205 4206 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 4207 // DPP64 is supported for row_newbcast only. 4208 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4209 if (Src0Idx >= 0 && 4210 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 4211 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4212 Error(S, "64 bit dpp only supports row_newbcast"); 4213 return false; 4214 } 4215 } 4216 4217 return true; 4218 } 4219 4220 // Check if VCC register matches wavefront size 4221 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4222 auto FB = getFeatureBits(); 4223 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4224 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4225 } 4226 4227 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4228 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4229 const OperandVector &Operands) { 4230 unsigned Opcode = Inst.getOpcode(); 4231 const MCInstrDesc &Desc = MII.get(Opcode); 4232 const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); 4233 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4234 ImmIdx == -1) 4235 return true; 4236 4237 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4238 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4239 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 4240 4241 const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx}; 4242 4243 unsigned NumExprs = 0; 4244 unsigned NumLiterals = 0; 4245 uint32_t LiteralValue; 4246 4247 for (int OpIdx : OpIndices) { 4248 if (OpIdx == -1) 4249 continue; 4250 4251 const MCOperand &MO = Inst.getOperand(OpIdx); 4252 if (!MO.isImm() && !MO.isExpr()) 4253 continue; 4254 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 4255 continue; 4256 4257 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 4258 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 4259 Error(getConstLoc(Operands), 4260 "inline constants are not allowed for this operand"); 4261 return false; 4262 } 4263 4264 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4265 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4266 if (NumLiterals == 0 || LiteralValue != Value) { 4267 LiteralValue = Value; 4268 ++NumLiterals; 4269 } 4270 } else if (MO.isExpr()) { 4271 ++NumExprs; 4272 } 4273 } 4274 NumLiterals += NumExprs; 4275 4276 if (!NumLiterals) 4277 return true; 4278 4279 if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4280 Error(getLitLoc(Operands), "literal operands are not supported"); 4281 return false; 4282 } 4283 4284 if (NumLiterals > 1) { 4285 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4286 return false; 4287 } 4288 4289 return true; 4290 } 4291 4292 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4293 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4294 const MCRegisterInfo *MRI) { 4295 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4296 if (OpIdx < 0) 4297 return -1; 4298 4299 const MCOperand &Op = Inst.getOperand(OpIdx); 4300 if (!Op.isReg()) 4301 return -1; 4302 4303 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4304 auto Reg = Sub ? Sub : Op.getReg(); 4305 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4306 return AGPR32.contains(Reg) ? 
1 : 0; 4307 } 4308 4309 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4310 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4311 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4312 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4313 SIInstrFlags::DS)) == 0) 4314 return true; 4315 4316 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4317 : AMDGPU::OpName::vdata; 4318 4319 const MCRegisterInfo *MRI = getMRI(); 4320 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4321 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4322 4323 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4324 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4325 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4326 return false; 4327 } 4328 4329 auto FB = getFeatureBits(); 4330 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4331 if (DataAreg < 0 || DstAreg < 0) 4332 return true; 4333 return DstAreg == DataAreg; 4334 } 4335 4336 return DstAreg < 1 && DataAreg < 1; 4337 } 4338 4339 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4340 auto FB = getFeatureBits(); 4341 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4342 return true; 4343 4344 const MCRegisterInfo *MRI = getMRI(); 4345 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4346 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4347 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4348 const MCOperand &Op = Inst.getOperand(I); 4349 if (!Op.isReg()) 4350 continue; 4351 4352 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4353 if (!Sub) 4354 continue; 4355 4356 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4357 return false; 4358 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4359 return false; 4360 } 4361 4362 return true; 4363 } 4364 4365 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const { 4366 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4367 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4368 if (Op.isBLGP()) 4369 return Op.getStartLoc(); 4370 } 4371 return SMLoc(); 4372 } 4373 4374 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst, 4375 const OperandVector &Operands) { 4376 unsigned Opc = Inst.getOpcode(); 4377 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp); 4378 if (BlgpIdx == -1) 4379 return true; 4380 SMLoc BLGPLoc = getBLGPLoc(Operands); 4381 if (!BLGPLoc.isValid()) 4382 return true; 4383 bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:"); 4384 auto FB = getFeatureBits(); 4385 bool UsesNeg = false; 4386 if (FB[AMDGPU::FeatureGFX940Insts]) { 4387 switch (Opc) { 4388 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: 4389 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: 4390 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: 4391 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: 4392 UsesNeg = true; 4393 } 4394 } 4395 4396 if (IsNeg == UsesNeg) 4397 return true; 4398 4399 Error(BLGPLoc, 4400 UsesNeg ? "invalid modifier: blgp is not supported" 4401 : "invalid modifier: neg is not supported"); 4402 4403 return false; 4404 } 4405 4406 // gfx90a has an undocumented limitation: 4407 // DS_GWS opcodes must use even aligned registers. 
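// For example, on gfx90a a ds_gws_init whose data operand is v1 is rejected with
// 'vgpr must be even aligned'; an even-numbered register such as v0 or v2 is required.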
4408 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4409 const OperandVector &Operands) { 4410 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4411 return true; 4412 4413 int Opc = Inst.getOpcode(); 4414 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4415 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4416 return true; 4417 4418 const MCRegisterInfo *MRI = getMRI(); 4419 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4420 int Data0Pos = 4421 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4422 assert(Data0Pos != -1); 4423 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4424 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0); 4425 if (RegIdx & 1) { 4426 SMLoc RegLoc = getRegLoc(Reg, Operands); 4427 Error(RegLoc, "vgpr must be even aligned"); 4428 return false; 4429 } 4430 4431 return true; 4432 } 4433 4434 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4435 const OperandVector &Operands, 4436 const SMLoc &IDLoc) { 4437 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4438 AMDGPU::OpName::cpol); 4439 if (CPolPos == -1) 4440 return true; 4441 4442 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4443 4444 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4445 if (TSFlags & SIInstrFlags::SMRD) { 4446 if (CPol && (isSI() || isCI())) { 4447 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4448 Error(S, "cache policy is not supported for SMRD instructions"); 4449 return false; 4450 } 4451 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) { 4452 Error(IDLoc, "invalid cache policy for SMEM instruction"); 4453 return false; 4454 } 4455 } 4456 4457 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { 4458 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4459 StringRef CStr(S.getPointer()); 4460 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4461 Error(S, "scc is not supported on this GPU"); 4462 return false; 4463 } 4464 4465 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4466 return true; 4467 4468 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4469 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4470 Error(IDLoc, isGFX940() ? "instruction must use sc0" 4471 : "instruction must use glc"); 4472 return false; 4473 } 4474 } else { 4475 if (CPol & CPol::GLC) { 4476 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4477 StringRef CStr(S.getPointer()); 4478 S = SMLoc::getFromPointer( 4479 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]); 4480 Error(S, isGFX940() ? "instruction must not use sc0" 4481 : "instruction must not use glc"); 4482 return false; 4483 } 4484 } 4485 4486 return true; 4487 } 4488 4489 bool AMDGPUAsmParser::validateFlatLdsDMA(const MCInst &Inst, 4490 const OperandVector &Operands, 4491 const SMLoc &IDLoc) { 4492 if (isGFX940()) 4493 return true; 4494 4495 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4496 if ((TSFlags & (SIInstrFlags::VALU | SIInstrFlags::FLAT)) != 4497 (SIInstrFlags::VALU | SIInstrFlags::FLAT)) 4498 return true; 4499 // This is FLAT LDS DMA. 4500 4501 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyLDS, Operands); 4502 StringRef CStr(S.getPointer()); 4503 if (!CStr.startswith("lds")) { 4504 // This is an incorrectly selected LDS DMA version of a FLAT load opcode. 4505 // The LDS version should have the 'lds' modifier, but since it follows optional 4506 // operands, its absence is ignored by the matcher.
4507 Error(IDLoc, "invalid operands for instruction"); 4508 return false; 4509 } 4510 4511 return true; 4512 } 4513 4514 bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) { 4515 if (!isGFX11Plus()) 4516 return true; 4517 for (auto &Operand : Operands) { 4518 if (!Operand->isReg()) 4519 continue; 4520 unsigned Reg = Operand->getReg(); 4521 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) { 4522 Error(getRegLoc(Reg, Operands), 4523 "execz and vccz are not supported on this GPU"); 4524 return false; 4525 } 4526 } 4527 return true; 4528 } 4529 4530 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4531 const SMLoc &IDLoc, 4532 const OperandVector &Operands) { 4533 if (auto ErrMsg = validateLdsDirect(Inst)) { 4534 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4535 return false; 4536 } 4537 if (!validateSOPLiteral(Inst)) { 4538 Error(getLitLoc(Operands), 4539 "only one literal operand is allowed"); 4540 return false; 4541 } 4542 if (!validateVOPLiteral(Inst, Operands)) { 4543 return false; 4544 } 4545 if (!validateConstantBusLimitations(Inst, Operands)) { 4546 return false; 4547 } 4548 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4549 return false; 4550 } 4551 if (!validateIntClampSupported(Inst)) { 4552 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4553 "integer clamping is not supported on this GPU"); 4554 return false; 4555 } 4556 if (!validateOpSel(Inst)) { 4557 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4558 "invalid op_sel operand"); 4559 return false; 4560 } 4561 if (!validateDPP(Inst, Operands)) { 4562 return false; 4563 } 4564 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 4565 if (!validateMIMGD16(Inst)) { 4566 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4567 "d16 modifier is not supported on this GPU"); 4568 return false; 4569 } 4570 if (!validateMIMGDim(Inst)) { 4571 Error(IDLoc, "dim modifier is required on this GPU"); 4572 return false; 4573 } 4574 if (!validateMIMGMSAA(Inst)) { 4575 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4576 "invalid dim; must be MSAA type"); 4577 return false; 4578 } 4579 if (auto ErrMsg = validateMIMGDataSize(Inst)) { 4580 Error(IDLoc, *ErrMsg); 4581 return false; 4582 } 4583 if (!validateMIMGAddrSize(Inst)) { 4584 Error(IDLoc, 4585 "image address size does not match dim and a16"); 4586 return false; 4587 } 4588 if (!validateMIMGAtomicDMask(Inst)) { 4589 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4590 "invalid atomic image dmask"); 4591 return false; 4592 } 4593 if (!validateMIMGGatherDMask(Inst)) { 4594 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4595 "invalid image_gather dmask: only one bit must be set"); 4596 return false; 4597 } 4598 if (!validateMovrels(Inst, Operands)) { 4599 return false; 4600 } 4601 if (!validateFlatOffset(Inst, Operands)) { 4602 return false; 4603 } 4604 if (!validateSMEMOffset(Inst, Operands)) { 4605 return false; 4606 } 4607 if (!validateMAIAccWrite(Inst, Operands)) { 4608 return false; 4609 } 4610 if (!validateMFMA(Inst, Operands)) { 4611 return false; 4612 } 4613 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4614 return false; 4615 } 4616 4617 if (!validateAGPRLdSt(Inst)) { 4618 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4619 ? 
"invalid register class: data and dst should be all VGPR or AGPR" 4620 : "invalid register class: agpr loads and stores not supported on this GPU" 4621 ); 4622 return false; 4623 } 4624 if (!validateVGPRAlign(Inst)) { 4625 Error(IDLoc, 4626 "invalid register class: vgpr tuples must be 64 bit aligned"); 4627 return false; 4628 } 4629 if (!validateGWS(Inst, Operands)) { 4630 return false; 4631 } 4632 4633 if (!validateBLGP(Inst, Operands)) { 4634 return false; 4635 } 4636 4637 if (!validateDivScale(Inst)) { 4638 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4639 return false; 4640 } 4641 if (!validateExeczVcczOperands(Operands)) { 4642 return false; 4643 } 4644 4645 if (!validateFlatLdsDMA(Inst, Operands, IDLoc)) { 4646 return false; 4647 } 4648 4649 return true; 4650 } 4651 4652 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4653 const FeatureBitset &FBS, 4654 unsigned VariantID = 0); 4655 4656 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4657 const FeatureBitset &AvailableFeatures, 4658 unsigned VariantID); 4659 4660 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4661 const FeatureBitset &FBS) { 4662 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4663 } 4664 4665 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4666 const FeatureBitset &FBS, 4667 ArrayRef<unsigned> Variants) { 4668 for (auto Variant : Variants) { 4669 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4670 return true; 4671 } 4672 4673 return false; 4674 } 4675 4676 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4677 const SMLoc &IDLoc) { 4678 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4679 4680 // Check if requested instruction variant is supported. 4681 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4682 return false; 4683 4684 // This instruction is not supported. 4685 // Clear any other pending errors because they are no longer relevant. 4686 getParser().clearPendingErrors(); 4687 4688 // Requested instruction variant is not supported. 4689 // Check if any other variants are supported. 4690 StringRef VariantName = getMatchedVariantName(); 4691 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4692 return Error(IDLoc, 4693 Twine(VariantName, 4694 " variant of this instruction is not supported")); 4695 } 4696 4697 // Finally check if this instruction is supported on any other GPU. 4698 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4699 return Error(IDLoc, "instruction not supported on this GPU"); 4700 } 4701 4702 // Instruction not supported on any GPU. Probably a typo. 4703 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4704 return Error(IDLoc, "invalid instruction" + Suggestion); 4705 } 4706 4707 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4708 OperandVector &Operands, 4709 MCStreamer &Out, 4710 uint64_t &ErrorInfo, 4711 bool MatchingInlineAsm) { 4712 MCInst Inst; 4713 unsigned Result = Match_Success; 4714 for (auto Variant : getMatchedVariants()) { 4715 uint64_t EI; 4716 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4717 Variant); 4718 // We order match statuses from least to most specific. 
We use the most specific 4719 // status as the result: 4720 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4721 if ((R == Match_Success) || 4722 (R == Match_PreferE32) || 4723 (R == Match_MissingFeature && Result != Match_PreferE32) || 4724 (R == Match_InvalidOperand && Result != Match_MissingFeature 4725 && Result != Match_PreferE32) || 4726 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4727 && Result != Match_MissingFeature 4728 && Result != Match_PreferE32)) { 4729 Result = R; 4730 ErrorInfo = EI; 4731 } 4732 if (R == Match_Success) 4733 break; 4734 } 4735 4736 if (Result == Match_Success) { 4737 if (!validateInstruction(Inst, IDLoc, Operands)) { 4738 return true; 4739 } 4740 Inst.setLoc(IDLoc); 4741 Out.emitInstruction(Inst, getSTI()); 4742 return false; 4743 } 4744 4745 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4746 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4747 return true; 4748 } 4749 4750 switch (Result) { 4751 default: break; 4752 case Match_MissingFeature: 4753 // It has been verified that the specified instruction 4754 // mnemonic is valid. A match was found but it requires 4755 // features which are not supported on this GPU. 4756 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4757 4758 case Match_InvalidOperand: { 4759 SMLoc ErrorLoc = IDLoc; 4760 if (ErrorInfo != ~0ULL) { 4761 if (ErrorInfo >= Operands.size()) { 4762 return Error(IDLoc, "too few operands for instruction"); 4763 } 4764 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4765 if (ErrorLoc == SMLoc()) 4766 ErrorLoc = IDLoc; 4767 } 4768 return Error(ErrorLoc, "invalid operand for instruction"); 4769 } 4770 4771 case Match_PreferE32: 4772 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4773 "should be encoded as e32"); 4774 case Match_MnemonicFail: 4775 llvm_unreachable("Invalid instructions should have been handled already"); 4776 } 4777 llvm_unreachable("Implement any new match types added!"); 4778 } 4779 4780 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4781 int64_t Tmp = -1; 4782 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4783 return true; 4784 } 4785 if (getParser().parseAbsoluteExpression(Tmp)) { 4786 return true; 4787 } 4788 Ret = static_cast<uint32_t>(Tmp); 4789 return false; 4790 } 4791 4792 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4793 uint32_t &Minor) { 4794 if (ParseAsAbsoluteExpression(Major)) 4795 return TokError("invalid major version"); 4796 4797 if (!trySkipToken(AsmToken::Comma)) 4798 return TokError("minor version number required, comma expected"); 4799 4800 if (ParseAsAbsoluteExpression(Minor)) 4801 return TokError("invalid minor version"); 4802 4803 return false; 4804 } 4805 4806 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4807 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4808 return TokError("directive only supported for amdgcn architecture"); 4809 4810 std::string TargetIDDirective; 4811 SMLoc TargetStart = getTok().getLoc(); 4812 if (getParser().parseEscapedString(TargetIDDirective)) 4813 return true; 4814 4815 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4816 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4817 return getParser().Error(TargetRange.Start, 4818 (Twine(".amdgcn_target directive's target id ") + 4819 Twine(TargetIDDirective) + 4820 Twine(" does not match the specified target id ") + 4821
Twine(getTargetStreamer().getTargetID()->toString())).str()); 4822 4823 return false; 4824 } 4825 4826 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4827 return Error(Range.Start, "value out of range", Range); 4828 } 4829 4830 bool AMDGPUAsmParser::calculateGPRBlocks( 4831 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4832 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4833 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4834 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4835 // TODO(scott.linder): These calculations are duplicated from 4836 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 4837 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4838 4839 unsigned NumVGPRs = NextFreeVGPR; 4840 unsigned NumSGPRs = NextFreeSGPR; 4841 4842 if (Version.Major >= 10) 4843 NumSGPRs = 0; 4844 else { 4845 unsigned MaxAddressableNumSGPRs = 4846 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4847 4848 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4849 NumSGPRs > MaxAddressableNumSGPRs) 4850 return OutOfRangeError(SGPRRange); 4851 4852 NumSGPRs += 4853 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4854 4855 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4856 NumSGPRs > MaxAddressableNumSGPRs) 4857 return OutOfRangeError(SGPRRange); 4858 4859 if (Features.test(FeatureSGPRInitBug)) 4860 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4861 } 4862 4863 VGPRBlocks = 4864 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4865 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4866 4867 return false; 4868 } 4869 4870 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4871 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4872 return TokError("directive only supported for amdgcn architecture"); 4873 4874 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4875 return TokError("directive only supported for amdhsa OS"); 4876 4877 StringRef KernelName; 4878 if (getParser().parseIdentifier(KernelName)) 4879 return true; 4880 4881 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4882 4883 StringSet<> Seen; 4884 4885 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4886 4887 SMRange VGPRRange; 4888 uint64_t NextFreeVGPR = 0; 4889 uint64_t AccumOffset = 0; 4890 uint64_t SharedVGPRCount = 0; 4891 SMRange SGPRRange; 4892 uint64_t NextFreeSGPR = 0; 4893 4894 // Count the number of user SGPRs implied from the enabled feature bits. 4895 unsigned ImpliedUserSGPRCount = 0; 4896 4897 // Track if the asm explicitly contains the directive for the user SGPR 4898 // count. 
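// An explicit .amdhsa_user_sgpr_count takes precedence over the implied count, but it
// must not be smaller than the count implied by the enabled user SGPR directives;
// this is checked after the parse loop below.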
4899 Optional<unsigned> ExplicitUserSGPRCount; 4900 bool ReserveVCC = true; 4901 bool ReserveFlatScr = true; 4902 Optional<bool> EnableWavefrontSize32; 4903 4904 while (true) { 4905 while (trySkipToken(AsmToken::EndOfStatement)); 4906 4907 StringRef ID; 4908 SMRange IDRange = getTok().getLocRange(); 4909 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4910 return true; 4911 4912 if (ID == ".end_amdhsa_kernel") 4913 break; 4914 4915 if (!Seen.insert(ID).second) 4916 return TokError(".amdhsa_ directives cannot be repeated"); 4917 4918 SMLoc ValStart = getLoc(); 4919 int64_t IVal; 4920 if (getParser().parseAbsoluteExpression(IVal)) 4921 return true; 4922 SMLoc ValEnd = getLoc(); 4923 SMRange ValRange = SMRange(ValStart, ValEnd); 4924 4925 if (IVal < 0) 4926 return OutOfRangeError(ValRange); 4927 4928 uint64_t Val = IVal; 4929 4930 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4931 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4932 return OutOfRangeError(RANGE); \ 4933 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4934 4935 if (ID == ".amdhsa_group_segment_fixed_size") { 4936 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4937 return OutOfRangeError(ValRange); 4938 KD.group_segment_fixed_size = Val; 4939 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4940 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4941 return OutOfRangeError(ValRange); 4942 KD.private_segment_fixed_size = Val; 4943 } else if (ID == ".amdhsa_kernarg_size") { 4944 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4945 return OutOfRangeError(ValRange); 4946 KD.kernarg_size = Val; 4947 } else if (ID == ".amdhsa_user_sgpr_count") { 4948 ExplicitUserSGPRCount = Val; 4949 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4950 if (hasArchitectedFlatScratch()) 4951 return Error(IDRange.Start, 4952 "directive is not supported with architected flat scratch", 4953 IDRange); 4954 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4955 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4956 Val, ValRange); 4957 if (Val) 4958 ImpliedUserSGPRCount += 4; 4959 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4960 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4961 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4962 ValRange); 4963 if (Val) 4964 ImpliedUserSGPRCount += 2; 4965 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4966 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4967 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4968 ValRange); 4969 if (Val) 4970 ImpliedUserSGPRCount += 2; 4971 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4972 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4973 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4974 Val, ValRange); 4975 if (Val) 4976 ImpliedUserSGPRCount += 2; 4977 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4978 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4979 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4980 ValRange); 4981 if (Val) 4982 ImpliedUserSGPRCount += 2; 4983 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4984 if (hasArchitectedFlatScratch()) 4985 return Error(IDRange.Start, 4986 "directive is not supported with architected flat scratch", 4987 IDRange); 4988 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4989 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4990 ValRange); 4991 if (Val) 4992 ImpliedUserSGPRCount += 2; 4993 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4994 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4995 
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4996 Val, ValRange); 4997 if (Val) 4998 ImpliedUserSGPRCount += 1; 4999 } else if (ID == ".amdhsa_wavefront_size32") { 5000 if (IVersion.Major < 10) 5001 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5002 EnableWavefrontSize32 = Val; 5003 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5004 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 5005 Val, ValRange); 5006 } else if (ID == ".amdhsa_uses_dynamic_stack") { 5007 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5008 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange); 5009 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 5010 if (hasArchitectedFlatScratch()) 5011 return Error(IDRange.Start, 5012 "directive is not supported with architected flat scratch", 5013 IDRange); 5014 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5015 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 5016 } else if (ID == ".amdhsa_enable_private_segment") { 5017 if (!hasArchitectedFlatScratch()) 5018 return Error( 5019 IDRange.Start, 5020 "directive is not supported without architected flat scratch", 5021 IDRange); 5022 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5023 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 5024 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 5025 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5026 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 5027 ValRange); 5028 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 5029 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5030 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 5031 ValRange); 5032 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 5033 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5034 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 5035 ValRange); 5036 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 5037 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5038 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 5039 ValRange); 5040 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 5041 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5042 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 5043 ValRange); 5044 } else if (ID == ".amdhsa_next_free_vgpr") { 5045 VGPRRange = ValRange; 5046 NextFreeVGPR = Val; 5047 } else if (ID == ".amdhsa_next_free_sgpr") { 5048 SGPRRange = ValRange; 5049 NextFreeSGPR = Val; 5050 } else if (ID == ".amdhsa_accum_offset") { 5051 if (!isGFX90A()) 5052 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5053 AccumOffset = Val; 5054 } else if (ID == ".amdhsa_reserve_vcc") { 5055 if (!isUInt<1>(Val)) 5056 return OutOfRangeError(ValRange); 5057 ReserveVCC = Val; 5058 } else if (ID == ".amdhsa_reserve_flat_scratch") { 5059 if (IVersion.Major < 7) 5060 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 5061 if (hasArchitectedFlatScratch()) 5062 return Error(IDRange.Start, 5063 "directive is not supported with architected flat scratch", 5064 IDRange); 5065 if (!isUInt<1>(Val)) 5066 return OutOfRangeError(ValRange); 5067 ReserveFlatScr = Val; 5068 } else if (ID == ".amdhsa_reserve_xnack_mask") { 5069 if (IVersion.Major < 8) 5070 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 5071 if (!isUInt<1>(Val)) 5072 return OutOfRangeError(ValRange); 5073 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 5074 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 5075 IDRange); 5076 } else if (ID == ".amdhsa_float_round_mode_32") { 5077 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 
5078 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 5079 } else if (ID == ".amdhsa_float_round_mode_16_64") { 5080 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5081 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 5082 } else if (ID == ".amdhsa_float_denorm_mode_32") { 5083 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5084 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 5085 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 5086 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5087 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 5088 ValRange); 5089 } else if (ID == ".amdhsa_dx10_clamp") { 5090 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5091 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 5092 } else if (ID == ".amdhsa_ieee_mode") { 5093 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 5094 Val, ValRange); 5095 } else if (ID == ".amdhsa_fp16_overflow") { 5096 if (IVersion.Major < 9) 5097 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 5098 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 5099 ValRange); 5100 } else if (ID == ".amdhsa_tg_split") { 5101 if (!isGFX90A()) 5102 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5103 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 5104 ValRange); 5105 } else if (ID == ".amdhsa_workgroup_processor_mode") { 5106 if (IVersion.Major < 10) 5107 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5108 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 5109 ValRange); 5110 } else if (ID == ".amdhsa_memory_ordered") { 5111 if (IVersion.Major < 10) 5112 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5113 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 5114 ValRange); 5115 } else if (ID == ".amdhsa_forward_progress") { 5116 if (IVersion.Major < 10) 5117 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5118 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 5119 ValRange); 5120 } else if (ID == ".amdhsa_shared_vgpr_count") { 5121 if (IVersion.Major < 10) 5122 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5123 SharedVGPRCount = Val; 5124 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, 5125 COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT, Val, 5126 ValRange); 5127 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 5128 PARSE_BITS_ENTRY( 5129 KD.compute_pgm_rsrc2, 5130 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 5131 ValRange); 5132 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 5133 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5134 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 5135 Val, ValRange); 5136 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 5137 PARSE_BITS_ENTRY( 5138 KD.compute_pgm_rsrc2, 5139 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 5140 ValRange); 5141 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 5142 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5143 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 5144 Val, ValRange); 5145 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 5146 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5147 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 5148 Val, ValRange); 5149 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 5150 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5151 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 5152 Val, ValRange); 5153 
} else if (ID == ".amdhsa_exception_int_div_zero") { 5154 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5155 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 5156 Val, ValRange); 5157 } else { 5158 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 5159 } 5160 5161 #undef PARSE_BITS_ENTRY 5162 } 5163 5164 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 5165 return TokError(".amdhsa_next_free_vgpr directive is required"); 5166 5167 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 5168 return TokError(".amdhsa_next_free_sgpr directive is required"); 5169 5170 unsigned VGPRBlocks; 5171 unsigned SGPRBlocks; 5172 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 5173 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 5174 EnableWavefrontSize32, NextFreeVGPR, 5175 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 5176 SGPRBlocks)) 5177 return true; 5178 5179 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 5180 VGPRBlocks)) 5181 return OutOfRangeError(VGPRRange); 5182 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5183 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 5184 5185 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 5186 SGPRBlocks)) 5187 return OutOfRangeError(SGPRRange); 5188 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5189 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 5190 SGPRBlocks); 5191 5192 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) 5193 return TokError("amdgpu_user_sgpr_count smaller than than implied by " 5194 "enabled user SGPRs"); 5195 5196 unsigned UserSGPRCount = 5197 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount; 5198 5199 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 5200 return TokError("too many user SGPRs enabled"); 5201 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 5202 UserSGPRCount); 5203 5204 if (isGFX90A()) { 5205 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 5206 return TokError(".amdhsa_accum_offset directive is required"); 5207 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 5208 return TokError("accum_offset should be in range [4..256] in " 5209 "increments of 4"); 5210 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 5211 return TokError("accum_offset exceeds total VGPR allocation"); 5212 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 5213 (AccumOffset / 4 - 1)); 5214 } 5215 5216 if (IVersion.Major == 10) { 5217 // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS 5218 if (SharedVGPRCount && EnableWavefrontSize32) { 5219 return TokError("shared_vgpr_count directive not valid on " 5220 "wavefront size 32"); 5221 } 5222 if (SharedVGPRCount * 2 + VGPRBlocks > 63) { 5223 return TokError("shared_vgpr_count*2 + " 5224 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot " 5225 "exceed 63\n"); 5226 } 5227 } 5228 5229 getTargetStreamer().EmitAmdhsaKernelDescriptor( 5230 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 5231 ReserveFlatScr); 5232 return false; 5233 } 5234 5235 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 5236 uint32_t Major; 5237 uint32_t Minor; 5238 5239 if (ParseDirectiveMajorMinor(Major, Minor)) 5240 return true; 5241 5242 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 5243 return false; 5244 } 5245 5246 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 5247 uint32_t Major; 5248 uint32_t Minor; 5249 uint32_t 
Stepping; 5250 StringRef VendorName; 5251 StringRef ArchName; 5252 5253 // If this directive has no arguments, then use the ISA version for the 5254 // targeted GPU. 5255 if (isToken(AsmToken::EndOfStatement)) { 5256 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5257 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 5258 ISA.Stepping, 5259 "AMD", "AMDGPU"); 5260 return false; 5261 } 5262 5263 if (ParseDirectiveMajorMinor(Major, Minor)) 5264 return true; 5265 5266 if (!trySkipToken(AsmToken::Comma)) 5267 return TokError("stepping version number required, comma expected"); 5268 5269 if (ParseAsAbsoluteExpression(Stepping)) 5270 return TokError("invalid stepping version"); 5271 5272 if (!trySkipToken(AsmToken::Comma)) 5273 return TokError("vendor name required, comma expected"); 5274 5275 if (!parseString(VendorName, "invalid vendor name")) 5276 return true; 5277 5278 if (!trySkipToken(AsmToken::Comma)) 5279 return TokError("arch name required, comma expected"); 5280 5281 if (!parseString(ArchName, "invalid arch name")) 5282 return true; 5283 5284 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 5285 VendorName, ArchName); 5286 return false; 5287 } 5288 5289 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 5290 amd_kernel_code_t &Header) { 5291 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 5292 // assembly for backwards compatibility. 5293 if (ID == "max_scratch_backing_memory_byte_size") { 5294 Parser.eatToEndOfStatement(); 5295 return false; 5296 } 5297 5298 SmallString<40> ErrStr; 5299 raw_svector_ostream Err(ErrStr); 5300 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 5301 return TokError(Err.str()); 5302 } 5303 Lex(); 5304 5305 if (ID == "enable_wavefront_size32") { 5306 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 5307 if (!isGFX10Plus()) 5308 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 5309 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5310 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 5311 } else { 5312 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5313 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 5314 } 5315 } 5316 5317 if (ID == "wavefront_size") { 5318 if (Header.wavefront_size == 5) { 5319 if (!isGFX10Plus()) 5320 return TokError("wavefront_size=5 is only allowed on GFX10+"); 5321 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5322 return TokError("wavefront_size=5 requires +WavefrontSize32"); 5323 } else if (Header.wavefront_size == 6) { 5324 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5325 return TokError("wavefront_size=6 requires +WavefrontSize64"); 5326 } 5327 } 5328 5329 if (ID == "enable_wgp_mode") { 5330 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 5331 !isGFX10Plus()) 5332 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 5333 } 5334 5335 if (ID == "enable_mem_ordered") { 5336 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 5337 !isGFX10Plus()) 5338 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 5339 } 5340 5341 if (ID == "enable_fwd_progress") { 5342 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 5343 !isGFX10Plus()) 5344 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 5345 } 5346 5347 return false; 5348 } 5349 5350 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5351 
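  // A sketch of the block this directive parses (the accepted keys are those
  // understood by parseAmdKernelCodeField; the values shown are illustrative):
  //
  //   .amd_kernel_code_t
  //     wavefront_size = 6
  //     enable_wavefront_size32 = 0
  //   .end_amd_kernel_code_t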
amd_kernel_code_t Header; 5352 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 5353 5354 while (true) { 5355 // Lex EndOfStatement. This is in a while loop, because lexing a comment 5356 // will set the current token to EndOfStatement. 5357 while(trySkipToken(AsmToken::EndOfStatement)); 5358 5359 StringRef ID; 5360 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5361 return true; 5362 5363 if (ID == ".end_amd_kernel_code_t") 5364 break; 5365 5366 if (ParseAMDKernelCodeTValue(ID, Header)) 5367 return true; 5368 } 5369 5370 getTargetStreamer().EmitAMDKernelCodeT(Header); 5371 5372 return false; 5373 } 5374 5375 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5376 StringRef KernelName; 5377 if (!parseId(KernelName, "expected symbol name")) 5378 return true; 5379 5380 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5381 ELF::STT_AMDGPU_HSA_KERNEL); 5382 5383 KernelScope.initialize(getContext()); 5384 return false; 5385 } 5386 5387 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5388 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5389 return Error(getLoc(), 5390 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5391 "architectures"); 5392 } 5393 5394 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5395 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5396 return Error(getParser().getTok().getLoc(), "target id must match options"); 5397 5398 getTargetStreamer().EmitISAVersion(); 5399 Lex(); 5400 5401 return false; 5402 } 5403 5404 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5405 const char *AssemblerDirectiveBegin; 5406 const char *AssemblerDirectiveEnd; 5407 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5408 isHsaAbiVersion3AndAbove(&getSTI()) 5409 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 5410 HSAMD::V3::AssemblerDirectiveEnd) 5411 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 5412 HSAMD::AssemblerDirectiveEnd); 5413 5414 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5415 return Error(getLoc(), 5416 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5417 "not available on non-amdhsa OSes")).str()); 5418 } 5419 5420 std::string HSAMetadataString; 5421 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5422 HSAMetadataString)) 5423 return true; 5424 5425 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5426 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5427 return Error(getLoc(), "invalid HSA metadata"); 5428 } else { 5429 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5430 return Error(getLoc(), "invalid HSA metadata"); 5431 } 5432 5433 return false; 5434 } 5435 5436 /// Common code to parse out a block of text (typically YAML) between start and 5437 /// end directives. 
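/// Used, for example, by the HSA metadata and MsgPack PAL metadata directives
/// above and below: everything between the begin and end markers is collected
/// verbatim into CollectString, one statement per separator.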
5438 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5439 const char *AssemblerDirectiveEnd, 5440 std::string &CollectString) { 5441 5442 raw_string_ostream CollectStream(CollectString); 5443 5444 getLexer().setSkipSpace(false); 5445 5446 bool FoundEnd = false; 5447 while (!isToken(AsmToken::Eof)) { 5448 while (isToken(AsmToken::Space)) { 5449 CollectStream << getTokenStr(); 5450 Lex(); 5451 } 5452 5453 if (trySkipId(AssemblerDirectiveEnd)) { 5454 FoundEnd = true; 5455 break; 5456 } 5457 5458 CollectStream << Parser.parseStringToEndOfStatement() 5459 << getContext().getAsmInfo()->getSeparatorString(); 5460 5461 Parser.eatToEndOfStatement(); 5462 } 5463 5464 getLexer().setSkipSpace(true); 5465 5466 if (isToken(AsmToken::Eof) && !FoundEnd) { 5467 return TokError(Twine("expected directive ") + 5468 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5469 } 5470 5471 CollectStream.flush(); 5472 return false; 5473 } 5474 5475 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5476 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5477 std::string String; 5478 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5479 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5480 return true; 5481 5482 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5483 if (!PALMetadata->setFromString(String)) 5484 return Error(getLoc(), "invalid PAL metadata"); 5485 return false; 5486 } 5487 5488 /// Parse the assembler directive for old linear-format PAL metadata. 5489 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5490 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5491 return Error(getLoc(), 5492 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5493 "not available on non-amdpal OSes")).str()); 5494 } 5495 5496 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5497 PALMetadata->setLegacy(); 5498 for (;;) { 5499 uint32_t Key, Value; 5500 if (ParseAsAbsoluteExpression(Key)) { 5501 return TokError(Twine("invalid value in ") + 5502 Twine(PALMD::AssemblerDirective)); 5503 } 5504 if (!trySkipToken(AsmToken::Comma)) { 5505 return TokError(Twine("expected an even number of values in ") + 5506 Twine(PALMD::AssemblerDirective)); 5507 } 5508 if (ParseAsAbsoluteExpression(Value)) { 5509 return TokError(Twine("invalid value in ") + 5510 Twine(PALMD::AssemblerDirective)); 5511 } 5512 PALMetadata->setRegister(Key, Value); 5513 if (!trySkipToken(AsmToken::Comma)) 5514 break; 5515 } 5516 return false; 5517 } 5518 5519 /// ParseDirectiveAMDGPULDS 5520 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5521 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5522 if (getParser().checkForValidSection()) 5523 return true; 5524 5525 StringRef Name; 5526 SMLoc NameLoc = getLoc(); 5527 if (getParser().parseIdentifier(Name)) 5528 return TokError("expected identifier in directive"); 5529 5530 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5531 if (parseToken(AsmToken::Comma, "expected ','")) 5532 return true; 5533 5534 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5535 5536 int64_t Size; 5537 SMLoc SizeLoc = getLoc(); 5538 if (getParser().parseAbsoluteExpression(Size)) 5539 return true; 5540 if (Size < 0) 5541 return Error(SizeLoc, "size must be non-negative"); 5542 if (Size > LocalMemorySize) 5543 return Error(SizeLoc, "size is too large"); 5544 5545 int64_t Alignment = 4; 5546 if (trySkipToken(AsmToken::Comma)) { 5547 SMLoc AlignLoc = getLoc(); 5548 if 
(getParser().parseAbsoluteExpression(Alignment)) 5549 return true; 5550 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5551 return Error(AlignLoc, "alignment must be a power of two"); 5552 5553 // Alignment larger than the size of LDS is possible in theory, as long 5554 // as the linker manages to place to symbol at address 0, but we do want 5555 // to make sure the alignment fits nicely into a 32-bit integer. 5556 if (Alignment >= 1u << 31) 5557 return Error(AlignLoc, "alignment is too large"); 5558 } 5559 5560 if (parseEOL()) 5561 return true; 5562 5563 Symbol->redefineIfPossible(); 5564 if (!Symbol->isUndefined()) 5565 return Error(NameLoc, "invalid symbol redefinition"); 5566 5567 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5568 return false; 5569 } 5570 5571 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5572 StringRef IDVal = DirectiveID.getString(); 5573 5574 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5575 if (IDVal == ".amdhsa_kernel") 5576 return ParseDirectiveAMDHSAKernel(); 5577 5578 // TODO: Restructure/combine with PAL metadata directive. 5579 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5580 return ParseDirectiveHSAMetadata(); 5581 } else { 5582 if (IDVal == ".hsa_code_object_version") 5583 return ParseDirectiveHSACodeObjectVersion(); 5584 5585 if (IDVal == ".hsa_code_object_isa") 5586 return ParseDirectiveHSACodeObjectISA(); 5587 5588 if (IDVal == ".amd_kernel_code_t") 5589 return ParseDirectiveAMDKernelCodeT(); 5590 5591 if (IDVal == ".amdgpu_hsa_kernel") 5592 return ParseDirectiveAMDGPUHsaKernel(); 5593 5594 if (IDVal == ".amd_amdgpu_isa") 5595 return ParseDirectiveISAVersion(); 5596 5597 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5598 return ParseDirectiveHSAMetadata(); 5599 } 5600 5601 if (IDVal == ".amdgcn_target") 5602 return ParseDirectiveAMDGCNTarget(); 5603 5604 if (IDVal == ".amdgpu_lds") 5605 return ParseDirectiveAMDGPULDS(); 5606 5607 if (IDVal == PALMD::AssemblerDirectiveBegin) 5608 return ParseDirectivePALMetadataBegin(); 5609 5610 if (IDVal == PALMD::AssemblerDirective) 5611 return ParseDirectivePALMetadata(); 5612 5613 return true; 5614 } 5615 5616 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5617 unsigned RegNo) { 5618 5619 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo)) 5620 return isGFX9Plus(); 5621 5622 // GFX10+ has 2 more SGPRs 104 and 105. 5623 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo)) 5624 return hasSGPR104_SGPR105(); 5625 5626 switch (RegNo) { 5627 case AMDGPU::SRC_SHARED_BASE: 5628 case AMDGPU::SRC_SHARED_LIMIT: 5629 case AMDGPU::SRC_PRIVATE_BASE: 5630 case AMDGPU::SRC_PRIVATE_LIMIT: 5631 return isGFX9Plus(); 5632 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5633 return isGFX9Plus() && !isGFX11Plus(); 5634 case AMDGPU::TBA: 5635 case AMDGPU::TBA_LO: 5636 case AMDGPU::TBA_HI: 5637 case AMDGPU::TMA: 5638 case AMDGPU::TMA_LO: 5639 case AMDGPU::TMA_HI: 5640 return !isGFX9Plus(); 5641 case AMDGPU::XNACK_MASK: 5642 case AMDGPU::XNACK_MASK_LO: 5643 case AMDGPU::XNACK_MASK_HI: 5644 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5645 case AMDGPU::SGPR_NULL: 5646 return isGFX10Plus(); 5647 default: 5648 break; 5649 } 5650 5651 if (isCI()) 5652 return true; 5653 5654 if (isSI() || isGFX10Plus()) { 5655 // No flat_scr on SI. 5656 // On GFX10Plus flat scratch is not a valid register operand and can only be 5657 // accessed with s_setreg/s_getreg. 
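    // For example, 's_mov_b64 flat_scratch, s[0:1]' is rejected here on SI and
    // on GFX10+, while on the other targets flat_scratch remains a legal
    // register operand.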
5658 switch (RegNo) { 5659 case AMDGPU::FLAT_SCR: 5660 case AMDGPU::FLAT_SCR_LO: 5661 case AMDGPU::FLAT_SCR_HI: 5662 return false; 5663 default: 5664 return true; 5665 } 5666 } 5667 5668 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5669 // SI/CI have. 5670 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo)) 5671 return hasSGPR102_SGPR103(); 5672 5673 return true; 5674 } 5675 5676 OperandMatchResultTy 5677 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 5678 OperandMode Mode) { 5679 OperandMatchResultTy ResTy = parseVOPD(Operands); 5680 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5681 isToken(AsmToken::EndOfStatement)) 5682 return ResTy; 5683 5684 // Try to parse with a custom parser 5685 ResTy = MatchOperandParserImpl(Operands, Mnemonic); 5686 5687 // If we successfully parsed the operand or if there was an error parsing, 5688 // we are done. 5689 // 5690 // If we are parsing after we reach EndOfStatement then this means we 5691 // are appending default values to the Operands list. This is only done 5692 // by a custom parser, so we shouldn't continue on to the generic parsing. 5693 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5694 isToken(AsmToken::EndOfStatement)) 5695 return ResTy; 5696 5697 SMLoc RBraceLoc; 5698 SMLoc LBraceLoc = getLoc(); 5699 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 5700 unsigned Prefix = Operands.size(); 5701 5702 for (;;) { 5703 auto Loc = getLoc(); 5704 ResTy = parseReg(Operands); 5705 if (ResTy == MatchOperand_NoMatch) 5706 Error(Loc, "expected a register"); 5707 if (ResTy != MatchOperand_Success) 5708 return MatchOperand_ParseFail; 5709 5710 RBraceLoc = getLoc(); 5711 if (trySkipToken(AsmToken::RBrac)) 5712 break; 5713 5714 if (!skipToken(AsmToken::Comma, 5715 "expected a comma or a closing square bracket")) { 5716 return MatchOperand_ParseFail; 5717 } 5718 } 5719 5720 if (Operands.size() - Prefix > 1) { 5721 Operands.insert(Operands.begin() + Prefix, 5722 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 5723 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 5724 } 5725 5726 return MatchOperand_Success; 5727 } 5728 5729 return parseRegOrImm(Operands); 5730 } 5731 5732 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 5733 // Clear any forced encodings from the previous instruction.
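  // The suffix, if present, forces a particular encoding; e.g. (the mnemonics
  // are illustrative):
  //   v_add_f32_e32  -> forced 32-bit encoding
  //   v_add_f32_e64  -> forced 64-bit encoding
  //   v_mov_b32_dpp  -> forced DPP encoding
  //   v_mov_b32_sdwa -> forced SDWA encoding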
5734 setForcedEncodingSize(0); 5735 setForcedDPP(false); 5736 setForcedSDWA(false); 5737 5738 if (Name.endswith("_e64_dpp")) { 5739 setForcedDPP(true); 5740 setForcedEncodingSize(64); 5741 return Name.substr(0, Name.size() - 8); 5742 } else if (Name.endswith("_e64")) { 5743 setForcedEncodingSize(64); 5744 return Name.substr(0, Name.size() - 4); 5745 } else if (Name.endswith("_e32")) { 5746 setForcedEncodingSize(32); 5747 return Name.substr(0, Name.size() - 4); 5748 } else if (Name.endswith("_dpp")) { 5749 setForcedDPP(true); 5750 return Name.substr(0, Name.size() - 4); 5751 } else if (Name.endswith("_sdwa")) { 5752 setForcedSDWA(true); 5753 return Name.substr(0, Name.size() - 5); 5754 } 5755 return Name; 5756 } 5757 5758 static void applyMnemonicAliases(StringRef &Mnemonic, 5759 const FeatureBitset &Features, 5760 unsigned VariantID); 5761 5762 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5763 StringRef Name, 5764 SMLoc NameLoc, OperandVector &Operands) { 5765 // Add the instruction mnemonic 5766 Name = parseMnemonicSuffix(Name); 5767 5768 // If the target architecture uses MnemonicAlias, call it here to parse 5769 // operands correctly. 5770 applyMnemonicAliases(Name, getAvailableFeatures(), 0); 5771 5772 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5773 5774 bool IsMIMG = Name.startswith("image_"); 5775 5776 while (!trySkipToken(AsmToken::EndOfStatement)) { 5777 OperandMode Mode = OperandMode_Default; 5778 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5779 Mode = OperandMode_NSA; 5780 CPolSeen = 0; 5781 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5782 5783 if (Res != MatchOperand_Success) { 5784 checkUnsupportedInstruction(Name, NameLoc); 5785 if (!Parser.hasPendingError()) { 5786 // FIXME: use real operand location rather than the current location. 5787 StringRef Msg = 5788 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5789 "not a valid operand."; 5790 Error(getLoc(), Msg); 5791 } 5792 while (!trySkipToken(AsmToken::EndOfStatement)) { 5793 lex(); 5794 } 5795 return true; 5796 } 5797 5798 // Eat the comma or space if there is one. 5799 trySkipToken(AsmToken::Comma); 5800 } 5801 5802 return false; 5803 } 5804 5805 //===----------------------------------------------------------------------===// 5806 // Utility functions 5807 //===----------------------------------------------------------------------===// 5808 5809 OperandMatchResultTy 5810 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5811 5812 if (!trySkipId(Prefix, AsmToken::Colon)) 5813 return MatchOperand_NoMatch; 5814 5815 return parseExpr(IntVal) ? 
MatchOperand_Success : MatchOperand_ParseFail; 5816 } 5817 5818 OperandMatchResultTy 5819 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5820 AMDGPUOperand::ImmTy ImmTy, 5821 bool (*ConvertResult)(int64_t&)) { 5822 SMLoc S = getLoc(); 5823 int64_t Value = 0; 5824 5825 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5826 if (Res != MatchOperand_Success) 5827 return Res; 5828 5829 if (ConvertResult && !ConvertResult(Value)) { 5830 Error(S, "invalid " + StringRef(Prefix) + " value."); 5831 } 5832 5833 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5834 return MatchOperand_Success; 5835 } 5836 5837 OperandMatchResultTy 5838 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5839 OperandVector &Operands, 5840 AMDGPUOperand::ImmTy ImmTy, 5841 bool (*ConvertResult)(int64_t&)) { 5842 SMLoc S = getLoc(); 5843 if (!trySkipId(Prefix, AsmToken::Colon)) 5844 return MatchOperand_NoMatch; 5845 5846 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5847 return MatchOperand_ParseFail; 5848 5849 unsigned Val = 0; 5850 const unsigned MaxSize = 4; 5851 5852 // FIXME: How to verify the number of elements matches the number of src 5853 // operands? 5854 for (int I = 0; ; ++I) { 5855 int64_t Op; 5856 SMLoc Loc = getLoc(); 5857 if (!parseExpr(Op)) 5858 return MatchOperand_ParseFail; 5859 5860 if (Op != 0 && Op != 1) { 5861 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5862 return MatchOperand_ParseFail; 5863 } 5864 5865 Val |= (Op << I); 5866 5867 if (trySkipToken(AsmToken::RBrac)) 5868 break; 5869 5870 if (I + 1 == MaxSize) { 5871 Error(getLoc(), "expected a closing square bracket"); 5872 return MatchOperand_ParseFail; 5873 } 5874 5875 if (!skipToken(AsmToken::Comma, "expected a comma")) 5876 return MatchOperand_ParseFail; 5877 } 5878 5879 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5880 return MatchOperand_Success; 5881 } 5882 5883 OperandMatchResultTy 5884 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5885 AMDGPUOperand::ImmTy ImmTy) { 5886 int64_t Bit; 5887 SMLoc S = getLoc(); 5888 5889 if (trySkipId(Name)) { 5890 Bit = 1; 5891 } else if (trySkipId("no", Name)) { 5892 Bit = 0; 5893 } else { 5894 return MatchOperand_NoMatch; 5895 } 5896 5897 if (Name == "r128" && !hasMIMG_R128()) { 5898 Error(S, "r128 modifier is not supported on this GPU"); 5899 return MatchOperand_ParseFail; 5900 } 5901 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5902 Error(S, "a16 modifier is not supported on this GPU"); 5903 return MatchOperand_ParseFail; 5904 } 5905 5906 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5907 ImmTy = AMDGPUOperand::ImmTyR128A16; 5908 5909 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5910 return MatchOperand_Success; 5911 } 5912 5913 OperandMatchResultTy 5914 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5915 unsigned CPolOn = 0; 5916 unsigned CPolOff = 0; 5917 SMLoc S = getLoc(); 5918 5919 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 5920 if (isGFX940() && !Mnemo.startswith("s_")) { 5921 if (trySkipId("sc0")) 5922 CPolOn = AMDGPU::CPol::SC0; 5923 else if (trySkipId("nosc0")) 5924 CPolOff = AMDGPU::CPol::SC0; 5925 else if (trySkipId("nt")) 5926 CPolOn = AMDGPU::CPol::NT; 5927 else if (trySkipId("nont")) 5928 CPolOff = AMDGPU::CPol::NT; 5929 else if (trySkipId("sc1")) 5930 CPolOn = AMDGPU::CPol::SC1; 5931 else if (trySkipId("nosc1")) 5932 CPolOff = AMDGPU::CPol::SC1; 5933 else 5934 return 
MatchOperand_NoMatch; 5935 } 5936 else if (trySkipId("glc")) 5937 CPolOn = AMDGPU::CPol::GLC; 5938 else if (trySkipId("noglc")) 5939 CPolOff = AMDGPU::CPol::GLC; 5940 else if (trySkipId("slc")) 5941 CPolOn = AMDGPU::CPol::SLC; 5942 else if (trySkipId("noslc")) 5943 CPolOff = AMDGPU::CPol::SLC; 5944 else if (trySkipId("dlc")) 5945 CPolOn = AMDGPU::CPol::DLC; 5946 else if (trySkipId("nodlc")) 5947 CPolOff = AMDGPU::CPol::DLC; 5948 else if (trySkipId("scc")) 5949 CPolOn = AMDGPU::CPol::SCC; 5950 else if (trySkipId("noscc")) 5951 CPolOff = AMDGPU::CPol::SCC; 5952 else 5953 return MatchOperand_NoMatch; 5954 5955 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5956 Error(S, "dlc modifier is not supported on this GPU"); 5957 return MatchOperand_ParseFail; 5958 } 5959 5960 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5961 Error(S, "scc modifier is not supported on this GPU"); 5962 return MatchOperand_ParseFail; 5963 } 5964 5965 if (CPolSeen & (CPolOn | CPolOff)) { 5966 Error(S, "duplicate cache policy modifier"); 5967 return MatchOperand_ParseFail; 5968 } 5969 5970 CPolSeen |= (CPolOn | CPolOff); 5971 5972 for (unsigned I = 1; I != Operands.size(); ++I) { 5973 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5974 if (Op.isCPol()) { 5975 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5976 return MatchOperand_Success; 5977 } 5978 } 5979 5980 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5981 AMDGPUOperand::ImmTyCPol)); 5982 5983 return MatchOperand_Success; 5984 } 5985 5986 static void addOptionalImmOperand( 5987 MCInst& Inst, const OperandVector& Operands, 5988 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5989 AMDGPUOperand::ImmTy ImmT, 5990 int64_t Default = 0) { 5991 auto i = OptionalIdx.find(ImmT); 5992 if (i != OptionalIdx.end()) { 5993 unsigned Idx = i->second; 5994 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5995 } else { 5996 Inst.addOperand(MCOperand::createImm(Default)); 5997 } 5998 } 5999 6000 OperandMatchResultTy 6001 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 6002 StringRef &Value, 6003 SMLoc &StringLoc) { 6004 if (!trySkipId(Prefix, AsmToken::Colon)) 6005 return MatchOperand_NoMatch; 6006 6007 StringLoc = getLoc(); 6008 return parseId(Value, "expected an identifier") ? MatchOperand_Success 6009 : MatchOperand_ParseFail; 6010 } 6011 6012 //===----------------------------------------------------------------------===// 6013 // MTBUF format 6014 //===----------------------------------------------------------------------===// 6015 6016 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 6017 int64_t MaxVal, 6018 int64_t &Fmt) { 6019 int64_t Val; 6020 SMLoc Loc = getLoc(); 6021 6022 auto Res = parseIntWithPrefix(Pref, Val); 6023 if (Res == MatchOperand_ParseFail) 6024 return false; 6025 if (Res == MatchOperand_NoMatch) 6026 return true; 6027 6028 if (Val < 0 || Val > MaxVal) { 6029 Error(Loc, Twine("out of range ", StringRef(Pref))); 6030 return false; 6031 } 6032 6033 Fmt = Val; 6034 return true; 6035 } 6036 6037 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 6038 // values to live in a joint format operand in the MCInst encoding. 6039 OperandMatchResultTy 6040 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 6041 using namespace llvm::AMDGPU::MTBUFFormat; 6042 6043 int64_t Dfmt = DFMT_UNDEF; 6044 int64_t Nfmt = NFMT_UNDEF; 6045 6046 // dfmt and nfmt can appear in either order, and each is optional. 
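  // E.g. 'dfmt:4, nfmt:2', 'nfmt:2 dfmt:4' and a lone 'dfmt:4' are all
  // accepted; whichever half is missing falls back to its default below.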
6047 for (int I = 0; I < 2; ++I) { 6048 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 6049 return MatchOperand_ParseFail; 6050 6051 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 6052 return MatchOperand_ParseFail; 6053 } 6054 // Skip optional comma between dfmt/nfmt 6055 // but guard against 2 commas following each other. 6056 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 6057 !peekToken().is(AsmToken::Comma)) { 6058 trySkipToken(AsmToken::Comma); 6059 } 6060 } 6061 6062 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 6063 return MatchOperand_NoMatch; 6064 6065 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6066 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt; 6067 6068 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6069 return MatchOperand_Success; 6070 } 6071 6072 OperandMatchResultTy 6073 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 6074 using namespace llvm::AMDGPU::MTBUFFormat; 6075 6076 int64_t Fmt = UFMT_UNDEF; 6077 6078 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 6079 return MatchOperand_ParseFail; 6080 6081 if (Fmt == UFMT_UNDEF) 6082 return MatchOperand_NoMatch; 6083 6084 Format = Fmt; 6085 return MatchOperand_Success; 6086 } 6087 6088 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 6089 int64_t &Nfmt, 6090 StringRef FormatStr, 6091 SMLoc Loc) { 6092 using namespace llvm::AMDGPU::MTBUFFormat; 6093 int64_t Format; 6094 6095 Format = getDfmt(FormatStr); 6096 if (Format != DFMT_UNDEF) { 6097 Dfmt = Format; 6098 return true; 6099 } 6100 6101 Format = getNfmt(FormatStr, getSTI()); 6102 if (Format != NFMT_UNDEF) { 6103 Nfmt = Format; 6104 return true; 6105 } 6106 6107 Error(Loc, "unsupported format"); 6108 return false; 6109 } 6110 6111 OperandMatchResultTy 6112 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 6113 SMLoc FormatLoc, 6114 int64_t &Format) { 6115 using namespace llvm::AMDGPU::MTBUFFormat; 6116 6117 int64_t Dfmt = DFMT_UNDEF; 6118 int64_t Nfmt = NFMT_UNDEF; 6119 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 6120 return MatchOperand_ParseFail; 6121 6122 if (trySkipToken(AsmToken::Comma)) { 6123 StringRef Str; 6124 SMLoc Loc = getLoc(); 6125 if (!parseId(Str, "expected a format string") || 6126 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 6127 return MatchOperand_ParseFail; 6128 } 6129 if (Dfmt == DFMT_UNDEF) { 6130 Error(Loc, "duplicate numeric format"); 6131 return MatchOperand_ParseFail; 6132 } else if (Nfmt == NFMT_UNDEF) { 6133 Error(Loc, "duplicate data format"); 6134 return MatchOperand_ParseFail; 6135 } 6136 } 6137 6138 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6139 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 6140 6141 if (isGFX10Plus()) { 6142 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI()); 6143 if (Ufmt == UFMT_UNDEF) { 6144 Error(FormatLoc, "unsupported format"); 6145 return MatchOperand_ParseFail; 6146 } 6147 Format = Ufmt; 6148 } else { 6149 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6150 } 6151 6152 return MatchOperand_Success; 6153 } 6154 6155 OperandMatchResultTy 6156 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 6157 SMLoc Loc, 6158 int64_t &Format) { 6159 using namespace llvm::AMDGPU::MTBUFFormat; 6160 6161 auto Id = getUnifiedFormat(FormatStr, getSTI()); 6162 if (Id == UFMT_UNDEF) 6163 return MatchOperand_NoMatch; 6164 6165 if (!isGFX10Plus()) { 6166 Error(Loc, "unified format is not supported on this GPU"); 6167 return MatchOperand_ParseFail; 6168 } 6169 6170 Format = Id; 6171 return MatchOperand_Success; 6172 } 6173 6174 OperandMatchResultTy 6175 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 6176 using namespace llvm::AMDGPU::MTBUFFormat; 6177 SMLoc Loc = getLoc(); 6178 6179 if (!parseExpr(Format)) 6180 return MatchOperand_ParseFail; 6181 if (!isValidFormatEncoding(Format, getSTI())) { 6182 Error(Loc, "out of range format"); 6183 return MatchOperand_ParseFail; 6184 } 6185 6186 return MatchOperand_Success; 6187 } 6188 6189 OperandMatchResultTy 6190 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 6191 using namespace llvm::AMDGPU::MTBUFFormat; 6192 6193 if (!trySkipId("format", AsmToken::Colon)) 6194 return MatchOperand_NoMatch; 6195 6196 if (trySkipToken(AsmToken::LBrac)) { 6197 StringRef FormatStr; 6198 SMLoc Loc = getLoc(); 6199 if (!parseId(FormatStr, "expected a format string")) 6200 return MatchOperand_ParseFail; 6201 6202 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 6203 if (Res == MatchOperand_NoMatch) 6204 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 6205 if (Res != MatchOperand_Success) 6206 return Res; 6207 6208 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 6209 return MatchOperand_ParseFail; 6210 6211 return MatchOperand_Success; 6212 } 6213 6214 return parseNumericFormat(Format); 6215 } 6216 6217 OperandMatchResultTy 6218 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 6219 using namespace llvm::AMDGPU::MTBUFFormat; 6220 6221 int64_t Format = getDefaultFormatEncoding(getSTI()); 6222 OperandMatchResultTy Res; 6223 SMLoc Loc = getLoc(); 6224 6225 // Parse legacy format syntax. 6226 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 6227 if (Res == MatchOperand_ParseFail) 6228 return Res; 6229 6230 bool FormatFound = (Res == MatchOperand_Success); 6231 6232 Operands.push_back( 6233 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 6234 6235 if (FormatFound) 6236 trySkipToken(AsmToken::Comma); 6237 6238 if (isToken(AsmToken::EndOfStatement)) { 6239 // We are expecting an soffset operand, 6240 // but let matcher handle the error. 6241 return MatchOperand_Success; 6242 } 6243 6244 // Parse soffset. 
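  // If no format was parsed before soffset, a trailing one may still follow,
  // e.g. 'format:[BUF_DATA_FORMAT_32, BUF_NUM_FORMAT_FLOAT]' or 'format:22'
  // (symbolic names are those defined in the MTBUFFormat tables); it is then
  // patched into the ImmTyFORMAT operand created above.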
6245 Res = parseRegOrImm(Operands); 6246 if (Res != MatchOperand_Success) 6247 return Res; 6248 6249 trySkipToken(AsmToken::Comma); 6250 6251 if (!FormatFound) { 6252 Res = parseSymbolicOrNumericFormat(Format); 6253 if (Res == MatchOperand_ParseFail) 6254 return Res; 6255 if (Res == MatchOperand_Success) { 6256 auto Size = Operands.size(); 6257 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 6258 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 6259 Op.setImm(Format); 6260 } 6261 return MatchOperand_Success; 6262 } 6263 6264 if (isId("format") && peekToken().is(AsmToken::Colon)) { 6265 Error(getLoc(), "duplicate format"); 6266 return MatchOperand_ParseFail; 6267 } 6268 return MatchOperand_Success; 6269 } 6270 6271 //===----------------------------------------------------------------------===// 6272 // ds 6273 //===----------------------------------------------------------------------===// 6274 6275 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 6276 const OperandVector &Operands) { 6277 OptionalImmIndexMap OptionalIdx; 6278 6279 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6280 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6281 6282 // Add the register arguments 6283 if (Op.isReg()) { 6284 Op.addRegOperands(Inst, 1); 6285 continue; 6286 } 6287 6288 // Handle optional arguments 6289 OptionalIdx[Op.getImmTy()] = i; 6290 } 6291 6292 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 6293 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 6294 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6295 6296 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6297 } 6298 6299 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 6300 bool IsGdsHardcoded) { 6301 OptionalImmIndexMap OptionalIdx; 6302 AMDGPUOperand::ImmTy OffsetType = AMDGPUOperand::ImmTyOffset; 6303 6304 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6305 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6306 6307 // Add the register arguments 6308 if (Op.isReg()) { 6309 Op.addRegOperands(Inst, 1); 6310 continue; 6311 } 6312 6313 if (Op.isToken() && Op.getToken() == "gds") { 6314 IsGdsHardcoded = true; 6315 continue; 6316 } 6317 6318 // Handle optional arguments 6319 OptionalIdx[Op.getImmTy()] = i; 6320 6321 if (Op.getImmTy() == AMDGPUOperand::ImmTySwizzle) 6322 OffsetType = AMDGPUOperand::ImmTySwizzle; 6323 } 6324 6325 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 6326 6327 if (!IsGdsHardcoded) { 6328 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6329 } 6330 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6331 } 6332 6333 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 6334 OptionalImmIndexMap OptionalIdx; 6335 6336 unsigned OperandIdx[4]; 6337 unsigned EnMask = 0; 6338 int SrcIdx = 0; 6339 6340 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6341 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6342 6343 // Add the register arguments 6344 if (Op.isReg()) { 6345 assert(SrcIdx < 4); 6346 OperandIdx[SrcIdx] = Inst.size(); 6347 Op.addRegOperands(Inst, 1); 6348 ++SrcIdx; 6349 continue; 6350 } 6351 6352 if (Op.isOff()) { 6353 assert(SrcIdx < 4); 6354 OperandIdx[SrcIdx] = Inst.size(); 6355 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 6356 ++SrcIdx; 6357 continue; 6358 } 6359 6360 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) 
{ 6361 Op.addImmOperands(Inst, 1); 6362 continue; 6363 } 6364 6365 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en")) 6366 continue; 6367 6368 // Handle optional arguments 6369 OptionalIdx[Op.getImmTy()] = i; 6370 } 6371 6372 assert(SrcIdx == 4); 6373 6374 bool Compr = false; 6375 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 6376 Compr = true; 6377 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 6378 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 6379 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 6380 } 6381 6382 for (auto i = 0; i < SrcIdx; ++i) { 6383 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 6384 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i); 6385 } 6386 } 6387 6388 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6389 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6390 6391 Inst.addOperand(MCOperand::createImm(EnMask)); 6392 } 6393 6394 //===----------------------------------------------------------------------===// 6395 // s_waitcnt 6396 //===----------------------------------------------------------------------===// 6397 6398 static bool 6399 encodeCnt( 6400 const AMDGPU::IsaVersion ISA, 6401 int64_t &IntVal, 6402 int64_t CntVal, 6403 bool Saturate, 6404 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6405 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6406 { 6407 bool Failed = false; 6408 6409 IntVal = encode(ISA, IntVal, CntVal); 6410 if (CntVal != decode(ISA, IntVal)) { 6411 if (Saturate) { 6412 IntVal = encode(ISA, IntVal, -1); 6413 } else { 6414 Failed = true; 6415 } 6416 } 6417 return Failed; 6418 } 6419 6420 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6421 6422 SMLoc CntLoc = getLoc(); 6423 StringRef CntName = getTokenStr(); 6424 6425 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6426 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6427 return false; 6428 6429 int64_t CntVal; 6430 SMLoc ValLoc = getLoc(); 6431 if (!parseExpr(CntVal)) 6432 return false; 6433 6434 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6435 6436 bool Failed = true; 6437 bool Sat = CntName.endswith("_sat"); 6438 6439 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6440 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6441 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6442 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6443 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6444 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6445 } else { 6446 Error(CntLoc, "invalid counter name " + CntName); 6447 return false; 6448 } 6449 6450 if (Failed) { 6451 Error(ValLoc, "too large value for " + CntName); 6452 return false; 6453 } 6454 6455 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6456 return false; 6457 6458 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6459 if (isToken(AsmToken::EndOfStatement)) { 6460 Error(getLoc(), "expected a counter name"); 6461 return false; 6462 } 6463 } 6464 6465 return true; 6466 } 6467 6468 OperandMatchResultTy 6469 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 6470 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6471 int64_t Waitcnt = getWaitcntBitMask(ISA); 6472 SMLoc S = getLoc(); 6473 6474 if (isToken(AsmToken::Identifier) 
&& peekToken().is(AsmToken::LParen)) { 6475 while (!isToken(AsmToken::EndOfStatement)) { 6476 if (!parseCnt(Waitcnt)) 6477 return MatchOperand_ParseFail; 6478 } 6479 } else { 6480 if (!parseExpr(Waitcnt)) 6481 return MatchOperand_ParseFail; 6482 } 6483 6484 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6485 return MatchOperand_Success; 6486 } 6487 6488 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) { 6489 SMLoc FieldLoc = getLoc(); 6490 StringRef FieldName = getTokenStr(); 6491 if (!skipToken(AsmToken::Identifier, "expected a field name") || 6492 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6493 return false; 6494 6495 SMLoc ValueLoc = getLoc(); 6496 StringRef ValueName = getTokenStr(); 6497 if (!skipToken(AsmToken::Identifier, "expected a value name") || 6498 !skipToken(AsmToken::RParen, "expected a right parenthesis")) 6499 return false; 6500 6501 unsigned Shift; 6502 if (FieldName == "instid0") { 6503 Shift = 0; 6504 } else if (FieldName == "instskip") { 6505 Shift = 4; 6506 } else if (FieldName == "instid1") { 6507 Shift = 7; 6508 } else { 6509 Error(FieldLoc, "invalid field name " + FieldName); 6510 return false; 6511 } 6512 6513 int Value; 6514 if (Shift == 4) { 6515 // Parse values for instskip. 6516 Value = StringSwitch<int>(ValueName) 6517 .Case("SAME", 0) 6518 .Case("NEXT", 1) 6519 .Case("SKIP_1", 2) 6520 .Case("SKIP_2", 3) 6521 .Case("SKIP_3", 4) 6522 .Case("SKIP_4", 5) 6523 .Default(-1); 6524 } else { 6525 // Parse values for instid0 and instid1. 6526 Value = StringSwitch<int>(ValueName) 6527 .Case("NO_DEP", 0) 6528 .Case("VALU_DEP_1", 1) 6529 .Case("VALU_DEP_2", 2) 6530 .Case("VALU_DEP_3", 3) 6531 .Case("VALU_DEP_4", 4) 6532 .Case("TRANS32_DEP_1", 5) 6533 .Case("TRANS32_DEP_2", 6) 6534 .Case("TRANS32_DEP_3", 7) 6535 .Case("FMA_ACCUM_CYCLE_1", 8) 6536 .Case("SALU_CYCLE_1", 9) 6537 .Case("SALU_CYCLE_2", 10) 6538 .Case("SALU_CYCLE_3", 11) 6539 .Default(-1); 6540 } 6541 if (Value < 0) { 6542 Error(ValueLoc, "invalid value name " + ValueName); 6543 return false; 6544 } 6545 6546 Delay |= Value << Shift; 6547 return true; 6548 } 6549 6550 OperandMatchResultTy 6551 AMDGPUAsmParser::parseSDelayAluOps(OperandVector &Operands) { 6552 int64_t Delay = 0; 6553 SMLoc S = getLoc(); 6554 6555 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6556 do { 6557 if (!parseDelay(Delay)) 6558 return MatchOperand_ParseFail; 6559 } while (trySkipToken(AsmToken::Pipe)); 6560 } else { 6561 if (!parseExpr(Delay)) 6562 return MatchOperand_ParseFail; 6563 } 6564 6565 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S)); 6566 return MatchOperand_Success; 6567 } 6568 6569 bool 6570 AMDGPUOperand::isSWaitCnt() const { 6571 return isImm(); 6572 } 6573 6574 bool AMDGPUOperand::isSDelayAlu() const { return isImm(); } 6575 6576 //===----------------------------------------------------------------------===// 6577 // DepCtr 6578 //===----------------------------------------------------------------------===// 6579 6580 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId, 6581 StringRef DepCtrName) { 6582 switch (ErrorId) { 6583 case OPR_ID_UNKNOWN: 6584 Error(Loc, Twine("invalid counter name ", DepCtrName)); 6585 return; 6586 case OPR_ID_UNSUPPORTED: 6587 Error(Loc, Twine(DepCtrName, " is not supported on this GPU")); 6588 return; 6589 case OPR_ID_DUPLICATE: 6590 Error(Loc, Twine("duplicate counter name ", DepCtrName)); 6591 return; 6592 case OPR_VAL_INVALID: 6593 Error(Loc, Twine("invalid value for ", DepCtrName)); 6594 return; 6595 default: 
6596 assert(false); 6597 } 6598 } 6599 6600 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) { 6601 6602 using namespace llvm::AMDGPU::DepCtr; 6603 6604 SMLoc DepCtrLoc = getLoc(); 6605 StringRef DepCtrName = getTokenStr(); 6606 6607 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6608 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6609 return false; 6610 6611 int64_t ExprVal; 6612 if (!parseExpr(ExprVal)) 6613 return false; 6614 6615 unsigned PrevOprMask = UsedOprMask; 6616 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI()); 6617 6618 if (CntVal < 0) { 6619 depCtrError(DepCtrLoc, CntVal, DepCtrName); 6620 return false; 6621 } 6622 6623 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6624 return false; 6625 6626 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6627 if (isToken(AsmToken::EndOfStatement)) { 6628 Error(getLoc(), "expected a counter name"); 6629 return false; 6630 } 6631 } 6632 6633 unsigned CntValMask = PrevOprMask ^ UsedOprMask; 6634 DepCtr = (DepCtr & ~CntValMask) | CntVal; 6635 return true; 6636 } 6637 6638 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) { 6639 using namespace llvm::AMDGPU::DepCtr; 6640 6641 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI()); 6642 SMLoc Loc = getLoc(); 6643 6644 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6645 unsigned UsedOprMask = 0; 6646 while (!isToken(AsmToken::EndOfStatement)) { 6647 if (!parseDepCtr(DepCtr, UsedOprMask)) 6648 return MatchOperand_ParseFail; 6649 } 6650 } else { 6651 if (!parseExpr(DepCtr)) 6652 return MatchOperand_ParseFail; 6653 } 6654 6655 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc)); 6656 return MatchOperand_Success; 6657 } 6658 6659 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); } 6660 6661 //===----------------------------------------------------------------------===// 6662 // hwreg 6663 //===----------------------------------------------------------------------===// 6664 6665 bool 6666 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6667 OperandInfoTy &Offset, 6668 OperandInfoTy &Width) { 6669 using namespace llvm::AMDGPU::Hwreg; 6670 6671 // The register may be specified by name or using a numeric code 6672 HwReg.Loc = getLoc(); 6673 if (isToken(AsmToken::Identifier) && 6674 (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6675 HwReg.IsSymbolic = true; 6676 lex(); // skip register name 6677 } else if (!parseExpr(HwReg.Id, "a register name")) { 6678 return false; 6679 } 6680 6681 if (trySkipToken(AsmToken::RParen)) 6682 return true; 6683 6684 // parse optional params 6685 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6686 return false; 6687 6688 Offset.Loc = getLoc(); 6689 if (!parseExpr(Offset.Id)) 6690 return false; 6691 6692 if (!skipToken(AsmToken::Comma, "expected a comma")) 6693 return false; 6694 6695 Width.Loc = getLoc(); 6696 return parseExpr(Width.Id) && 6697 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6698 } 6699 6700 bool 6701 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6702 const OperandInfoTy &Offset, 6703 const OperandInfoTy &Width) { 6704 6705 using namespace llvm::AMDGPU::Hwreg; 6706 6707 if (HwReg.IsSymbolic) { 6708 if (HwReg.Id == OPR_ID_UNSUPPORTED) { 6709 Error(HwReg.Loc, 6710 "specified hardware register is not supported on this GPU"); 6711 return false; 6712 } 6713 } else { 6714 if 
(!isValidHwreg(HwReg.Id)) { 6715 Error(HwReg.Loc, 6716 "invalid code of hardware register: only 6-bit values are legal"); 6717 return false; 6718 } 6719 } 6720 if (!isValidHwregOffset(Offset.Id)) { 6721 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6722 return false; 6723 } 6724 if (!isValidHwregWidth(Width.Id)) { 6725 Error(Width.Loc, 6726 "invalid bitfield width: only values from 1 to 32 are legal"); 6727 return false; 6728 } 6729 return true; 6730 } 6731 6732 OperandMatchResultTy 6733 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6734 using namespace llvm::AMDGPU::Hwreg; 6735 6736 int64_t ImmVal = 0; 6737 SMLoc Loc = getLoc(); 6738 6739 if (trySkipId("hwreg", AsmToken::LParen)) { 6740 OperandInfoTy HwReg(OPR_ID_UNKNOWN); 6741 OperandInfoTy Offset(OFFSET_DEFAULT_); 6742 OperandInfoTy Width(WIDTH_DEFAULT_); 6743 if (parseHwregBody(HwReg, Offset, Width) && 6744 validateHwreg(HwReg, Offset, Width)) { 6745 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6746 } else { 6747 return MatchOperand_ParseFail; 6748 } 6749 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6750 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6751 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6752 return MatchOperand_ParseFail; 6753 } 6754 } else { 6755 return MatchOperand_ParseFail; 6756 } 6757 6758 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6759 return MatchOperand_Success; 6760 } 6761 6762 bool AMDGPUOperand::isHwreg() const { 6763 return isImmTy(ImmTyHwreg); 6764 } 6765 6766 //===----------------------------------------------------------------------===// 6767 // sendmsg 6768 //===----------------------------------------------------------------------===// 6769 6770 bool 6771 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6772 OperandInfoTy &Op, 6773 OperandInfoTy &Stream) { 6774 using namespace llvm::AMDGPU::SendMsg; 6775 6776 Msg.Loc = getLoc(); 6777 if (isToken(AsmToken::Identifier) && 6778 (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6779 Msg.IsSymbolic = true; 6780 lex(); // skip message name 6781 } else if (!parseExpr(Msg.Id, "a message name")) { 6782 return false; 6783 } 6784 6785 if (trySkipToken(AsmToken::Comma)) { 6786 Op.IsDefined = true; 6787 Op.Loc = getLoc(); 6788 if (isToken(AsmToken::Identifier) && 6789 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6790 lex(); // skip operation name 6791 } else if (!parseExpr(Op.Id, "an operation name")) { 6792 return false; 6793 } 6794 6795 if (trySkipToken(AsmToken::Comma)) { 6796 Stream.IsDefined = true; 6797 Stream.Loc = getLoc(); 6798 if (!parseExpr(Stream.Id)) 6799 return false; 6800 } 6801 } 6802 6803 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6804 } 6805 6806 bool 6807 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6808 const OperandInfoTy &Op, 6809 const OperandInfoTy &Stream) { 6810 using namespace llvm::AMDGPU::SendMsg; 6811 6812 // Validation strictness depends on whether message is specified 6813 // in a symbolic or in a numeric form. In the latter case 6814 // only encoding possibility is checked. 
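  // E.g. 's_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)' is validated against the
  // constraints of the named message, while a raw form such as 'sendmsg(2)'
  // only needs to be encodable.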
6815 bool Strict = Msg.IsSymbolic; 6816 6817 if (Strict) { 6818 if (Msg.Id == OPR_ID_UNSUPPORTED) { 6819 Error(Msg.Loc, "specified message id is not supported on this GPU"); 6820 return false; 6821 } 6822 } else { 6823 if (!isValidMsgId(Msg.Id, getSTI())) { 6824 Error(Msg.Loc, "invalid message id"); 6825 return false; 6826 } 6827 } 6828 if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) { 6829 if (Op.IsDefined) { 6830 Error(Op.Loc, "message does not support operations"); 6831 } else { 6832 Error(Msg.Loc, "missing message operation"); 6833 } 6834 return false; 6835 } 6836 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6837 Error(Op.Loc, "invalid operation id"); 6838 return false; 6839 } 6840 if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) && 6841 Stream.IsDefined) { 6842 Error(Stream.Loc, "message operation does not support streams"); 6843 return false; 6844 } 6845 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6846 Error(Stream.Loc, "invalid message stream id"); 6847 return false; 6848 } 6849 return true; 6850 } 6851 6852 OperandMatchResultTy 6853 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6854 using namespace llvm::AMDGPU::SendMsg; 6855 6856 int64_t ImmVal = 0; 6857 SMLoc Loc = getLoc(); 6858 6859 if (trySkipId("sendmsg", AsmToken::LParen)) { 6860 OperandInfoTy Msg(OPR_ID_UNKNOWN); 6861 OperandInfoTy Op(OP_NONE_); 6862 OperandInfoTy Stream(STREAM_ID_NONE_); 6863 if (parseSendMsgBody(Msg, Op, Stream) && 6864 validateSendMsg(Msg, Op, Stream)) { 6865 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6866 } else { 6867 return MatchOperand_ParseFail; 6868 } 6869 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6870 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6871 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6872 return MatchOperand_ParseFail; 6873 } 6874 } else { 6875 return MatchOperand_ParseFail; 6876 } 6877 6878 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6879 return MatchOperand_Success; 6880 } 6881 6882 bool AMDGPUOperand::isSendMsg() const { 6883 return isImmTy(ImmTySendMsg); 6884 } 6885 6886 //===----------------------------------------------------------------------===// 6887 // v_interp 6888 //===----------------------------------------------------------------------===// 6889 6890 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6891 StringRef Str; 6892 SMLoc S = getLoc(); 6893 6894 if (!parseId(Str)) 6895 return MatchOperand_NoMatch; 6896 6897 int Slot = StringSwitch<int>(Str) 6898 .Case("p10", 0) 6899 .Case("p20", 1) 6900 .Case("p0", 2) 6901 .Default(-1); 6902 6903 if (Slot == -1) { 6904 Error(S, "invalid interpolation slot"); 6905 return MatchOperand_ParseFail; 6906 } 6907 6908 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6909 AMDGPUOperand::ImmTyInterpSlot)); 6910 return MatchOperand_Success; 6911 } 6912 6913 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6914 StringRef Str; 6915 SMLoc S = getLoc(); 6916 6917 if (!parseId(Str)) 6918 return MatchOperand_NoMatch; 6919 6920 if (!Str.startswith("attr")) { 6921 Error(S, "invalid interpolation attribute"); 6922 return MatchOperand_ParseFail; 6923 } 6924 6925 StringRef Chan = Str.take_back(2); 6926 int AttrChan = StringSwitch<int>(Chan) 6927 .Case(".x", 0) 6928 .Case(".y", 1) 6929 .Case(".z", 2) 6930 .Case(".w", 3) 6931 .Default(-1); 6932 if (AttrChan == -1) { 6933 Error(S, "invalid or missing interpolation attribute channel"); 
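      // (A well-formed operand is 'attr<N>.<chan>' with <chan> one of x, y, z
      //  or w, e.g. 'attr0.x'.)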
6934 return MatchOperand_ParseFail; 6935 } 6936 6937 Str = Str.drop_back(2).drop_front(4); 6938 6939 uint8_t Attr; 6940 if (Str.getAsInteger(10, Attr)) { 6941 Error(S, "invalid or missing interpolation attribute number"); 6942 return MatchOperand_ParseFail; 6943 } 6944 6945 if (Attr > 63) { 6946 Error(S, "out of bounds interpolation attribute number"); 6947 return MatchOperand_ParseFail; 6948 } 6949 6950 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6951 6952 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6953 AMDGPUOperand::ImmTyInterpAttr)); 6954 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6955 AMDGPUOperand::ImmTyAttrChan)); 6956 return MatchOperand_Success; 6957 } 6958 6959 //===----------------------------------------------------------------------===// 6960 // exp 6961 //===----------------------------------------------------------------------===// 6962 6963 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6964 using namespace llvm::AMDGPU::Exp; 6965 6966 StringRef Str; 6967 SMLoc S = getLoc(); 6968 6969 if (!parseId(Str)) 6970 return MatchOperand_NoMatch; 6971 6972 unsigned Id = getTgtId(Str); 6973 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6974 Error(S, (Id == ET_INVALID) ? 6975 "invalid exp target" : 6976 "exp target is not supported on this GPU"); 6977 return MatchOperand_ParseFail; 6978 } 6979 6980 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6981 AMDGPUOperand::ImmTyExpTgt)); 6982 return MatchOperand_Success; 6983 } 6984 6985 //===----------------------------------------------------------------------===// 6986 // parser helpers 6987 //===----------------------------------------------------------------------===// 6988 6989 bool 6990 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6991 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6992 } 6993 6994 bool 6995 AMDGPUAsmParser::isId(const StringRef Id) const { 6996 return isId(getToken(), Id); 6997 } 6998 6999 bool 7000 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 7001 return getTokenKind() == Kind; 7002 } 7003 7004 bool 7005 AMDGPUAsmParser::trySkipId(const StringRef Id) { 7006 if (isId(Id)) { 7007 lex(); 7008 return true; 7009 } 7010 return false; 7011 } 7012 7013 bool 7014 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 7015 if (isToken(AsmToken::Identifier)) { 7016 StringRef Tok = getTokenStr(); 7017 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 7018 lex(); 7019 return true; 7020 } 7021 } 7022 return false; 7023 } 7024 7025 bool 7026 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 7027 if (isId(Id) && peekToken().is(Kind)) { 7028 lex(); 7029 lex(); 7030 return true; 7031 } 7032 return false; 7033 } 7034 7035 bool 7036 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 7037 if (isToken(Kind)) { 7038 lex(); 7039 return true; 7040 } 7041 return false; 7042 } 7043 7044 bool 7045 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 7046 const StringRef ErrMsg) { 7047 if (!trySkipToken(Kind)) { 7048 Error(getLoc(), ErrMsg); 7049 return false; 7050 } 7051 return true; 7052 } 7053 7054 bool 7055 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 7056 SMLoc S = getLoc(); 7057 7058 const MCExpr *Expr; 7059 if (Parser.parseExpression(Expr)) 7060 return false; 7061 7062 if (Expr->evaluateAsAbsolute(Imm)) 7063 return true; 7064 7065 if (Expected.empty()) { 7066 Error(S, "expected 
absolute expression"); 7067 } else { 7068 Error(S, Twine("expected ", Expected) + 7069 Twine(" or an absolute expression")); 7070 } 7071 return false; 7072 } 7073 7074 bool 7075 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 7076 SMLoc S = getLoc(); 7077 7078 const MCExpr *Expr; 7079 if (Parser.parseExpression(Expr)) 7080 return false; 7081 7082 int64_t IntVal; 7083 if (Expr->evaluateAsAbsolute(IntVal)) { 7084 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 7085 } else { 7086 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 7087 } 7088 return true; 7089 } 7090 7091 bool 7092 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 7093 if (isToken(AsmToken::String)) { 7094 Val = getToken().getStringContents(); 7095 lex(); 7096 return true; 7097 } else { 7098 Error(getLoc(), ErrMsg); 7099 return false; 7100 } 7101 } 7102 7103 bool 7104 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 7105 if (isToken(AsmToken::Identifier)) { 7106 Val = getTokenStr(); 7107 lex(); 7108 return true; 7109 } else { 7110 if (!ErrMsg.empty()) 7111 Error(getLoc(), ErrMsg); 7112 return false; 7113 } 7114 } 7115 7116 AsmToken 7117 AMDGPUAsmParser::getToken() const { 7118 return Parser.getTok(); 7119 } 7120 7121 AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) { 7122 return isToken(AsmToken::EndOfStatement) 7123 ? getToken() 7124 : getLexer().peekTok(ShouldSkipSpace); 7125 } 7126 7127 void 7128 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 7129 auto TokCount = getLexer().peekTokens(Tokens); 7130 7131 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 7132 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 7133 } 7134 7135 AsmToken::TokenKind 7136 AMDGPUAsmParser::getTokenKind() const { 7137 return getLexer().getKind(); 7138 } 7139 7140 SMLoc 7141 AMDGPUAsmParser::getLoc() const { 7142 return getToken().getLoc(); 7143 } 7144 7145 StringRef 7146 AMDGPUAsmParser::getTokenStr() const { 7147 return getToken().getString(); 7148 } 7149 7150 void 7151 AMDGPUAsmParser::lex() { 7152 Parser.Lex(); 7153 } 7154 7155 SMLoc 7156 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 7157 const OperandVector &Operands) const { 7158 for (unsigned i = Operands.size() - 1; i > 0; --i) { 7159 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7160 if (Test(Op)) 7161 return Op.getStartLoc(); 7162 } 7163 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 7164 } 7165 7166 SMLoc 7167 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 7168 const OperandVector &Operands) const { 7169 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 7170 return getOperandLoc(Test, Operands); 7171 } 7172 7173 SMLoc 7174 AMDGPUAsmParser::getRegLoc(unsigned Reg, 7175 const OperandVector &Operands) const { 7176 auto Test = [=](const AMDGPUOperand& Op) { 7177 return Op.isRegKind() && Op.getReg() == Reg; 7178 }; 7179 return getOperandLoc(Test, Operands); 7180 } 7181 7182 SMLoc 7183 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 7184 auto Test = [](const AMDGPUOperand& Op) { 7185 return Op.IsImmKindLiteral() || Op.isExpr(); 7186 }; 7187 return getOperandLoc(Test, Operands); 7188 } 7189 7190 SMLoc 7191 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 7192 auto Test = [](const AMDGPUOperand& Op) { 7193 return Op.isImmKindConst(); 7194 }; 7195 return getOperandLoc(Test, Operands); 7196 } 7197 7198 //===----------------------------------------------------------------------===// 7199 // 
swizzle 7200 //===----------------------------------------------------------------------===// 7201 7202 LLVM_READNONE 7203 static unsigned 7204 encodeBitmaskPerm(const unsigned AndMask, 7205 const unsigned OrMask, 7206 const unsigned XorMask) { 7207 using namespace llvm::AMDGPU::Swizzle; 7208 7209 return BITMASK_PERM_ENC | 7210 (AndMask << BITMASK_AND_SHIFT) | 7211 (OrMask << BITMASK_OR_SHIFT) | 7212 (XorMask << BITMASK_XOR_SHIFT); 7213 } 7214 7215 bool 7216 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 7217 const unsigned MinVal, 7218 const unsigned MaxVal, 7219 const StringRef ErrMsg, 7220 SMLoc &Loc) { 7221 if (!skipToken(AsmToken::Comma, "expected a comma")) { 7222 return false; 7223 } 7224 Loc = getLoc(); 7225 if (!parseExpr(Op)) { 7226 return false; 7227 } 7228 if (Op < MinVal || Op > MaxVal) { 7229 Error(Loc, ErrMsg); 7230 return false; 7231 } 7232 7233 return true; 7234 } 7235 7236 bool 7237 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 7238 const unsigned MinVal, 7239 const unsigned MaxVal, 7240 const StringRef ErrMsg) { 7241 SMLoc Loc; 7242 for (unsigned i = 0; i < OpNum; ++i) { 7243 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 7244 return false; 7245 } 7246 7247 return true; 7248 } 7249 7250 bool 7251 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 7252 using namespace llvm::AMDGPU::Swizzle; 7253 7254 int64_t Lane[LANE_NUM]; 7255 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 7256 "expected a 2-bit lane id")) { 7257 Imm = QUAD_PERM_ENC; 7258 for (unsigned I = 0; I < LANE_NUM; ++I) { 7259 Imm |= Lane[I] << (LANE_SHIFT * I); 7260 } 7261 return true; 7262 } 7263 return false; 7264 } 7265 7266 bool 7267 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 7268 using namespace llvm::AMDGPU::Swizzle; 7269 7270 SMLoc Loc; 7271 int64_t GroupSize; 7272 int64_t LaneIdx; 7273 7274 if (!parseSwizzleOperand(GroupSize, 7275 2, 32, 7276 "group size must be in the interval [2,32]", 7277 Loc)) { 7278 return false; 7279 } 7280 if (!isPowerOf2_64(GroupSize)) { 7281 Error(Loc, "group size must be a power of two"); 7282 return false; 7283 } 7284 if (parseSwizzleOperand(LaneIdx, 7285 0, GroupSize - 1, 7286 "lane id must be in the interval [0,group size - 1]", 7287 Loc)) { 7288 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 7289 return true; 7290 } 7291 return false; 7292 } 7293 7294 bool 7295 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 7296 using namespace llvm::AMDGPU::Swizzle; 7297 7298 SMLoc Loc; 7299 int64_t GroupSize; 7300 7301 if (!parseSwizzleOperand(GroupSize, 7302 2, 32, 7303 "group size must be in the interval [2,32]", 7304 Loc)) { 7305 return false; 7306 } 7307 if (!isPowerOf2_64(GroupSize)) { 7308 Error(Loc, "group size must be a power of two"); 7309 return false; 7310 } 7311 7312 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 7313 return true; 7314 } 7315 7316 bool 7317 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 7318 using namespace llvm::AMDGPU::Swizzle; 7319 7320 SMLoc Loc; 7321 int64_t GroupSize; 7322 7323 if (!parseSwizzleOperand(GroupSize, 7324 1, 16, 7325 "group size must be in the interval [1,16]", 7326 Loc)) { 7327 return false; 7328 } 7329 if (!isPowerOf2_64(GroupSize)) { 7330 Error(Loc, "group size must be a power of two"); 7331 return false; 7332 } 7333 7334 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 7335 return true; 7336 } 7337 7338 bool 7339 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 7340 using namespace llvm::AMDGPU::Swizzle; 7341 7342 if 
(!skipToken(AsmToken::Comma, "expected a comma")) {
7343 return false;
7344 }
7345
7346 StringRef Ctl;
7347 SMLoc StrLoc = getLoc();
7348 if (!parseString(Ctl)) {
7349 return false;
7350 }
7351 if (Ctl.size() != BITMASK_WIDTH) {
7352 Error(StrLoc, "expected a 5-character mask");
7353 return false;
7354 }
7355
7356 unsigned AndMask = 0;
7357 unsigned OrMask = 0;
7358 unsigned XorMask = 0;
7359
7360 for (size_t i = 0; i < Ctl.size(); ++i) {
7361 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
7362 switch(Ctl[i]) {
7363 default:
7364 Error(StrLoc, "invalid mask");
7365 return false;
7366 case '0':
7367 break;
7368 case '1':
7369 OrMask |= Mask;
7370 break;
7371 case 'p':
7372 AndMask |= Mask;
7373 break;
7374 case 'i':
7375 AndMask |= Mask;
7376 XorMask |= Mask;
7377 break;
7378 }
7379 }
7380
7381 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
7382 return true;
7383 }
7384
7385 bool
7386 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
7387
7388 SMLoc OffsetLoc = getLoc();
7389
7390 if (!parseExpr(Imm, "a swizzle macro")) {
7391 return false;
7392 }
7393 if (!isUInt<16>(Imm)) {
7394 Error(OffsetLoc, "expected a 16-bit offset");
7395 return false;
7396 }
7397 return true;
7398 }
7399
7400 bool
7401 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
7402 using namespace llvm::AMDGPU::Swizzle;
7403
7404 if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
7405
7406 SMLoc ModeLoc = getLoc();
7407 bool Ok = false;
7408
7409 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
7410 Ok = parseSwizzleQuadPerm(Imm);
7411 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
7412 Ok = parseSwizzleBitmaskPerm(Imm);
7413 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
7414 Ok = parseSwizzleBroadcast(Imm);
7415 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
7416 Ok = parseSwizzleSwap(Imm);
7417 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
7418 Ok = parseSwizzleReverse(Imm);
7419 } else {
7420 Error(ModeLoc, "expected a swizzle mode");
7421 }
7422
7423 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
7424 }
7425
7426 return false;
7427 }
7428
7429 OperandMatchResultTy
7430 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
7431 SMLoc S = getLoc();
7432 int64_t Imm = 0;
7433
7434 if (trySkipId("offset")) {
7435
7436 bool Ok = false;
7437 if (skipToken(AsmToken::Colon, "expected a colon")) {
7438 if (trySkipId("swizzle")) {
7439 Ok = parseSwizzleMacro(Imm);
7440 } else {
7441 Ok = parseSwizzleOffset(Imm);
7442 }
7443 }
7444
7445 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
7446
7447 return Ok? MatchOperand_Success : MatchOperand_ParseFail;
7448 } else {
7449 // Swizzle "offset" operand is optional.
7450 // If it is omitted, try parsing other optional operands.
7451 return parseOptionalOpr(Operands); 7452 } 7453 } 7454 7455 bool 7456 AMDGPUOperand::isSwizzle() const { 7457 return isImmTy(ImmTySwizzle); 7458 } 7459 7460 //===----------------------------------------------------------------------===// 7461 // VGPR Index Mode 7462 //===----------------------------------------------------------------------===// 7463 7464 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 7465 7466 using namespace llvm::AMDGPU::VGPRIndexMode; 7467 7468 if (trySkipToken(AsmToken::RParen)) { 7469 return OFF; 7470 } 7471 7472 int64_t Imm = 0; 7473 7474 while (true) { 7475 unsigned Mode = 0; 7476 SMLoc S = getLoc(); 7477 7478 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 7479 if (trySkipId(IdSymbolic[ModeId])) { 7480 Mode = 1 << ModeId; 7481 break; 7482 } 7483 } 7484 7485 if (Mode == 0) { 7486 Error(S, (Imm == 0)? 7487 "expected a VGPR index mode or a closing parenthesis" : 7488 "expected a VGPR index mode"); 7489 return UNDEF; 7490 } 7491 7492 if (Imm & Mode) { 7493 Error(S, "duplicate VGPR index mode"); 7494 return UNDEF; 7495 } 7496 Imm |= Mode; 7497 7498 if (trySkipToken(AsmToken::RParen)) 7499 break; 7500 if (!skipToken(AsmToken::Comma, 7501 "expected a comma or a closing parenthesis")) 7502 return UNDEF; 7503 } 7504 7505 return Imm; 7506 } 7507 7508 OperandMatchResultTy 7509 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 7510 7511 using namespace llvm::AMDGPU::VGPRIndexMode; 7512 7513 int64_t Imm = 0; 7514 SMLoc S = getLoc(); 7515 7516 if (trySkipId("gpr_idx", AsmToken::LParen)) { 7517 Imm = parseGPRIdxMacro(); 7518 if (Imm == UNDEF) 7519 return MatchOperand_ParseFail; 7520 } else { 7521 if (getParser().parseAbsoluteExpression(Imm)) 7522 return MatchOperand_ParseFail; 7523 if (Imm < 0 || !isUInt<4>(Imm)) { 7524 Error(S, "invalid immediate: only 4-bit values are legal"); 7525 return MatchOperand_ParseFail; 7526 } 7527 } 7528 7529 Operands.push_back( 7530 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 7531 return MatchOperand_Success; 7532 } 7533 7534 bool AMDGPUOperand::isGPRIdxMode() const { 7535 return isImmTy(ImmTyGprIdxMode); 7536 } 7537 7538 //===----------------------------------------------------------------------===// 7539 // sopp branch targets 7540 //===----------------------------------------------------------------------===// 7541 7542 OperandMatchResultTy 7543 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 7544 7545 // Make sure we are not parsing something 7546 // that looks like a label or an expression but is not. 7547 // This will improve error messages. 7548 if (isRegister() || isModifier()) 7549 return MatchOperand_NoMatch; 7550 7551 if (!parseExpr(Operands)) 7552 return MatchOperand_ParseFail; 7553 7554 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 7555 assert(Opr.isImm() || Opr.isExpr()); 7556 SMLoc Loc = Opr.getStartLoc(); 7557 7558 // Currently we do not support arbitrary expressions as branch targets. 7559 // Only labels and absolute expressions are accepted. 
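// As a sketch (hypothetical label and offsets, not taken from the original comments):
//   s_branch loop_end      ; plain label reference  - accepted
//   s_branch 0x3f          ; absolute 16-bit offset - accepted
//   s_branch loop_end+4    ; composite expression   - rejected by the checks below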
7560 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 7561 Error(Loc, "expected an absolute expression or a label"); 7562 } else if (Opr.isImm() && !Opr.isS16Imm()) { 7563 Error(Loc, "expected a 16-bit signed jump offset"); 7564 } 7565 7566 return MatchOperand_Success; 7567 } 7568 7569 //===----------------------------------------------------------------------===// 7570 // Boolean holding registers 7571 //===----------------------------------------------------------------------===// 7572 7573 OperandMatchResultTy 7574 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 7575 return parseReg(Operands); 7576 } 7577 7578 //===----------------------------------------------------------------------===// 7579 // mubuf 7580 //===----------------------------------------------------------------------===// 7581 7582 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 7583 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 7584 } 7585 7586 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 7587 const OperandVector &Operands, 7588 bool IsAtomic, 7589 bool IsLds) { 7590 OptionalImmIndexMap OptionalIdx; 7591 unsigned FirstOperandIdx = 1; 7592 bool IsAtomicReturn = false; 7593 7594 if (IsAtomic) { 7595 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7596 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7597 if (!Op.isCPol()) 7598 continue; 7599 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7600 break; 7601 } 7602 7603 if (!IsAtomicReturn) { 7604 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7605 if (NewOpc != -1) 7606 Inst.setOpcode(NewOpc); 7607 } 7608 7609 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7610 SIInstrFlags::IsAtomicRet; 7611 } 7612 7613 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7614 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7615 7616 // Add the register arguments 7617 if (Op.isReg()) { 7618 Op.addRegOperands(Inst, 1); 7619 // Insert a tied src for atomic return dst. 7620 // This cannot be postponed as subsequent calls to 7621 // addImmOperands rely on correct number of MC operands. 7622 if (IsAtomicReturn && i == FirstOperandIdx) 7623 Op.addRegOperands(Inst, 1); 7624 continue; 7625 } 7626 7627 // Handle the case where soffset is an immediate 7628 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7629 Op.addImmOperands(Inst, 1); 7630 continue; 7631 } 7632 7633 // Handle tokens like 'offen' which are sometimes hard-coded into the 7634 // asm string. There are no MCInst operands for these. 
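// Illustrative example (hypothetical registers): in
//   buffer_load_dword v5, v1, s[8:11], s3 offen offset:16
// 'offen' is such a token and is simply skipped by the check below.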
7635 if (Op.isToken()) { 7636 continue; 7637 } 7638 assert(Op.isImm()); 7639 7640 // Handle optional arguments 7641 OptionalIdx[Op.getImmTy()] = i; 7642 } 7643 7644 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7645 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7646 7647 if (!IsLds) { // tfe is not legal with lds opcodes 7648 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7649 } 7650 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7651 } 7652 7653 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7654 OptionalImmIndexMap OptionalIdx; 7655 7656 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7657 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7658 7659 // Add the register arguments 7660 if (Op.isReg()) { 7661 Op.addRegOperands(Inst, 1); 7662 continue; 7663 } 7664 7665 // Handle the case where soffset is an immediate 7666 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7667 Op.addImmOperands(Inst, 1); 7668 continue; 7669 } 7670 7671 // Handle tokens like 'offen' which are sometimes hard-coded into the 7672 // asm string. There are no MCInst operands for these. 7673 if (Op.isToken()) { 7674 continue; 7675 } 7676 assert(Op.isImm()); 7677 7678 // Handle optional arguments 7679 OptionalIdx[Op.getImmTy()] = i; 7680 } 7681 7682 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7683 AMDGPUOperand::ImmTyOffset); 7684 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7685 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7686 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7687 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7688 } 7689 7690 //===----------------------------------------------------------------------===// 7691 // mimg 7692 //===----------------------------------------------------------------------===// 7693 7694 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7695 bool IsAtomic) { 7696 unsigned I = 1; 7697 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7698 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7699 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7700 } 7701 7702 if (IsAtomic) { 7703 // Add src, same as dst 7704 assert(Desc.getNumDefs() == 1); 7705 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7706 } 7707 7708 OptionalImmIndexMap OptionalIdx; 7709 7710 for (unsigned E = Operands.size(); I != E; ++I) { 7711 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7712 7713 // Add the register arguments 7714 if (Op.isReg()) { 7715 Op.addRegOperands(Inst, 1); 7716 } else if (Op.isImmModifier()) { 7717 OptionalIdx[Op.getImmTy()] = I; 7718 } else if (!Op.isToken()) { 7719 llvm_unreachable("unexpected operand type"); 7720 } 7721 } 7722 7723 bool IsGFX10Plus = isGFX10Plus(); 7724 7725 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7726 if (IsGFX10Plus) 7727 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7728 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7729 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7730 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7731 if (IsGFX10Plus) 7732 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7733 
if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 7734 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7735 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7736 if (!IsGFX10Plus) 7737 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7738 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7739 } 7740 7741 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7742 cvtMIMG(Inst, Operands, true); 7743 } 7744 7745 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7746 OptionalImmIndexMap OptionalIdx; 7747 bool IsAtomicReturn = false; 7748 7749 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7750 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7751 if (!Op.isCPol()) 7752 continue; 7753 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7754 break; 7755 } 7756 7757 if (!IsAtomicReturn) { 7758 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7759 if (NewOpc != -1) 7760 Inst.setOpcode(NewOpc); 7761 } 7762 7763 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7764 SIInstrFlags::IsAtomicRet; 7765 7766 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7767 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7768 7769 // Add the register arguments 7770 if (Op.isReg()) { 7771 Op.addRegOperands(Inst, 1); 7772 if (IsAtomicReturn && i == 1) 7773 Op.addRegOperands(Inst, 1); 7774 continue; 7775 } 7776 7777 // Handle the case where soffset is an immediate 7778 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7779 Op.addImmOperands(Inst, 1); 7780 continue; 7781 } 7782 7783 // Handle tokens like 'offen' which are sometimes hard-coded into the 7784 // asm string. There are no MCInst operands for these. 7785 if (Op.isToken()) { 7786 continue; 7787 } 7788 assert(Op.isImm()); 7789 7790 // Handle optional arguments 7791 OptionalIdx[Op.getImmTy()] = i; 7792 } 7793 7794 if ((int)Inst.getNumOperands() <= 7795 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7796 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7797 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7798 } 7799 7800 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7801 const OperandVector &Operands) { 7802 for (unsigned I = 1; I < Operands.size(); ++I) { 7803 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7804 if (Operand.isReg()) 7805 Operand.addRegOperands(Inst, 1); 7806 } 7807 7808 Inst.addOperand(MCOperand::createImm(1)); // a16 7809 } 7810 7811 //===----------------------------------------------------------------------===// 7812 // smrd 7813 //===----------------------------------------------------------------------===// 7814 7815 bool AMDGPUOperand::isSMRDOffset8() const { 7816 return isImm() && isUInt<8>(getImm()); 7817 } 7818 7819 bool AMDGPUOperand::isSMEMOffset() const { 7820 return isImmTy(ImmTyNone) || 7821 isImmTy(ImmTyOffset); // Offset range is checked later by validator. 7822 } 7823 7824 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7825 // 32-bit literals are only supported on CI and we only want to use them 7826 // when the offset is > 8-bits. 
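// A hypothetical example: on CI, 's_load_dword s4, s[2:3], 0x1234' needs the
// 32-bit literal encoding because 0x1234 does not fit into 8 bits.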
7827 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7828 } 7829 7830 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7831 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7832 } 7833 7834 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7835 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7836 } 7837 7838 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7839 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7840 } 7841 7842 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7843 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7844 } 7845 7846 //===----------------------------------------------------------------------===// 7847 // vop3 7848 //===----------------------------------------------------------------------===// 7849 7850 static bool ConvertOmodMul(int64_t &Mul) { 7851 if (Mul != 1 && Mul != 2 && Mul != 4) 7852 return false; 7853 7854 Mul >>= 1; 7855 return true; 7856 } 7857 7858 static bool ConvertOmodDiv(int64_t &Div) { 7859 if (Div == 1) { 7860 Div = 0; 7861 return true; 7862 } 7863 7864 if (Div == 2) { 7865 Div = 3; 7866 return true; 7867 } 7868 7869 return false; 7870 } 7871 7872 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7873 // This is intentional and ensures compatibility with sp3. 7874 // See bug 35397 for details. 7875 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7876 if (BoundCtrl == 0 || BoundCtrl == 1) { 7877 BoundCtrl = 1; 7878 return true; 7879 } 7880 return false; 7881 } 7882 7883 // Note: the order in this table matches the order of operands in AsmString. 7884 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7885 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7886 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7887 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7888 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7889 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7890 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7891 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7892 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7893 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7894 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7895 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7896 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7897 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7898 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7899 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7900 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7901 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7902 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7903 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7904 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7905 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7906 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7907 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7908 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7909 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7910 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 7911 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 7912 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 7913 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 7914 {"vm", AMDGPUOperand::ImmTyExpVM, 
true, nullptr},
7915 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7916 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7917 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7918 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7919 {"dpp8", AMDGPUOperand::ImmTyDPP8, false, nullptr},
7920 {"dpp_ctrl", AMDGPUOperand::ImmTyDppCtrl, false, nullptr},
7921 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7922 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7923 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7924 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr},
7925 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7926 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7927 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr},
7928 {"wait_vdst", AMDGPUOperand::ImmTyWaitVDST, false, nullptr},
7929 {"wait_exp", AMDGPUOperand::ImmTyWaitEXP, false, nullptr}
7930 };
7931
7932 void AMDGPUAsmParser::onBeginOfFile() {
7933 if (!getParser().getStreamer().getTargetStreamer() ||
7934 getSTI().getTargetTriple().getArch() == Triple::r600)
7935 return;
7936
7937 if (!getTargetStreamer().getTargetID())
7938 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7939
7940 if (isHsaAbiVersion3AndAbove(&getSTI()))
7941 getTargetStreamer().EmitDirectiveAMDGCNTarget();
7942 }
7943
7944 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7945
7946 OperandMatchResultTy res = parseOptionalOpr(Operands);
7947
7948 // This is a hack to enable hardcoded mandatory operands which follow
7949 // optional operands.
7950 //
7951 // The current design assumes that all operands after the first optional
7952 // operand are also optional. However, the implementation of some instructions
7953 // violates this rule (see e.g. flat/global atomics, which have hardcoded 'glc' operands).
7954 //
7955 // To alleviate this problem, we have to (implicitly) parse extra operands
7956 // to make sure the autogenerated parser of custom operands never hits hardcoded
7957 // mandatory operands.
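// Sketch of the effect (hypothetical operand mix): once one optional operand
// such as 'offset:16' has matched, the loop below keeps consuming any further
// optional operands (up to MAX_OPR_LOOKAHEAD of them), so the autogenerated
// parser is only re-entered once no optional operand remains.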
7958 7959 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7960 if (res != MatchOperand_Success || 7961 isToken(AsmToken::EndOfStatement)) 7962 break; 7963 7964 trySkipToken(AsmToken::Comma); 7965 res = parseOptionalOpr(Operands); 7966 } 7967 7968 return res; 7969 } 7970 7971 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7972 OperandMatchResultTy res; 7973 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7974 // try to parse any optional operand here 7975 if (Op.IsBit) { 7976 res = parseNamedBit(Op.Name, Operands, Op.Type); 7977 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7978 res = parseOModOperand(Operands); 7979 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7980 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7981 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7982 res = parseSDWASel(Operands, Op.Name, Op.Type); 7983 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7984 res = parseSDWADstUnused(Operands); 7985 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7986 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7987 Op.Type == AMDGPUOperand::ImmTyNegLo || 7988 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7989 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7990 Op.ConvertResult); 7991 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7992 res = parseDim(Operands); 7993 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7994 res = parseCPol(Operands); 7995 } else if (Op.Type == AMDGPUOperand::ImmTyDPP8) { 7996 res = parseDPP8(Operands); 7997 } else if (Op.Type == AMDGPUOperand::ImmTyDppCtrl) { 7998 res = parseDPPCtrl(Operands); 7999 } else { 8000 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 8001 if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) { 8002 res = parseOperandArrayWithPrefix("neg", Operands, 8003 AMDGPUOperand::ImmTyBLGP, 8004 nullptr); 8005 } 8006 } 8007 if (res != MatchOperand_NoMatch) { 8008 return res; 8009 } 8010 } 8011 return MatchOperand_NoMatch; 8012 } 8013 8014 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 8015 StringRef Name = getTokenStr(); 8016 if (Name == "mul") { 8017 return parseIntWithPrefix("mul", Operands, 8018 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 8019 } 8020 8021 if (Name == "div") { 8022 return parseIntWithPrefix("div", Operands, 8023 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 8024 } 8025 8026 return MatchOperand_NoMatch; 8027 } 8028 8029 // Determines which bit DST_OP_SEL occupies in the op_sel operand according to 8030 // the number of src operands present, then copies that bit into src0_modifiers. 
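// Illustrative case (hypothetical operands): for a two-source opcode written
// with op_sel:[0,0,1], SrcNum is 2, so bit 2 of op_sel is the DST bit and
// DST_OP_SEL is ORed into src0_modifiers by the code below.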
8031 void cvtVOP3DstOpSelOnly(MCInst &Inst) { 8032 int Opc = Inst.getOpcode(); 8033 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8034 if (OpSelIdx == -1) 8035 return; 8036 8037 int SrcNum; 8038 const int Ops[] = { AMDGPU::OpName::src0, 8039 AMDGPU::OpName::src1, 8040 AMDGPU::OpName::src2 }; 8041 for (SrcNum = 0; 8042 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 8043 ++SrcNum); 8044 assert(SrcNum > 0); 8045 8046 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 8047 8048 if ((OpSel & (1 << SrcNum)) != 0) { 8049 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 8050 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 8051 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 8052 } 8053 } 8054 8055 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, 8056 const OperandVector &Operands) { 8057 cvtVOP3P(Inst, Operands); 8058 cvtVOP3DstOpSelOnly(Inst); 8059 } 8060 8061 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands, 8062 OptionalImmIndexMap &OptionalIdx) { 8063 cvtVOP3P(Inst, Operands, OptionalIdx); 8064 cvtVOP3DstOpSelOnly(Inst); 8065 } 8066 8067 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 8068 // 1. This operand is input modifiers 8069 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 8070 // 2. This is not last operand 8071 && Desc.NumOperands > (OpNum + 1) 8072 // 3. Next operand is register class 8073 && Desc.OpInfo[OpNum + 1].RegClass != -1 8074 // 4. Next register is not tied to any other operand 8075 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 8076 } 8077 8078 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 8079 { 8080 OptionalImmIndexMap OptionalIdx; 8081 unsigned Opc = Inst.getOpcode(); 8082 8083 unsigned I = 1; 8084 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8085 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8086 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8087 } 8088 8089 for (unsigned E = Operands.size(); I != E; ++I) { 8090 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8091 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8092 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8093 } else if (Op.isInterpSlot() || 8094 Op.isInterpAttr() || 8095 Op.isAttrChan()) { 8096 Inst.addOperand(MCOperand::createImm(Op.getImm())); 8097 } else if (Op.isImmModifier()) { 8098 OptionalIdx[Op.getImmTy()] = I; 8099 } else { 8100 llvm_unreachable("unhandled operand type"); 8101 } 8102 } 8103 8104 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 8105 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 8106 } 8107 8108 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8109 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8110 } 8111 8112 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8113 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8114 } 8115 } 8116 8117 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands) 8118 { 8119 OptionalImmIndexMap OptionalIdx; 8120 unsigned Opc = Inst.getOpcode(); 8121 8122 unsigned I = 1; 8123 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8124 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8125 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8126 } 8127 8128 for (unsigned E = Operands.size(); I != E; 
++I) {
8129 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8130 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8131 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8132 } else if (Op.isImmModifier()) {
8133 OptionalIdx[Op.getImmTy()] = I;
8134 } else {
8135 llvm_unreachable("unhandled operand type");
8136 }
8137 }
8138
8139 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8140
8141 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8142 if (OpSelIdx != -1)
8143 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8144
8145 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
8146
8147 if (OpSelIdx == -1)
8148 return;
8149
8150 const int Ops[] = { AMDGPU::OpName::src0,
8151 AMDGPU::OpName::src1,
8152 AMDGPU::OpName::src2 };
8153 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8154 AMDGPU::OpName::src1_modifiers,
8155 AMDGPU::OpName::src2_modifiers };
8156
8157 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8158
8159 for (int J = 0; J < 3; ++J) {
8160 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8161 if (OpIdx == -1)
8162 break;
8163
8164 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8165 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8166
8167 if ((OpSel & (1 << J)) != 0)
8168 ModVal |= SISrcMods::OP_SEL_0;
8169 if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
8170 (OpSel & (1 << 3)) != 0)
8171 ModVal |= SISrcMods::DST_OP_SEL;
8172
8173 Inst.getOperand(ModIdx).setImm(ModVal);
8174 }
8175 }
8176
8177 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
8178 OptionalImmIndexMap &OptionalIdx) {
8179 unsigned Opc = Inst.getOpcode();
8180
8181 unsigned I = 1;
8182 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8183 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8184 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8185 }
8186
8187 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
8188 // This instruction has src modifiers
8189 for (unsigned E = Operands.size(); I != E; ++I) {
8190 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8191 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8192 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8193 } else if (Op.isImmModifier()) {
8194 OptionalIdx[Op.getImmTy()] = I;
8195 } else if (Op.isRegOrImm()) {
8196 Op.addRegOrImmOperands(Inst, 1);
8197 } else {
8198 llvm_unreachable("unhandled operand type");
8199 }
8200 }
8201 } else {
8202 // No src modifiers
8203 for (unsigned E = Operands.size(); I != E; ++I) {
8204 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8205 if (Op.isMod()) {
8206 OptionalIdx[Op.getImmTy()] = I;
8207 } else {
8208 Op.addRegOrImmOperands(Inst, 1);
8209 }
8210 }
8211 }
8212
8213 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
8214 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8215 }
8216
8217 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
8218 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8219 }
8220
8221 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
8222 // these instructions have a src2 register operand that is tied to the dst operand;
8223 // we don't allow modifiers for this operand in the assembler, so src2_modifiers
8224 // should be 0.
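// For instance (illustrative registers), 'v_mac_f32_e64 v1, v2, v3' implicitly
// reads v1 as src2; the code below inserts a zero src2_modifiers operand and a
// copy of the dst operand to model that tie.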
8225 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 8226 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 8227 Opc == AMDGPU::V_MAC_F32_e64_vi || 8228 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 8229 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 8230 Opc == AMDGPU::V_MAC_F16_e64_vi || 8231 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 8232 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 8233 Opc == AMDGPU::V_FMAC_F32_e64_gfx11 || 8234 Opc == AMDGPU::V_FMAC_F32_e64_vi || 8235 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 8236 Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 || 8237 Opc == AMDGPU::V_FMAC_F16_e64_gfx10 || 8238 Opc == AMDGPU::V_FMAC_F16_e64_gfx11) { 8239 auto it = Inst.begin(); 8240 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 8241 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 8242 ++it; 8243 // Copy the operand to ensure it's not invalidated when Inst grows. 8244 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 8245 } 8246 } 8247 8248 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 8249 OptionalImmIndexMap OptionalIdx; 8250 cvtVOP3(Inst, Operands, OptionalIdx); 8251 } 8252 8253 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 8254 OptionalImmIndexMap &OptIdx) { 8255 const int Opc = Inst.getOpcode(); 8256 const MCInstrDesc &Desc = MII.get(Opc); 8257 8258 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 8259 8260 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 8261 assert(!IsPacked); 8262 Inst.addOperand(Inst.getOperand(0)); 8263 } 8264 8265 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 8266 // instruction, and then figure out where to actually put the modifiers 8267 8268 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8269 if (OpSelIdx != -1) { 8270 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 8271 } 8272 8273 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 8274 if (OpSelHiIdx != -1) { 8275 int DefaultVal = IsPacked ? 
-1 : 0; 8276 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 8277 DefaultVal); 8278 } 8279 8280 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 8281 if (NegLoIdx != -1) { 8282 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 8283 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 8284 } 8285 8286 const int Ops[] = { AMDGPU::OpName::src0, 8287 AMDGPU::OpName::src1, 8288 AMDGPU::OpName::src2 }; 8289 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8290 AMDGPU::OpName::src1_modifiers, 8291 AMDGPU::OpName::src2_modifiers }; 8292 8293 unsigned OpSel = 0; 8294 unsigned OpSelHi = 0; 8295 unsigned NegLo = 0; 8296 unsigned NegHi = 0; 8297 8298 if (OpSelIdx != -1) 8299 OpSel = Inst.getOperand(OpSelIdx).getImm(); 8300 8301 if (OpSelHiIdx != -1) 8302 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 8303 8304 if (NegLoIdx != -1) { 8305 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 8306 NegLo = Inst.getOperand(NegLoIdx).getImm(); 8307 NegHi = Inst.getOperand(NegHiIdx).getImm(); 8308 } 8309 8310 for (int J = 0; J < 3; ++J) { 8311 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8312 if (OpIdx == -1) 8313 break; 8314 8315 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8316 8317 if (ModIdx == -1) 8318 continue; 8319 8320 uint32_t ModVal = 0; 8321 8322 if ((OpSel & (1 << J)) != 0) 8323 ModVal |= SISrcMods::OP_SEL_0; 8324 8325 if ((OpSelHi & (1 << J)) != 0) 8326 ModVal |= SISrcMods::OP_SEL_1; 8327 8328 if ((NegLo & (1 << J)) != 0) 8329 ModVal |= SISrcMods::NEG; 8330 8331 if ((NegHi & (1 << J)) != 0) 8332 ModVal |= SISrcMods::NEG_HI; 8333 8334 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 8335 } 8336 } 8337 8338 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 8339 OptionalImmIndexMap OptIdx; 8340 cvtVOP3(Inst, Operands, OptIdx); 8341 cvtVOP3P(Inst, Operands, OptIdx); 8342 } 8343 8344 //===----------------------------------------------------------------------===// 8345 // VOPD 8346 //===----------------------------------------------------------------------===// 8347 8348 OperandMatchResultTy AMDGPUAsmParser::parseVOPD(OperandVector &Operands) { 8349 if (!hasVOPD(getSTI())) 8350 return MatchOperand_NoMatch; 8351 8352 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) { 8353 SMLoc S = getLoc(); 8354 lex(); 8355 lex(); 8356 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S)); 8357 const MCExpr *Expr; 8358 if (isToken(AsmToken::Identifier) && !Parser.parseExpression(Expr)) { 8359 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 8360 return MatchOperand_Success; 8361 } 8362 Error(S, "invalid VOPD :: usage"); 8363 return MatchOperand_ParseFail; 8364 } 8365 return MatchOperand_NoMatch; 8366 } 8367 8368 // Create VOPD MCInst operands using parsed assembler operands. 
8369 // Parsed VOPD operands are ordered as follows: 8370 // OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::' 8371 // OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm] 8372 // If both OpX and OpY have an imm, the first imm has a different name: 8373 // OpXMnemo dstX src0X [vsrc1X|immDeferred vsrc1X|vsrc1X immDeferred] '::' 8374 // OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm] 8375 // MCInst operands have the following order: 8376 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands] 8377 void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) { 8378 auto addOp = [&](uint16_t i) { // NOLINT:function pointer 8379 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 8380 if (Op.isReg()) { 8381 Op.addRegOperands(Inst, 1); 8382 return; 8383 } 8384 if (Op.isImm()) { 8385 Op.addImmOperands(Inst, 1); 8386 return; 8387 } 8388 // Handle tokens like 'offen' which are sometimes hard-coded into the 8389 // asm string. There are no MCInst operands for these. 8390 if (Op.isToken()) { 8391 return; 8392 } 8393 llvm_unreachable("Unhandled operand type in cvtVOPD"); 8394 }; 8395 8396 // Indices into MCInst.Operands 8397 const auto FmamkOpXImmMCIndex = 3; // dstX, dstY, src0X, imm, ... 8398 const auto FmaakOpXImmMCIndex = 4; // dstX, dstY, src0X, src1X, imm, ... 8399 const auto MinOpYImmMCIndex = 4; // dstX, dstY, src0X, src0Y, imm, ... 8400 8401 unsigned Opc = Inst.getOpcode(); 8402 bool HasVsrc1X = 8403 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vsrc1X) != -1; 8404 bool HasImmX = 8405 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::immDeferred) != -1 || 8406 (HasVsrc1X && (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) == 8407 FmamkOpXImmMCIndex || 8408 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) == 8409 FmaakOpXImmMCIndex)); 8410 8411 bool HasVsrc1Y = 8412 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vsrc1Y) != -1; 8413 bool HasImmY = 8414 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::immDeferred) != -1 || 8415 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) >= 8416 MinOpYImmMCIndex + HasVsrc1X; 8417 8418 // Indices of parsed operands relative to dst 8419 const auto DstIdx = 0; 8420 const auto Src0Idx = 1; 8421 const auto Vsrc1OrImmIdx = 2; 8422 8423 const auto OpXOperandsSize = 2 + HasImmX + HasVsrc1X; 8424 const auto BridgeTokensSize = 2; // Special VOPD tokens ('::' and OpYMnemo) 8425 8426 // Offsets into parsed operands 8427 const auto OpXFirstOperandOffset = 1; 8428 const auto OpYFirstOperandOffset = 8429 OpXFirstOperandOffset + OpXOperandsSize + BridgeTokensSize; 8430 8431 // Order of addOp calls determines MC operand order 8432 addOp(OpXFirstOperandOffset + DstIdx); // vdstX 8433 addOp(OpYFirstOperandOffset + DstIdx); // vdstY 8434 8435 addOp(OpXFirstOperandOffset + Src0Idx); // src0X 8436 if (HasImmX) { 8437 // immX then vsrc1X for fmamk, vsrc1X then immX for fmaak 8438 addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx); 8439 addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx + 1); 8440 } else { 8441 if (HasVsrc1X) // all except v_mov 8442 addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx); // vsrc1X 8443 } 8444 8445 addOp(OpYFirstOperandOffset + Src0Idx); // src0Y 8446 if (HasImmY) { 8447 // immY then vsrc1Y for fmamk, vsrc1Y then immY for fmaak 8448 addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx); 8449 addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx + 1); 8450 } else { 8451 if (HasVsrc1Y) // all except v_mov 8452 addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx); // vsrc1Y 8453 } 8454 } 8455 8456 
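// A worked example of the ordering above (illustrative GFX11-style syntax):
//   v_dual_fmamk_f32 v4, v1, 0x42, v2 :: v_dual_mov_b32 v5, v3
// yields MCInst operands in the order
//   v4 (vdstX), v5 (vdstY), v1 (src0X), 0x42 (immX), v2 (vsrc1X), v3 (src0Y).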
//===----------------------------------------------------------------------===// 8457 // dpp 8458 //===----------------------------------------------------------------------===// 8459 8460 bool AMDGPUOperand::isDPP8() const { 8461 return isImmTy(ImmTyDPP8); 8462 } 8463 8464 bool AMDGPUOperand::isDPPCtrl() const { 8465 using namespace AMDGPU::DPP; 8466 8467 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 8468 if (result) { 8469 int64_t Imm = getImm(); 8470 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 8471 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 8472 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 8473 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 8474 (Imm == DppCtrl::WAVE_SHL1) || 8475 (Imm == DppCtrl::WAVE_ROL1) || 8476 (Imm == DppCtrl::WAVE_SHR1) || 8477 (Imm == DppCtrl::WAVE_ROR1) || 8478 (Imm == DppCtrl::ROW_MIRROR) || 8479 (Imm == DppCtrl::ROW_HALF_MIRROR) || 8480 (Imm == DppCtrl::BCAST15) || 8481 (Imm == DppCtrl::BCAST31) || 8482 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 8483 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 8484 } 8485 return false; 8486 } 8487 8488 //===----------------------------------------------------------------------===// 8489 // mAI 8490 //===----------------------------------------------------------------------===// 8491 8492 bool AMDGPUOperand::isBLGP() const { 8493 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 8494 } 8495 8496 bool AMDGPUOperand::isCBSZ() const { 8497 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 8498 } 8499 8500 bool AMDGPUOperand::isABID() const { 8501 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 8502 } 8503 8504 bool AMDGPUOperand::isS16Imm() const { 8505 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 8506 } 8507 8508 bool AMDGPUOperand::isU16Imm() const { 8509 return isImm() && isUInt<16>(getImm()); 8510 } 8511 8512 //===----------------------------------------------------------------------===// 8513 // dim 8514 //===----------------------------------------------------------------------===// 8515 8516 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 8517 // We want to allow "dim:1D" etc., 8518 // but the initial 1 is tokenized as an integer. 
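// E.g. (illustrative) 'dim:2D' arrives as the integer token '2' immediately
// followed by the identifier 'D'; the two pieces are re-joined below before
// the optional SQ_RSRC_IMG_ prefix is stripped.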
8519 std::string Token; 8520 if (isToken(AsmToken::Integer)) { 8521 SMLoc Loc = getToken().getEndLoc(); 8522 Token = std::string(getTokenStr()); 8523 lex(); 8524 if (getLoc() != Loc) 8525 return false; 8526 } 8527 8528 StringRef Suffix; 8529 if (!parseId(Suffix)) 8530 return false; 8531 Token += Suffix; 8532 8533 StringRef DimId = Token; 8534 if (DimId.startswith("SQ_RSRC_IMG_")) 8535 DimId = DimId.drop_front(12); 8536 8537 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 8538 if (!DimInfo) 8539 return false; 8540 8541 Encoding = DimInfo->Encoding; 8542 return true; 8543 } 8544 8545 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 8546 if (!isGFX10Plus()) 8547 return MatchOperand_NoMatch; 8548 8549 SMLoc S = getLoc(); 8550 8551 if (!trySkipId("dim", AsmToken::Colon)) 8552 return MatchOperand_NoMatch; 8553 8554 unsigned Encoding; 8555 SMLoc Loc = getLoc(); 8556 if (!parseDimId(Encoding)) { 8557 Error(Loc, "invalid dim value"); 8558 return MatchOperand_ParseFail; 8559 } 8560 8561 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 8562 AMDGPUOperand::ImmTyDim)); 8563 return MatchOperand_Success; 8564 } 8565 8566 //===----------------------------------------------------------------------===// 8567 // dpp 8568 //===----------------------------------------------------------------------===// 8569 8570 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 8571 SMLoc S = getLoc(); 8572 8573 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 8574 return MatchOperand_NoMatch; 8575 8576 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 8577 8578 int64_t Sels[8]; 8579 8580 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8581 return MatchOperand_ParseFail; 8582 8583 for (size_t i = 0; i < 8; ++i) { 8584 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8585 return MatchOperand_ParseFail; 8586 8587 SMLoc Loc = getLoc(); 8588 if (getParser().parseAbsoluteExpression(Sels[i])) 8589 return MatchOperand_ParseFail; 8590 if (0 > Sels[i] || 7 < Sels[i]) { 8591 Error(Loc, "expected a 3-bit value"); 8592 return MatchOperand_ParseFail; 8593 } 8594 } 8595 8596 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8597 return MatchOperand_ParseFail; 8598 8599 unsigned DPP8 = 0; 8600 for (size_t i = 0; i < 8; ++i) 8601 DPP8 |= (Sels[i] << (i * 3)); 8602 8603 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 8604 return MatchOperand_Success; 8605 } 8606 8607 bool 8608 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 8609 const OperandVector &Operands) { 8610 if (Ctrl == "row_newbcast") 8611 return isGFX90A(); 8612 8613 if (Ctrl == "row_share" || 8614 Ctrl == "row_xmask") 8615 return isGFX10Plus(); 8616 8617 if (Ctrl == "wave_shl" || 8618 Ctrl == "wave_shr" || 8619 Ctrl == "wave_rol" || 8620 Ctrl == "wave_ror" || 8621 Ctrl == "row_bcast") 8622 return isVI() || isGFX9(); 8623 8624 return Ctrl == "row_mirror" || 8625 Ctrl == "row_half_mirror" || 8626 Ctrl == "quad_perm" || 8627 Ctrl == "row_shl" || 8628 Ctrl == "row_shr" || 8629 Ctrl == "row_ror"; 8630 } 8631 8632 int64_t 8633 AMDGPUAsmParser::parseDPPCtrlPerm() { 8634 // quad_perm:[%d,%d,%d,%d] 8635 8636 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8637 return -1; 8638 8639 int64_t Val = 0; 8640 for (int i = 0; i < 4; ++i) { 8641 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8642 return -1; 8643 8644 int64_t Temp; 8645 SMLoc Loc = getLoc(); 8646 if 
(getParser().parseAbsoluteExpression(Temp)) 8647 return -1; 8648 if (Temp < 0 || Temp > 3) { 8649 Error(Loc, "expected a 2-bit value"); 8650 return -1; 8651 } 8652 8653 Val += (Temp << i * 2); 8654 } 8655 8656 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8657 return -1; 8658 8659 return Val; 8660 } 8661 8662 int64_t 8663 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 8664 using namespace AMDGPU::DPP; 8665 8666 // sel:%d 8667 8668 int64_t Val; 8669 SMLoc Loc = getLoc(); 8670 8671 if (getParser().parseAbsoluteExpression(Val)) 8672 return -1; 8673 8674 struct DppCtrlCheck { 8675 int64_t Ctrl; 8676 int Lo; 8677 int Hi; 8678 }; 8679 8680 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 8681 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 8682 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 8683 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 8684 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 8685 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 8686 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 8687 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 8688 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 8689 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 8690 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 8691 .Default({-1, 0, 0}); 8692 8693 bool Valid; 8694 if (Check.Ctrl == -1) { 8695 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 8696 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 8697 } else { 8698 Valid = Check.Lo <= Val && Val <= Check.Hi; 8699 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val); 8700 } 8701 8702 if (!Valid) { 8703 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 8704 return -1; 8705 } 8706 8707 return Val; 8708 } 8709 8710 OperandMatchResultTy 8711 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 8712 using namespace AMDGPU::DPP; 8713 8714 if (!isToken(AsmToken::Identifier) || 8715 !isSupportedDPPCtrl(getTokenStr(), Operands)) 8716 return MatchOperand_NoMatch; 8717 8718 SMLoc S = getLoc(); 8719 int64_t Val = -1; 8720 StringRef Ctrl; 8721 8722 parseId(Ctrl); 8723 8724 if (Ctrl == "row_mirror") { 8725 Val = DppCtrl::ROW_MIRROR; 8726 } else if (Ctrl == "row_half_mirror") { 8727 Val = DppCtrl::ROW_HALF_MIRROR; 8728 } else { 8729 if (skipToken(AsmToken::Colon, "expected a colon")) { 8730 if (Ctrl == "quad_perm") { 8731 Val = parseDPPCtrlPerm(); 8732 } else { 8733 Val = parseDPPCtrlSel(Ctrl); 8734 } 8735 } 8736 } 8737 8738 if (Val == -1) 8739 return MatchOperand_ParseFail; 8740 8741 Operands.push_back( 8742 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 8743 return MatchOperand_Success; 8744 } 8745 8746 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 8747 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 8748 } 8749 8750 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 8751 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 8752 } 8753 8754 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 8755 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 8756 } 8757 8758 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 8759 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 8760 } 8761 8762 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 8763 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 8764 } 8765 8766 // Add dummy $old operand 8767 void 
AMDGPUAsmParser::cvtVOPC64NoDstDPP(MCInst &Inst, 8768 const OperandVector &Operands, 8769 bool IsDPP8) { 8770 Inst.addOperand(MCOperand::createReg(0)); 8771 cvtVOP3DPP(Inst, Operands, IsDPP8); 8772 } 8773 8774 void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8775 OptionalImmIndexMap OptionalIdx; 8776 unsigned Opc = Inst.getOpcode(); 8777 bool HasModifiers = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8778 unsigned I = 1; 8779 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8780 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8781 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8782 } 8783 8784 int Fi = 0; 8785 for (unsigned E = Operands.size(); I != E; ++I) { 8786 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8787 MCOI::TIED_TO); 8788 if (TiedTo != -1) { 8789 assert((unsigned)TiedTo < Inst.getNumOperands()); 8790 // handle tied old or src2 for MAC instructions 8791 Inst.addOperand(Inst.getOperand(TiedTo)); 8792 } 8793 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8794 // Add the register arguments 8795 if (IsDPP8 && Op.isFI()) { 8796 Fi = Op.getImm(); 8797 } else if (HasModifiers && 8798 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8799 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8800 } else if (Op.isReg()) { 8801 Op.addRegOperands(Inst, 1); 8802 } else if (Op.isImm() && 8803 Desc.OpInfo[Inst.getNumOperands()].RegClass != -1) { 8804 assert(!HasModifiers && "Case should be unreachable with modifiers"); 8805 assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP"); 8806 Op.addImmOperands(Inst, 1); 8807 } else if (Op.isImm()) { 8808 OptionalIdx[Op.getImmTy()] = I; 8809 } else { 8810 llvm_unreachable("unhandled operand type"); 8811 } 8812 } 8813 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8814 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8815 } 8816 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8817 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8818 } 8819 if (Desc.TSFlags & SIInstrFlags::VOP3P) 8820 cvtVOP3P(Inst, Operands, OptionalIdx); 8821 else if (Desc.TSFlags & SIInstrFlags::VOP3) 8822 cvtVOP3OpSel(Inst, Operands, OptionalIdx); 8823 else if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) { 8824 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); 8825 } 8826 8827 if (IsDPP8) { 8828 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8); 8829 using namespace llvm::AMDGPU::DPP; 8830 Inst.addOperand(MCOperand::createImm(Fi? 
DPP8_FI_1 : DPP8_FI_0)); 8831 } else { 8832 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4); 8833 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8834 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8835 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8836 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8837 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8838 } 8839 } 8840 } 8841 8842 // Add dummy $old operand 8843 void AMDGPUAsmParser::cvtVOPCNoDstDPP(MCInst &Inst, 8844 const OperandVector &Operands, 8845 bool IsDPP8) { 8846 Inst.addOperand(MCOperand::createReg(0)); 8847 cvtDPP(Inst, Operands, IsDPP8); 8848 } 8849 8850 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8851 OptionalImmIndexMap OptionalIdx; 8852 8853 unsigned Opc = Inst.getOpcode(); 8854 bool HasModifiers = 8855 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8856 unsigned I = 1; 8857 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8858 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8859 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8860 } 8861 8862 int Fi = 0; 8863 for (unsigned E = Operands.size(); I != E; ++I) { 8864 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8865 MCOI::TIED_TO); 8866 if (TiedTo != -1) { 8867 assert((unsigned)TiedTo < Inst.getNumOperands()); 8868 // handle tied old or src2 for MAC instructions 8869 Inst.addOperand(Inst.getOperand(TiedTo)); 8870 } 8871 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8872 // Add the register arguments 8873 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8874 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8875 // Skip it. 8876 continue; 8877 } 8878 8879 if (IsDPP8) { 8880 if (Op.isDPP8()) { 8881 Op.addImmOperands(Inst, 1); 8882 } else if (HasModifiers && 8883 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8884 Op.addRegWithFPInputModsOperands(Inst, 2); 8885 } else if (Op.isFI()) { 8886 Fi = Op.getImm(); 8887 } else if (Op.isReg()) { 8888 Op.addRegOperands(Inst, 1); 8889 } else { 8890 llvm_unreachable("Invalid operand type"); 8891 } 8892 } else { 8893 if (HasModifiers && 8894 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8895 Op.addRegWithFPInputModsOperands(Inst, 2); 8896 } else if (Op.isReg()) { 8897 Op.addRegOperands(Inst, 1); 8898 } else if (Op.isDPPCtrl()) { 8899 Op.addImmOperands(Inst, 1); 8900 } else if (Op.isImm()) { 8901 // Handle optional arguments 8902 OptionalIdx[Op.getImmTy()] = I; 8903 } else { 8904 llvm_unreachable("Invalid operand type"); 8905 } 8906 } 8907 } 8908 8909 if (IsDPP8) { 8910 using namespace llvm::AMDGPU::DPP; 8911 Inst.addOperand(MCOperand::createImm(Fi? 
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands,
                             bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  unsigned Opc = Inst.getOpcode();
  bool HasModifiers =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // Handle the tied 'old' or src2 operand for MAC instructions.
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register or immediate arguments.
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32, ...) DPP uses the "vcc" token.
      // Skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (HasModifiers &&
                 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isFI()) {
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (HasModifiers &&
          isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments.
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
    }
  }
}

//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  SMLoc StringLoc;
  res = parseStringWithPrefix(Prefix, Value, StringLoc);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);

  if (Int == 0xffffffff) {
    Error(StringLoc, "invalid " + Twine(Prefix) + " value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  SMLoc StringLoc;
  res = parseStringWithPrefix("dst_unused", Value, StringLoc);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);

  if (Int == 0xffffffff) {
    Error(StringLoc, "invalid dst_unused value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}

void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}

void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}
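
// Shared converter behind the cvtSdwaVOP* entry points above. BasicInstType
// selects which optional SDWA operands get appended (dst_sel and dst_unused
// for VOP1/VOP2, src1_sel for VOP2/VOPC), while SkipDstVcc/SkipSrcVcc drop
// the textual "vcc" operands of the VOP2b/VOP2e forms, which have no explicit
// operand slot in these SDWA opcodes.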
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32, ...) SDWA uses the "vcc" token as dst.
      // Skip it if it is the 2nd operand (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or the 4th operand (v_addc_u32_sdwa v1, vcc, v2, v3, vcc).
      // Skip VCC only if we did not skip it on the previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments.
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }
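
  // Optional SDWA operands that were not written in the source are appended
  // with their default encodings: 0 for clamp and omod, DWORD for the
  // dst/src selects, and UNUSED_PRESERVE for dst_unused.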
  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // V_NOP_sdwa has no optional sdwa arguments.
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case for v_mac_{f16, f32}: they have a src2 register operand that
  // is tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//
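
// Default factories for the optional MAI (MFMA) modifiers blgp, cbsz and
// abid; a zero immediate is used when the modifier is omitted from the source.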
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that the
// MatchClassKind enum is defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks whether we were given an immediate operand
  // where the corresponding token is expected.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token when isToken returns true, and when the
    // name of the expression is not a valid token the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register, but it should also be enabled
    // with 64-bit operands. The following code enables it for SReg_64
    // operands used as source and destination. Remaining source operands
    // are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }

//===----------------------------------------------------------------------===//
// LDSDIR
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitVDST() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitVDST);
}

bool AMDGPUOperand::isWaitVDST() const {
  return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
}

//===----------------------------------------------------------------------===//
// VINTERP
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitEXP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitEXP);
}

bool AMDGPUOperand::isWaitEXP() const {
  return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
}