1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDKernelCodeT.h" 10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 11 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 12 #include "SIDefines.h" 13 #include "SIInstrInfo.h" 14 #include "SIRegisterInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/SmallBitVector.h" 21 #include "llvm/ADT/StringSet.h" 22 #include "llvm/ADT/Twine.h" 23 #include "llvm/BinaryFormat/ELF.h" 24 #include "llvm/MC/MCAsmInfo.h" 25 #include "llvm/MC/MCContext.h" 26 #include "llvm/MC/MCExpr.h" 27 #include "llvm/MC/MCInst.h" 28 #include "llvm/MC/MCInstrDesc.h" 29 #include "llvm/MC/MCParser/MCAsmLexer.h" 30 #include "llvm/MC/MCParser/MCAsmParser.h" 31 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 32 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 33 #include "llvm/MC/MCSymbol.h" 34 #include "llvm/MC/TargetRegistry.h" 35 #include "llvm/Support/AMDGPUMetadata.h" 36 #include "llvm/Support/AMDHSAKernelDescriptor.h" 37 #include "llvm/Support/Casting.h" 38 #include "llvm/Support/MachineValueType.h" 39 #include "llvm/Support/MathExtras.h" 40 #include "llvm/Support/TargetParser.h" 41 42 using namespace llvm; 43 using namespace llvm::AMDGPU; 44 using namespace llvm::amdhsa; 45 46 namespace { 47 48 class AMDGPUAsmParser; 49 50 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 51 52 //===----------------------------------------------------------------------===// 53 // Operand 54 //===----------------------------------------------------------------------===// 55 56 class AMDGPUOperand : public MCParsedAsmOperand { 57 enum KindTy { 58 Token, 59 Immediate, 60 Register, 61 Expression 62 } Kind; 63 64 SMLoc StartLoc, EndLoc; 65 const AMDGPUAsmParser *AsmParser; 66 67 public: 68 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 69 : Kind(Kind_), AsmParser(AsmParser_) {} 70 71 using Ptr = std::unique_ptr<AMDGPUOperand>; 72 73 struct Modifiers { 74 bool Abs = false; 75 bool Neg = false; 76 bool Sext = false; 77 78 bool hasFPModifiers() const { return Abs || Neg; } 79 bool hasIntModifiers() const { return Sext; } 80 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 81 82 int64_t getFPModifiersOperand() const { 83 int64_t Operand = 0; 84 Operand |= Abs ? SISrcMods::ABS : 0u; 85 Operand |= Neg ? SISrcMods::NEG : 0u; 86 return Operand; 87 } 88 89 int64_t getIntModifiersOperand() const { 90 int64_t Operand = 0; 91 Operand |= Sext ? 
SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
    ImmTyWaitVDST,
    ImmTyWaitEXP,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
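    // (For such expression operands, getToken() below falls back to
    // getExpressionAsToken() and returns the referenced symbol's name.)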
210 return isSymbolRefExpr(); 211 } 212 213 bool isSymbolRefExpr() const { 214 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 215 } 216 217 bool isImm() const override { 218 return Kind == Immediate; 219 } 220 221 void setImmKindNone() const { 222 assert(isImm()); 223 Imm.Kind = ImmKindTyNone; 224 } 225 226 void setImmKindLiteral() const { 227 assert(isImm()); 228 Imm.Kind = ImmKindTyLiteral; 229 } 230 231 void setImmKindConst() const { 232 assert(isImm()); 233 Imm.Kind = ImmKindTyConst; 234 } 235 236 bool IsImmKindLiteral() const { 237 return isImm() && Imm.Kind == ImmKindTyLiteral; 238 } 239 240 bool isImmKindConst() const { 241 return isImm() && Imm.Kind == ImmKindTyConst; 242 } 243 244 bool isInlinableImm(MVT type) const; 245 bool isLiteralImm(MVT type) const; 246 247 bool isRegKind() const { 248 return Kind == Register; 249 } 250 251 bool isReg() const override { 252 return isRegKind() && !hasModifiers(); 253 } 254 255 bool isRegOrInline(unsigned RCID, MVT type) const { 256 return isRegClass(RCID) || isInlinableImm(type); 257 } 258 259 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 260 return isRegOrInline(RCID, type) || isLiteralImm(type); 261 } 262 263 bool isRegOrImmWithInt16InputMods() const { 264 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 265 } 266 267 bool isRegOrImmWithInt32InputMods() const { 268 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 269 } 270 271 bool isRegOrInlineImmWithInt16InputMods() const { 272 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16); 273 } 274 275 bool isRegOrInlineImmWithInt32InputMods() const { 276 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32); 277 } 278 279 bool isRegOrImmWithInt64InputMods() const { 280 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 281 } 282 283 bool isRegOrImmWithFP16InputMods() const { 284 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 285 } 286 287 bool isRegOrImmWithFP32InputMods() const { 288 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 289 } 290 291 bool isRegOrImmWithFP64InputMods() const { 292 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 293 } 294 295 bool isRegOrInlineImmWithFP16InputMods() const { 296 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16); 297 } 298 299 bool isRegOrInlineImmWithFP32InputMods() const { 300 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32); 301 } 302 303 304 bool isVReg() const { 305 return isRegClass(AMDGPU::VGPR_32RegClassID) || 306 isRegClass(AMDGPU::VReg_64RegClassID) || 307 isRegClass(AMDGPU::VReg_96RegClassID) || 308 isRegClass(AMDGPU::VReg_128RegClassID) || 309 isRegClass(AMDGPU::VReg_160RegClassID) || 310 isRegClass(AMDGPU::VReg_192RegClassID) || 311 isRegClass(AMDGPU::VReg_256RegClassID) || 312 isRegClass(AMDGPU::VReg_512RegClassID) || 313 isRegClass(AMDGPU::VReg_1024RegClassID); 314 } 315 316 bool isVReg32() const { 317 return isRegClass(AMDGPU::VGPR_32RegClassID); 318 } 319 320 bool isVReg32OrOff() const { 321 return isOff() || isVReg32(); 322 } 323 324 bool isNull() const { 325 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 326 } 327 328 bool isVRegWithInputMods() const; 329 330 bool isSDWAOperand(MVT type) const; 331 bool isSDWAFP16Operand() const; 332 bool isSDWAFP32Operand() const; 333 bool isSDWAInt16Operand() const; 334 bool isSDWAInt32Operand() const; 335 336 bool isImmTy(ImmTy ImmT) const { 337 return isImm() && Imm.Type == ImmT; 338 } 339 340 bool isImmModifier() const { 341 return isImm() && 
Imm.Type != ImmTyNone; 342 } 343 344 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 345 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 346 bool isDMask() const { return isImmTy(ImmTyDMask); } 347 bool isDim() const { return isImmTy(ImmTyDim); } 348 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 349 bool isDA() const { return isImmTy(ImmTyDA); } 350 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 351 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 352 bool isLWE() const { return isImmTy(ImmTyLWE); } 353 bool isOff() const { return isImmTy(ImmTyOff); } 354 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 355 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 356 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 357 bool isOffen() const { return isImmTy(ImmTyOffen); } 358 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 359 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 360 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 361 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 362 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 363 364 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 365 bool isGDS() const { return isImmTy(ImmTyGDS); } 366 bool isLDS() const { return isImmTy(ImmTyLDS); } 367 bool isCPol() const { return isImmTy(ImmTyCPol); } 368 bool isSWZ() const { return isImmTy(ImmTySWZ); } 369 bool isTFE() const { return isImmTy(ImmTyTFE); } 370 bool isD16() const { return isImmTy(ImmTyD16); } 371 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 372 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 373 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 374 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 375 bool isFI() const { return isImmTy(ImmTyDppFi); } 376 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 377 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 378 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 379 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 380 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 381 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 382 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 383 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 384 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 385 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 386 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 387 bool isHigh() const { return isImmTy(ImmTyHigh); } 388 389 bool isMod() const { 390 return isClampSI() || isOModSI(); 391 } 392 393 bool isRegOrImm() const { 394 return isReg() || isImm(); 395 } 396 397 bool isRegClass(unsigned RCID) const; 398 399 bool isInlineValue() const; 400 401 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 402 return isRegOrInline(RCID, type) && !hasModifiers(); 403 } 404 405 bool isSCSrcB16() const { 406 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 407 } 408 409 bool isSCSrcV2B16() const { 410 return isSCSrcB16(); 411 } 412 413 bool isSCSrcB32() const { 414 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 415 } 416 417 bool isSCSrcB64() const { 418 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 419 } 420 421 bool isBoolReg() const; 422 423 bool isSCSrcF16() const { 424 return 
isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 425 } 426 427 bool isSCSrcV2F16() const { 428 return isSCSrcF16(); 429 } 430 431 bool isSCSrcF32() const { 432 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 433 } 434 435 bool isSCSrcF64() const { 436 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 437 } 438 439 bool isSSrcB32() const { 440 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 441 } 442 443 bool isSSrcB16() const { 444 return isSCSrcB16() || isLiteralImm(MVT::i16); 445 } 446 447 bool isSSrcV2B16() const { 448 llvm_unreachable("cannot happen"); 449 return isSSrcB16(); 450 } 451 452 bool isSSrcB64() const { 453 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 454 // See isVSrc64(). 455 return isSCSrcB64() || isLiteralImm(MVT::i64); 456 } 457 458 bool isSSrcF32() const { 459 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 460 } 461 462 bool isSSrcF64() const { 463 return isSCSrcB64() || isLiteralImm(MVT::f64); 464 } 465 466 bool isSSrcF16() const { 467 return isSCSrcB16() || isLiteralImm(MVT::f16); 468 } 469 470 bool isSSrcV2F16() const { 471 llvm_unreachable("cannot happen"); 472 return isSSrcF16(); 473 } 474 475 bool isSSrcV2FP32() const { 476 llvm_unreachable("cannot happen"); 477 return isSSrcF32(); 478 } 479 480 bool isSCSrcV2FP32() const { 481 llvm_unreachable("cannot happen"); 482 return isSCSrcF32(); 483 } 484 485 bool isSSrcV2INT32() const { 486 llvm_unreachable("cannot happen"); 487 return isSSrcB32(); 488 } 489 490 bool isSCSrcV2INT32() const { 491 llvm_unreachable("cannot happen"); 492 return isSCSrcB32(); 493 } 494 495 bool isSSrcOrLdsB32() const { 496 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 497 isLiteralImm(MVT::i32) || isExpr(); 498 } 499 500 bool isVCSrcB32() const { 501 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 502 } 503 504 bool isVCSrcB64() const { 505 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 506 } 507 508 bool isVCSrcB16() const { 509 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 510 } 511 512 bool isVCSrcV2B16() const { 513 return isVCSrcB16(); 514 } 515 516 bool isVCSrcF32() const { 517 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 518 } 519 520 bool isVCSrcF64() const { 521 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 522 } 523 524 bool isVCSrcF16() const { 525 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 526 } 527 528 bool isVCSrcV2F16() const { 529 return isVCSrcF16(); 530 } 531 532 bool isVSrcB32() const { 533 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 534 } 535 536 bool isVSrcB64() const { 537 return isVCSrcF64() || isLiteralImm(MVT::i64); 538 } 539 540 bool isVSrcB16() const { 541 return isVCSrcB16() || isLiteralImm(MVT::i16); 542 } 543 544 bool isVSrcV2B16() const { 545 return isVSrcB16() || isLiteralImm(MVT::v2i16); 546 } 547 548 bool isVCSrcV2FP32() const { 549 return isVCSrcF64(); 550 } 551 552 bool isVSrcV2FP32() const { 553 return isVSrcF64() || isLiteralImm(MVT::v2f32); 554 } 555 556 bool isVCSrcV2INT32() const { 557 return isVCSrcB64(); 558 } 559 560 bool isVSrcV2INT32() const { 561 return isVSrcB64() || isLiteralImm(MVT::v2i32); 562 } 563 564 bool isVSrcF32() const { 565 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 566 } 567 568 bool isVSrcF64() const { 569 return isVCSrcF64() || isLiteralImm(MVT::f64); 570 } 571 572 bool isVSrcF16() const { 573 return isVCSrcF16() || 
isLiteralImm(MVT::f16); 574 } 575 576 bool isVSrcV2F16() const { 577 return isVSrcF16() || isLiteralImm(MVT::v2f16); 578 } 579 580 bool isVISrcB32() const { 581 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 582 } 583 584 bool isVISrcB16() const { 585 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 586 } 587 588 bool isVISrcV2B16() const { 589 return isVISrcB16(); 590 } 591 592 bool isVISrcF32() const { 593 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 594 } 595 596 bool isVISrcF16() const { 597 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 598 } 599 600 bool isVISrcV2F16() const { 601 return isVISrcF16() || isVISrcB32(); 602 } 603 604 bool isVISrc_64B64() const { 605 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); 606 } 607 608 bool isVISrc_64F64() const { 609 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); 610 } 611 612 bool isVISrc_64V2FP32() const { 613 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); 614 } 615 616 bool isVISrc_64V2INT32() const { 617 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); 618 } 619 620 bool isVISrc_256B64() const { 621 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); 622 } 623 624 bool isVISrc_256F64() const { 625 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); 626 } 627 628 bool isVISrc_128B16() const { 629 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); 630 } 631 632 bool isVISrc_128V2B16() const { 633 return isVISrc_128B16(); 634 } 635 636 bool isVISrc_128B32() const { 637 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); 638 } 639 640 bool isVISrc_128F32() const { 641 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); 642 } 643 644 bool isVISrc_256V2FP32() const { 645 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); 646 } 647 648 bool isVISrc_256V2INT32() const { 649 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); 650 } 651 652 bool isVISrc_512B32() const { 653 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); 654 } 655 656 bool isVISrc_512B16() const { 657 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); 658 } 659 660 bool isVISrc_512V2B16() const { 661 return isVISrc_512B16(); 662 } 663 664 bool isVISrc_512F32() const { 665 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); 666 } 667 668 bool isVISrc_512F16() const { 669 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); 670 } 671 672 bool isVISrc_512V2F16() const { 673 return isVISrc_512F16() || isVISrc_512B32(); 674 } 675 676 bool isVISrc_1024B32() const { 677 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); 678 } 679 680 bool isVISrc_1024B16() const { 681 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); 682 } 683 684 bool isVISrc_1024V2B16() const { 685 return isVISrc_1024B16(); 686 } 687 688 bool isVISrc_1024F32() const { 689 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); 690 } 691 692 bool isVISrc_1024F16() const { 693 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); 694 } 695 696 bool isVISrc_1024V2F16() const { 697 return isVISrc_1024F16() || isVISrc_1024B32(); 698 } 699 700 bool isAISrcB32() const { 701 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 702 } 703 704 bool isAISrcB16() const { 705 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 706 } 707 708 bool isAISrcV2B16() 
const { 709 return isAISrcB16(); 710 } 711 712 bool isAISrcF32() const { 713 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 714 } 715 716 bool isAISrcF16() const { 717 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 718 } 719 720 bool isAISrcV2F16() const { 721 return isAISrcF16() || isAISrcB32(); 722 } 723 724 bool isAISrc_64B64() const { 725 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); 726 } 727 728 bool isAISrc_64F64() const { 729 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); 730 } 731 732 bool isAISrc_128B32() const { 733 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 734 } 735 736 bool isAISrc_128B16() const { 737 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 738 } 739 740 bool isAISrc_128V2B16() const { 741 return isAISrc_128B16(); 742 } 743 744 bool isAISrc_128F32() const { 745 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 746 } 747 748 bool isAISrc_128F16() const { 749 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 750 } 751 752 bool isAISrc_128V2F16() const { 753 return isAISrc_128F16() || isAISrc_128B32(); 754 } 755 756 bool isVISrc_128F16() const { 757 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); 758 } 759 760 bool isVISrc_128V2F16() const { 761 return isVISrc_128F16() || isVISrc_128B32(); 762 } 763 764 bool isAISrc_256B64() const { 765 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); 766 } 767 768 bool isAISrc_256F64() const { 769 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); 770 } 771 772 bool isAISrc_512B32() const { 773 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 774 } 775 776 bool isAISrc_512B16() const { 777 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 778 } 779 780 bool isAISrc_512V2B16() const { 781 return isAISrc_512B16(); 782 } 783 784 bool isAISrc_512F32() const { 785 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 786 } 787 788 bool isAISrc_512F16() const { 789 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 790 } 791 792 bool isAISrc_512V2F16() const { 793 return isAISrc_512F16() || isAISrc_512B32(); 794 } 795 796 bool isAISrc_1024B32() const { 797 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 798 } 799 800 bool isAISrc_1024B16() const { 801 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 802 } 803 804 bool isAISrc_1024V2B16() const { 805 return isAISrc_1024B16(); 806 } 807 808 bool isAISrc_1024F32() const { 809 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 810 } 811 812 bool isAISrc_1024F16() const { 813 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 814 } 815 816 bool isAISrc_1024V2F16() const { 817 return isAISrc_1024F16() || isAISrc_1024B32(); 818 } 819 820 bool isKImmFP32() const { 821 return isLiteralImm(MVT::f32); 822 } 823 824 bool isKImmFP16() const { 825 return isLiteralImm(MVT::f16); 826 } 827 828 bool isMem() const override { 829 return false; 830 } 831 832 bool isExpr() const { 833 return Kind == Expression; 834 } 835 836 bool isSoppBrTarget() const { 837 return isExpr() || isImm(); 838 } 839 840 bool isSWaitCnt() const; 841 bool isDepCtr() const; 842 bool isSDelayAlu() const; 843 bool isHwreg() const; 844 bool isSendMsg() const; 845 bool isSwizzle() const; 846 bool isSMRDOffset8() const; 847 bool isSMEMOffset() const; 848 bool isSMRDLiteralOffset() const; 849 bool isDPP8() const; 850 bool 
isDPPCtrl() const; 851 bool isBLGP() const; 852 bool isCBSZ() const; 853 bool isABID() const; 854 bool isGPRIdxMode() const; 855 bool isS16Imm() const; 856 bool isU16Imm() const; 857 bool isEndpgm() const; 858 bool isWaitVDST() const; 859 bool isWaitEXP() const; 860 861 StringRef getExpressionAsToken() const { 862 assert(isExpr()); 863 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 864 return S->getSymbol().getName(); 865 } 866 867 StringRef getToken() const { 868 assert(isToken()); 869 870 if (Kind == Expression) 871 return getExpressionAsToken(); 872 873 return StringRef(Tok.Data, Tok.Length); 874 } 875 876 int64_t getImm() const { 877 assert(isImm()); 878 return Imm.Val; 879 } 880 881 void setImm(int64_t Val) { 882 assert(isImm()); 883 Imm.Val = Val; 884 } 885 886 ImmTy getImmTy() const { 887 assert(isImm()); 888 return Imm.Type; 889 } 890 891 unsigned getReg() const override { 892 assert(isRegKind()); 893 return Reg.RegNo; 894 } 895 896 SMLoc getStartLoc() const override { 897 return StartLoc; 898 } 899 900 SMLoc getEndLoc() const override { 901 return EndLoc; 902 } 903 904 SMRange getLocRange() const { 905 return SMRange(StartLoc, EndLoc); 906 } 907 908 Modifiers getModifiers() const { 909 assert(isRegKind() || isImmTy(ImmTyNone)); 910 return isRegKind() ? Reg.Mods : Imm.Mods; 911 } 912 913 void setModifiers(Modifiers Mods) { 914 assert(isRegKind() || isImmTy(ImmTyNone)); 915 if (isRegKind()) 916 Reg.Mods = Mods; 917 else 918 Imm.Mods = Mods; 919 } 920 921 bool hasModifiers() const { 922 return getModifiers().hasModifiers(); 923 } 924 925 bool hasFPModifiers() const { 926 return getModifiers().hasFPModifiers(); 927 } 928 929 bool hasIntModifiers() const { 930 return getModifiers().hasIntModifiers(); 931 } 932 933 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 934 935 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 936 937 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 938 939 template <unsigned Bitwidth> 940 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 941 942 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 943 addKImmFPOperands<16>(Inst, N); 944 } 945 946 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 947 addKImmFPOperands<32>(Inst, N); 948 } 949 950 void addRegOperands(MCInst &Inst, unsigned N) const; 951 952 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 953 addRegOperands(Inst, N); 954 } 955 956 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 957 if (isRegKind()) 958 addRegOperands(Inst, N); 959 else if (isExpr()) 960 Inst.addOperand(MCOperand::createExpr(Expr)); 961 else 962 addImmOperands(Inst, N); 963 } 964 965 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 966 Modifiers Mods = getModifiers(); 967 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 968 if (isRegKind()) { 969 addRegOperands(Inst, N); 970 } else { 971 addImmOperands(Inst, N, false); 972 } 973 } 974 975 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 976 assert(!hasIntModifiers()); 977 addRegOrImmWithInputModsOperands(Inst, N); 978 } 979 980 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 981 assert(!hasFPModifiers()); 982 addRegOrImmWithInputModsOperands(Inst, N); 983 } 984 985 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 986 Modifiers Mods = getModifiers(); 987 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 988 
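    // Unlike addRegOrImmWithInputModsOperands() above, this variant only
    // accepts a register operand (enforced by the assert below).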
assert(isRegKind()); 989 addRegOperands(Inst, N); 990 } 991 992 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 993 assert(!hasIntModifiers()); 994 addRegWithInputModsOperands(Inst, N); 995 } 996 997 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 998 assert(!hasFPModifiers()); 999 addRegWithInputModsOperands(Inst, N); 1000 } 1001 1002 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 1003 if (isImm()) 1004 addImmOperands(Inst, N); 1005 else { 1006 assert(isExpr()); 1007 Inst.addOperand(MCOperand::createExpr(Expr)); 1008 } 1009 } 1010 1011 static void printImmTy(raw_ostream& OS, ImmTy Type) { 1012 switch (Type) { 1013 case ImmTyNone: OS << "None"; break; 1014 case ImmTyGDS: OS << "GDS"; break; 1015 case ImmTyLDS: OS << "LDS"; break; 1016 case ImmTyOffen: OS << "Offen"; break; 1017 case ImmTyIdxen: OS << "Idxen"; break; 1018 case ImmTyAddr64: OS << "Addr64"; break; 1019 case ImmTyOffset: OS << "Offset"; break; 1020 case ImmTyInstOffset: OS << "InstOffset"; break; 1021 case ImmTyOffset0: OS << "Offset0"; break; 1022 case ImmTyOffset1: OS << "Offset1"; break; 1023 case ImmTyCPol: OS << "CPol"; break; 1024 case ImmTySWZ: OS << "SWZ"; break; 1025 case ImmTyTFE: OS << "TFE"; break; 1026 case ImmTyD16: OS << "D16"; break; 1027 case ImmTyFORMAT: OS << "FORMAT"; break; 1028 case ImmTyClampSI: OS << "ClampSI"; break; 1029 case ImmTyOModSI: OS << "OModSI"; break; 1030 case ImmTyDPP8: OS << "DPP8"; break; 1031 case ImmTyDppCtrl: OS << "DppCtrl"; break; 1032 case ImmTyDppRowMask: OS << "DppRowMask"; break; 1033 case ImmTyDppBankMask: OS << "DppBankMask"; break; 1034 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 1035 case ImmTyDppFi: OS << "FI"; break; 1036 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 1037 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 1038 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 1039 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 1040 case ImmTyDMask: OS << "DMask"; break; 1041 case ImmTyDim: OS << "Dim"; break; 1042 case ImmTyUNorm: OS << "UNorm"; break; 1043 case ImmTyDA: OS << "DA"; break; 1044 case ImmTyR128A16: OS << "R128A16"; break; 1045 case ImmTyA16: OS << "A16"; break; 1046 case ImmTyLWE: OS << "LWE"; break; 1047 case ImmTyOff: OS << "Off"; break; 1048 case ImmTyExpTgt: OS << "ExpTgt"; break; 1049 case ImmTyExpCompr: OS << "ExpCompr"; break; 1050 case ImmTyExpVM: OS << "ExpVM"; break; 1051 case ImmTyHwreg: OS << "Hwreg"; break; 1052 case ImmTySendMsg: OS << "SendMsg"; break; 1053 case ImmTyInterpSlot: OS << "InterpSlot"; break; 1054 case ImmTyInterpAttr: OS << "InterpAttr"; break; 1055 case ImmTyAttrChan: OS << "AttrChan"; break; 1056 case ImmTyOpSel: OS << "OpSel"; break; 1057 case ImmTyOpSelHi: OS << "OpSelHi"; break; 1058 case ImmTyNegLo: OS << "NegLo"; break; 1059 case ImmTyNegHi: OS << "NegHi"; break; 1060 case ImmTySwizzle: OS << "Swizzle"; break; 1061 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 1062 case ImmTyHigh: OS << "High"; break; 1063 case ImmTyBLGP: OS << "BLGP"; break; 1064 case ImmTyCBSZ: OS << "CBSZ"; break; 1065 case ImmTyABID: OS << "ABID"; break; 1066 case ImmTyEndpgm: OS << "Endpgm"; break; 1067 case ImmTyWaitVDST: OS << "WaitVDST"; break; 1068 case ImmTyWaitEXP: OS << "WaitEXP"; break; 1069 } 1070 } 1071 1072 void print(raw_ostream &OS) const override { 1073 switch (Kind) { 1074 case Register: 1075 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 1076 break; 1077 case Immediate: 1078 OS << '<' << getImm(); 1079 if (getImmTy() != ImmTyNone) { 1080 
OS << " type: "; printImmTy(OS, getImmTy()); 1081 } 1082 OS << " mods: " << Imm.Mods << '>'; 1083 break; 1084 case Token: 1085 OS << '\'' << getToken() << '\''; 1086 break; 1087 case Expression: 1088 OS << "<expr " << *Expr << '>'; 1089 break; 1090 } 1091 } 1092 1093 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 1094 int64_t Val, SMLoc Loc, 1095 ImmTy Type = ImmTyNone, 1096 bool IsFPImm = false) { 1097 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 1098 Op->Imm.Val = Val; 1099 Op->Imm.IsFPImm = IsFPImm; 1100 Op->Imm.Kind = ImmKindTyNone; 1101 Op->Imm.Type = Type; 1102 Op->Imm.Mods = Modifiers(); 1103 Op->StartLoc = Loc; 1104 Op->EndLoc = Loc; 1105 return Op; 1106 } 1107 1108 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 1109 StringRef Str, SMLoc Loc, 1110 bool HasExplicitEncodingSize = true) { 1111 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 1112 Res->Tok.Data = Str.data(); 1113 Res->Tok.Length = Str.size(); 1114 Res->StartLoc = Loc; 1115 Res->EndLoc = Loc; 1116 return Res; 1117 } 1118 1119 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 1120 unsigned RegNo, SMLoc S, 1121 SMLoc E) { 1122 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 1123 Op->Reg.RegNo = RegNo; 1124 Op->Reg.Mods = Modifiers(); 1125 Op->StartLoc = S; 1126 Op->EndLoc = E; 1127 return Op; 1128 } 1129 1130 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 1131 const class MCExpr *Expr, SMLoc S) { 1132 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 1133 Op->Expr = Expr; 1134 Op->StartLoc = S; 1135 Op->EndLoc = S; 1136 return Op; 1137 } 1138 }; 1139 1140 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 1141 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 1142 return OS; 1143 } 1144 1145 //===----------------------------------------------------------------------===// 1146 // AsmParser 1147 //===----------------------------------------------------------------------===// 1148 1149 // Holds info related to the current kernel, e.g. count of SGPRs used. 1150 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 1151 // .amdgpu_hsa_kernel or at EOF. 
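// The tracked maxima are published as the MC symbols .kernel.sgpr_count,
// .kernel.vgpr_count and .kernel.agpr_count (see usesSgprAt/usesVgprAt/
// usesAgprAt below), so later code can reference the running counts.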
1152 class KernelScopeInfo { 1153 int SgprIndexUnusedMin = -1; 1154 int VgprIndexUnusedMin = -1; 1155 int AgprIndexUnusedMin = -1; 1156 MCContext *Ctx = nullptr; 1157 MCSubtargetInfo const *MSTI = nullptr; 1158 1159 void usesSgprAt(int i) { 1160 if (i >= SgprIndexUnusedMin) { 1161 SgprIndexUnusedMin = ++i; 1162 if (Ctx) { 1163 MCSymbol* const Sym = 1164 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1165 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1166 } 1167 } 1168 } 1169 1170 void usesVgprAt(int i) { 1171 if (i >= VgprIndexUnusedMin) { 1172 VgprIndexUnusedMin = ++i; 1173 if (Ctx) { 1174 MCSymbol* const Sym = 1175 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1176 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1177 VgprIndexUnusedMin); 1178 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1179 } 1180 } 1181 } 1182 1183 void usesAgprAt(int i) { 1184 // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction 1185 if (!hasMAIInsts(*MSTI)) 1186 return; 1187 1188 if (i >= AgprIndexUnusedMin) { 1189 AgprIndexUnusedMin = ++i; 1190 if (Ctx) { 1191 MCSymbol* const Sym = 1192 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count")); 1193 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx)); 1194 1195 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a) 1196 MCSymbol* const vSym = 1197 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1198 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1199 VgprIndexUnusedMin); 1200 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1201 } 1202 } 1203 } 1204 1205 public: 1206 KernelScopeInfo() = default; 1207 1208 void initialize(MCContext &Context) { 1209 Ctx = &Context; 1210 MSTI = Ctx->getSubtargetInfo(); 1211 1212 usesSgprAt(SgprIndexUnusedMin = -1); 1213 usesVgprAt(VgprIndexUnusedMin = -1); 1214 if (hasMAIInsts(*MSTI)) { 1215 usesAgprAt(AgprIndexUnusedMin = -1); 1216 } 1217 } 1218 1219 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, 1220 unsigned RegWidth) { 1221 switch (RegKind) { 1222 case IS_SGPR: 1223 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1224 break; 1225 case IS_AGPR: 1226 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1227 break; 1228 case IS_VGPR: 1229 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1230 break; 1231 default: 1232 break; 1233 } 1234 } 1235 }; 1236 1237 class AMDGPUAsmParser : public MCTargetAsmParser { 1238 MCAsmParser &Parser; 1239 1240 // Number of extra operands parsed after the first optional operand. 1241 // This may be necessary to skip hardcoded mandatory operands. 1242 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1243 1244 unsigned ForcedEncodingSize = 0; 1245 bool ForcedDPP = false; 1246 bool ForcedSDWA = false; 1247 KernelScopeInfo KernelScope; 1248 unsigned CPolSeen; 1249 1250 /// @name Auto-generated Match Functions 1251 /// { 1252 1253 #define GET_ASSEMBLER_HEADER 1254 #include "AMDGPUGenAsmMatcher.inc" 1255 1256 /// } 1257 1258 private: 1259 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1260 bool OutOfRangeError(SMRange Range); 1261 /// Calculate VGPR/SGPR blocks required for given target, reserved 1262 /// registers, and user-specified NextFreeXGPR values. 1263 /// 1264 /// \param Features [in] Target features, used for bug corrections. 1265 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1266 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 
1267 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1268 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1269 /// descriptor field, if valid. 1270 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1271 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1272 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1273 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1274 /// \param VGPRBlocks [out] Result VGPR block count. 1275 /// \param SGPRBlocks [out] Result SGPR block count. 1276 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1277 bool FlatScrUsed, bool XNACKUsed, 1278 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1279 SMRange VGPRRange, unsigned NextFreeSGPR, 1280 SMRange SGPRRange, unsigned &VGPRBlocks, 1281 unsigned &SGPRBlocks); 1282 bool ParseDirectiveAMDGCNTarget(); 1283 bool ParseDirectiveAMDHSAKernel(); 1284 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1285 bool ParseDirectiveHSACodeObjectVersion(); 1286 bool ParseDirectiveHSACodeObjectISA(); 1287 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1288 bool ParseDirectiveAMDKernelCodeT(); 1289 // TODO: Possibly make subtargetHasRegister const. 1290 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo); 1291 bool ParseDirectiveAMDGPUHsaKernel(); 1292 1293 bool ParseDirectiveISAVersion(); 1294 bool ParseDirectiveHSAMetadata(); 1295 bool ParseDirectivePALMetadataBegin(); 1296 bool ParseDirectivePALMetadata(); 1297 bool ParseDirectiveAMDGPULDS(); 1298 1299 /// Common code to parse out a block of text (typically YAML) between start and 1300 /// end directives. 1301 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1302 const char *AssemblerDirectiveEnd, 1303 std::string &CollectString); 1304 1305 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1306 RegisterKind RegKind, unsigned Reg1, SMLoc Loc); 1307 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1308 unsigned &RegNum, unsigned &RegWidth, 1309 bool RestoreOnFailure = false); 1310 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1311 unsigned &RegNum, unsigned &RegWidth, 1312 SmallVectorImpl<AsmToken> &Tokens); 1313 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, 1314 unsigned &RegWidth, 1315 SmallVectorImpl<AsmToken> &Tokens); 1316 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, 1317 unsigned &RegWidth, 1318 SmallVectorImpl<AsmToken> &Tokens); 1319 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 1320 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); 1321 bool ParseRegRange(unsigned& Num, unsigned& Width); 1322 unsigned getRegularReg(RegisterKind RegKind, 1323 unsigned RegNum, 1324 unsigned RegWidth, 1325 SMLoc Loc); 1326 1327 bool isRegister(); 1328 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1329 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1330 void initializeGprCountSymbol(RegisterKind RegKind); 1331 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1332 unsigned RegWidth); 1333 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1334 bool IsAtomic, bool IsLds = false); 1335 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 1336 bool IsGdsHardcoded); 1337 1338 public: 1339 enum AMDGPUMatchResultTy { 1340 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 1341 }; 1342 enum 
OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  // TODO: isGFX90A is also true for GFX940. We need to clean this up.
1425 bool isGFX90A() const { 1426 return AMDGPU::isGFX90A(getSTI()); 1427 } 1428 1429 bool isGFX940() const { 1430 return AMDGPU::isGFX940(getSTI()); 1431 } 1432 1433 bool isGFX9Plus() const { 1434 return AMDGPU::isGFX9Plus(getSTI()); 1435 } 1436 1437 bool isGFX10() const { 1438 return AMDGPU::isGFX10(getSTI()); 1439 } 1440 1441 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1442 1443 bool isGFX11() const { 1444 return AMDGPU::isGFX11(getSTI()); 1445 } 1446 1447 bool isGFX11Plus() const { 1448 return AMDGPU::isGFX11Plus(getSTI()); 1449 } 1450 1451 bool isGFX10_BEncoding() const { 1452 return AMDGPU::isGFX10_BEncoding(getSTI()); 1453 } 1454 1455 bool hasInv2PiInlineImm() const { 1456 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1457 } 1458 1459 bool hasFlatOffsets() const { 1460 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1461 } 1462 1463 bool hasArchitectedFlatScratch() const { 1464 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; 1465 } 1466 1467 bool hasSGPR102_SGPR103() const { 1468 return !isVI() && !isGFX9(); 1469 } 1470 1471 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1472 1473 bool hasIntClamp() const { 1474 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1475 } 1476 1477 AMDGPUTargetStreamer &getTargetStreamer() { 1478 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1479 return static_cast<AMDGPUTargetStreamer &>(TS); 1480 } 1481 1482 const MCRegisterInfo *getMRI() const { 1483 // We need this const_cast because for some reason getContext() is not const 1484 // in MCAsmParser. 1485 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1486 } 1487 1488 const MCInstrInfo *getMII() const { 1489 return &MII; 1490 } 1491 1492 const FeatureBitset &getFeatureBits() const { 1493 return getSTI().getFeatureBits(); 1494 } 1495 1496 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1497 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1498 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1499 1500 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1501 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1502 bool isForcedDPP() const { return ForcedDPP; } 1503 bool isForcedSDWA() const { return ForcedSDWA; } 1504 ArrayRef<unsigned> getMatchedVariants() const; 1505 StringRef getMatchedVariantName() const; 1506 1507 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1508 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1509 bool RestoreOnFailure); 1510 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1511 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1512 SMLoc &EndLoc) override; 1513 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1514 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1515 unsigned Kind) override; 1516 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1517 OperandVector &Operands, MCStreamer &Out, 1518 uint64_t &ErrorInfo, 1519 bool MatchingInlineAsm) override; 1520 bool ParseDirective(AsmToken DirectiveID) override; 1521 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1522 OperandMode Mode = OperandMode_Default); 1523 StringRef parseMnemonicSuffix(StringRef Name); 1524 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1525 SMLoc NameLoc, OperandVector &Operands) override; 1526 //bool 
ProcessInstruction(MCInst &Inst); 1527 1528 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1529 1530 OperandMatchResultTy 1531 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1532 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1533 bool (*ConvertResult)(int64_t &) = nullptr); 1534 1535 OperandMatchResultTy 1536 parseOperandArrayWithPrefix(const char *Prefix, 1537 OperandVector &Operands, 1538 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1539 bool (*ConvertResult)(int64_t&) = nullptr); 1540 1541 OperandMatchResultTy 1542 parseNamedBit(StringRef Name, OperandVector &Operands, 1543 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1544 OperandMatchResultTy parseCPol(OperandVector &Operands); 1545 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1546 StringRef &Value, 1547 SMLoc &StringLoc); 1548 1549 bool isModifier(); 1550 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1551 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1552 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1553 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1554 bool parseSP3NegModifier(); 1555 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1556 OperandMatchResultTy parseReg(OperandVector &Operands); 1557 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1558 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1559 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1560 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1561 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1562 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1563 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1564 OperandMatchResultTy parseUfmt(int64_t &Format); 1565 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1566 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1567 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1568 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1569 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1570 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1571 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1572 1573 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1574 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1575 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1576 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1577 1578 bool parseCnt(int64_t &IntVal); 1579 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1580 1581 bool parseDepCtr(int64_t &IntVal, unsigned &Mask); 1582 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName); 1583 OperandMatchResultTy parseDepCtrOps(OperandVector &Operands); 1584 1585 bool parseDelay(int64_t &Delay); 1586 OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands); 1587 1588 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1589 1590 private: 1591 struct OperandInfoTy { 1592 SMLoc Loc; 1593 int64_t Id; 
1594 bool IsSymbolic = false; 1595 bool IsDefined = false; 1596 1597 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1598 }; 1599 1600 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1601 bool validateSendMsg(const OperandInfoTy &Msg, 1602 const OperandInfoTy &Op, 1603 const OperandInfoTy &Stream); 1604 1605 bool parseHwregBody(OperandInfoTy &HwReg, 1606 OperandInfoTy &Offset, 1607 OperandInfoTy &Width); 1608 bool validateHwreg(const OperandInfoTy &HwReg, 1609 const OperandInfoTy &Offset, 1610 const OperandInfoTy &Width); 1611 1612 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1613 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1614 SMLoc getBLGPLoc(const OperandVector &Operands) const; 1615 1616 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1617 const OperandVector &Operands) const; 1618 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1619 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1620 SMLoc getLitLoc(const OperandVector &Operands) const; 1621 SMLoc getConstLoc(const OperandVector &Operands) const; 1622 1623 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1624 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1625 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1626 bool validateSOPLiteral(const MCInst &Inst) const; 1627 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1628 bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands); 1629 bool validateIntClampSupported(const MCInst &Inst); 1630 bool validateMIMGAtomicDMask(const MCInst &Inst); 1631 bool validateMIMGGatherDMask(const MCInst &Inst); 1632 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1633 Optional<StringRef> validateMIMGDataSize(const MCInst &Inst); 1634 bool validateMIMGAddrSize(const MCInst &Inst); 1635 bool validateMIMGD16(const MCInst &Inst); 1636 bool validateMIMGDim(const MCInst &Inst); 1637 bool validateMIMGMSAA(const MCInst &Inst); 1638 bool validateOpSel(const MCInst &Inst); 1639 bool validateDPP(const MCInst &Inst, const OperandVector &Operands); 1640 bool validateVccOperand(unsigned Reg) const; 1641 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands); 1642 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1643 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands); 1644 bool validateAGPRLdSt(const MCInst &Inst) const; 1645 bool validateVGPRAlign(const MCInst &Inst) const; 1646 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands); 1647 bool validateGWS(const MCInst &Inst, const OperandVector &Operands); 1648 bool validateDivScale(const MCInst &Inst); 1649 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1650 const SMLoc &IDLoc); 1651 bool validateFlatLdsDMA(const MCInst &Inst, const OperandVector &Operands, 1652 const SMLoc &IDLoc); 1653 bool validateExeczVcczOperands(const OperandVector &Operands); 1654 Optional<StringRef> validateLdsDirect(const MCInst &Inst); 1655 unsigned getConstantBusLimit(unsigned Opcode) const; 1656 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1657 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1658 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1659 1660 bool isSupportedMnemo(StringRef Mnemo, 1661 const 
FeatureBitset &FBS); 1662 bool isSupportedMnemo(StringRef Mnemo, 1663 const FeatureBitset &FBS, 1664 ArrayRef<unsigned> Variants); 1665 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1666 1667 bool isId(const StringRef Id) const; 1668 bool isId(const AsmToken &Token, const StringRef Id) const; 1669 bool isToken(const AsmToken::TokenKind Kind) const; 1670 bool trySkipId(const StringRef Id); 1671 bool trySkipId(const StringRef Pref, const StringRef Id); 1672 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1673 bool trySkipToken(const AsmToken::TokenKind Kind); 1674 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1675 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1676 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1677 1678 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1679 AsmToken::TokenKind getTokenKind() const; 1680 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1681 bool parseExpr(OperandVector &Operands); 1682 StringRef getTokenStr() const; 1683 AsmToken peekToken(bool ShouldSkipSpace = true); 1684 AsmToken getToken() const; 1685 SMLoc getLoc() const; 1686 void lex(); 1687 1688 public: 1689 void onBeginOfFile() override; 1690 1691 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1692 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1693 1694 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1695 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1696 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1697 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1698 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1699 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1700 1701 bool parseSwizzleOperand(int64_t &Op, 1702 const unsigned MinVal, 1703 const unsigned MaxVal, 1704 const StringRef ErrMsg, 1705 SMLoc &Loc); 1706 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1707 const unsigned MinVal, 1708 const unsigned MaxVal, 1709 const StringRef ErrMsg); 1710 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1711 bool parseSwizzleOffset(int64_t &Imm); 1712 bool parseSwizzleMacro(int64_t &Imm); 1713 bool parseSwizzleQuadPerm(int64_t &Imm); 1714 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1715 bool parseSwizzleBroadcast(int64_t &Imm); 1716 bool parseSwizzleSwap(int64_t &Imm); 1717 bool parseSwizzleReverse(int64_t &Imm); 1718 1719 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1720 int64_t parseGPRIdxMacro(); 1721 1722 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 1723 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1724 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); } 1725 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1726 1727 AMDGPUOperand::Ptr defaultCPol() const; 1728 1729 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1730 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1731 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1732 AMDGPUOperand::Ptr defaultFlatOffset() const; 1733 1734 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1735 1736 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1737 OptionalImmIndexMap &OptionalIdx); 1738 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1739 void 
cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1740 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1741 void cvtVOPD(MCInst &Inst, const OperandVector &Operands); 1742 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands, 1743 OptionalImmIndexMap &OptionalIdx); 1744 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 1745 OptionalImmIndexMap &OptionalIdx); 1746 1747 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1748 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands); 1749 1750 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1751 bool IsAtomic = false); 1752 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1753 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1754 1755 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands); 1756 1757 bool parseDimId(unsigned &Encoding); 1758 OperandMatchResultTy parseDim(OperandVector &Operands); 1759 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1760 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1761 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1762 int64_t parseDPPCtrlSel(StringRef Ctrl); 1763 int64_t parseDPPCtrlPerm(); 1764 AMDGPUOperand::Ptr defaultRowMask() const; 1765 AMDGPUOperand::Ptr defaultBankMask() const; 1766 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1767 AMDGPUOperand::Ptr defaultFI() const; 1768 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1769 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { 1770 cvtDPP(Inst, Operands, true); 1771 } 1772 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, 1773 bool IsDPP8 = false); 1774 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) { 1775 cvtVOP3DPP(Inst, Operands, true); 1776 } 1777 1778 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1779 AMDGPUOperand::ImmTy Type); 1780 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1781 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1782 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1783 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1784 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1785 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1786 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1787 uint64_t BasicInstType, 1788 bool SkipDstVcc = false, 1789 bool SkipSrcVcc = false); 1790 1791 AMDGPUOperand::Ptr defaultBLGP() const; 1792 AMDGPUOperand::Ptr defaultCBSZ() const; 1793 AMDGPUOperand::Ptr defaultABID() const; 1794 1795 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1796 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1797 1798 AMDGPUOperand::Ptr defaultWaitVDST() const; 1799 AMDGPUOperand::Ptr defaultWaitEXP() const; 1800 OperandMatchResultTy parseVOPD(OperandVector &Operands); 1801 }; 1802 1803 struct OptionalOperand { 1804 const char *Name; 1805 AMDGPUOperand::ImmTy Type; 1806 bool IsBit; 1807 bool (*ConvertResult)(int64_t&); 1808 }; 1809 1810 } // end anonymous namespace 1811 1812 // May be called with integer type with equivalent bitwidth. 
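// Size is the operand size in bytes: 2 selects IEEE half, 4 IEEE single and
// 8 IEEE double semantics.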
1813 static const fltSemantics *getFltSemantics(unsigned Size) { 1814 switch (Size) { 1815 case 4: 1816 return &APFloat::IEEEsingle(); 1817 case 8: 1818 return &APFloat::IEEEdouble(); 1819 case 2: 1820 return &APFloat::IEEEhalf(); 1821 default: 1822 llvm_unreachable("unsupported fp type"); 1823 } 1824 } 1825 1826 static const fltSemantics *getFltSemantics(MVT VT) { 1827 return getFltSemantics(VT.getSizeInBits() / 8); 1828 } 1829 1830 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1831 switch (OperandType) { 1832 case AMDGPU::OPERAND_REG_IMM_INT32: 1833 case AMDGPU::OPERAND_REG_IMM_FP32: 1834 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1835 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1836 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1837 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1838 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1839 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1840 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1841 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1842 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1843 case AMDGPU::OPERAND_KIMM32: 1844 return &APFloat::IEEEsingle(); 1845 case AMDGPU::OPERAND_REG_IMM_INT64: 1846 case AMDGPU::OPERAND_REG_IMM_FP64: 1847 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1848 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1849 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1850 return &APFloat::IEEEdouble(); 1851 case AMDGPU::OPERAND_REG_IMM_INT16: 1852 case AMDGPU::OPERAND_REG_IMM_FP16: 1853 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1854 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1855 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1856 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1857 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1858 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1859 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1860 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1861 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1862 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1863 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1864 case AMDGPU::OPERAND_KIMM16: 1865 return &APFloat::IEEEhalf(); 1866 default: 1867 llvm_unreachable("unsupported fp type"); 1868 } 1869 } 1870 1871 //===----------------------------------------------------------------------===// 1872 // Operand 1873 //===----------------------------------------------------------------------===// 1874 1875 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1876 bool Lost; 1877 1878 // Convert literal to single precision 1879 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1880 APFloat::rmNearestTiesToEven, 1881 &Lost); 1882 // We allow precision lost but not overflow or underflow 1883 if (Status != APFloat::opOK && 1884 Lost && 1885 ((Status & APFloat::opOverflow) != 0 || 1886 (Status & APFloat::opUnderflow) != 0)) { 1887 return false; 1888 } 1889 1890 return true; 1891 } 1892 1893 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1894 return isUIntN(Size, Val) || isIntN(Size, Val); 1895 } 1896 1897 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1898 if (VT.getScalarType() == MVT::i16) { 1899 // FP immediate values are broken. 1900 return isInlinableIntLiteral(Val); 1901 } 1902 1903 // f16/v2f16 operands work correctly for all values. 1904 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1905 } 1906 1907 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1908 1909 // This is a hack to enable named inline values like 1910 // shared_base with both 32-bit and 64-bit operands. 1911 // Note that these values are defined as 1912 // 32-bit operands only. 
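// A minimal illustration (assuming GFX9+ assembly syntax): this is what lets
//   s_mov_b64 s[0:1], src_shared_base
// be accepted even though these named values are modelled as 32-bit operands.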
1913 if (isInlineValue()) { 1914 return true; 1915 } 1916 1917 if (!isImmTy(ImmTyNone)) { 1918 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1919 return false; 1920 } 1921 // TODO: We should avoid using host float here. It would be better to 1922 // check the float bit values, which is what a few other places do. 1923 // We've had bot failures before due to weird NaN support on MIPS hosts. 1924 1925 APInt Literal(64, Imm.Val); 1926 1927 if (Imm.IsFPImm) { // We got fp literal token 1928 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1929 return AMDGPU::isInlinableLiteral64(Imm.Val, 1930 AsmParser->hasInv2PiInlineImm()); 1931 } 1932 1933 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1934 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1935 return false; 1936 1937 if (type.getScalarSizeInBits() == 16) { 1938 return isInlineableLiteralOp16( 1939 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1940 type, AsmParser->hasInv2PiInlineImm()); 1941 } 1942 1943 // Check if single precision literal is inlinable 1944 return AMDGPU::isInlinableLiteral32( 1945 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1946 AsmParser->hasInv2PiInlineImm()); 1947 } 1948 1949 // We got int literal token. 1950 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1951 return AMDGPU::isInlinableLiteral64(Imm.Val, 1952 AsmParser->hasInv2PiInlineImm()); 1953 } 1954 1955 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1956 return false; 1957 } 1958 1959 if (type.getScalarSizeInBits() == 16) { 1960 return isInlineableLiteralOp16( 1961 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1962 type, AsmParser->hasInv2PiInlineImm()); 1963 } 1964 1965 return AMDGPU::isInlinableLiteral32( 1966 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1967 AsmParser->hasInv2PiInlineImm()); 1968 } 1969 1970 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1971 // Check that this immediate can be added as a literal 1972 if (!isImmTy(ImmTyNone)) { 1973 return false; 1974 } 1975 1976 if (!Imm.IsFPImm) { 1977 // We got int literal token. 1978 1979 if (type == MVT::f64 && hasFPModifiers()) { 1980 // Applying fp modifiers to int literals cannot preserve the same semantics 1981 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1982 // disable these cases. 1983 return false; 1984 } 1985 1986 unsigned Size = type.getSizeInBits(); 1987 if (Size == 64) 1988 Size = 32; 1989 1990 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1991 // types. 1992 return isSafeTruncation(Imm.Val, Size); 1993 } 1994 1995 // We got fp literal token 1996 if (type == MVT::f64) { // Expected 64-bit fp operand 1997 // We would set the low 32 bits of the literal to zeroes, but we accept such literals 1998 return true; 1999 } 2000 2001 if (type == MVT::i64) { // Expected 64-bit int operand 2002 // We don't allow fp literals in 64-bit integer instructions. It is 2003 // unclear how we should encode them. 2004 return false; 2005 } 2006 2007 // We allow fp literals with f16x2 operands assuming that the specified 2008 // literal goes into the lower half and the upper half is zero. We also 2009 // require that the literal may be losslessly converted to f16. 2010 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 2011 (type == MVT::v2i16)? MVT::i16 : 2012 (type == MVT::v2f32)?
MVT::f32 : type; 2013 2014 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 2015 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 2016 } 2017 2018 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 2019 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 2020 } 2021 2022 bool AMDGPUOperand::isVRegWithInputMods() const { 2023 return isRegClass(AMDGPU::VGPR_32RegClassID) || 2024 // GFX90A allows DPP on 64-bit operands. 2025 (isRegClass(AMDGPU::VReg_64RegClassID) && 2026 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 2027 } 2028 2029 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 2030 if (AsmParser->isVI()) 2031 return isVReg32(); 2032 else if (AsmParser->isGFX9Plus()) 2033 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 2034 else 2035 return false; 2036 } 2037 2038 bool AMDGPUOperand::isSDWAFP16Operand() const { 2039 return isSDWAOperand(MVT::f16); 2040 } 2041 2042 bool AMDGPUOperand::isSDWAFP32Operand() const { 2043 return isSDWAOperand(MVT::f32); 2044 } 2045 2046 bool AMDGPUOperand::isSDWAInt16Operand() const { 2047 return isSDWAOperand(MVT::i16); 2048 } 2049 2050 bool AMDGPUOperand::isSDWAInt32Operand() const { 2051 return isSDWAOperand(MVT::i32); 2052 } 2053 2054 bool AMDGPUOperand::isBoolReg() const { 2055 auto FB = AsmParser->getFeatureBits(); 2056 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 2057 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 2058 } 2059 2060 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 2061 { 2062 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2063 assert(Size == 2 || Size == 4 || Size == 8); 2064 2065 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 2066 2067 if (Imm.Mods.Abs) { 2068 Val &= ~FpSignMask; 2069 } 2070 if (Imm.Mods.Neg) { 2071 Val ^= FpSignMask; 2072 } 2073 2074 return Val; 2075 } 2076 2077 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 2078 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 2079 Inst.getNumOperands())) { 2080 addLiteralImmOperand(Inst, Imm.Val, 2081 ApplyModifiers & 2082 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2083 } else { 2084 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 2085 Inst.addOperand(MCOperand::createImm(Imm.Val)); 2086 setImmKindNone(); 2087 } 2088 } 2089 2090 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 2091 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 2092 auto OpNum = Inst.getNumOperands(); 2093 // Check that this operand accepts literals 2094 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 2095 2096 if (ApplyModifiers) { 2097 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 2098 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 2099 Val = applyInputFPModifiers(Val, Size); 2100 } 2101 2102 APInt Literal(64, Val); 2103 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 2104 2105 if (Imm.IsFPImm) { // We got fp literal token 2106 switch (OpTy) { 2107 case AMDGPU::OPERAND_REG_IMM_INT64: 2108 case AMDGPU::OPERAND_REG_IMM_FP64: 2109 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2110 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2111 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2112 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2113 AsmParser->hasInv2PiInlineImm())) { 2114 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2115 setImmKindConst(); 2116 return; 2117 } 2118 2119 // Non-inlineable 2120 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2121 // For fp operands we check if low 32 bits are zeros 2122 if (Literal.getLoBits(32) != 0) { 2123 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2124 "Can't encode literal as exact 64-bit floating-point operand. " 2125 "Low 32-bits will be set to zero"); 2126 } 2127 2128 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2129 setImmKindLiteral(); 2130 return; 2131 } 2132 2133 // We don't allow fp literals in 64-bit integer instructions. It is 2134 // unclear how we should encode them. This case should be checked earlier 2135 // in predicate methods (isLiteralImm()) 2136 llvm_unreachable("fp literal in 64-bit integer instruction."); 2137 2138 case AMDGPU::OPERAND_REG_IMM_INT32: 2139 case AMDGPU::OPERAND_REG_IMM_FP32: 2140 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2141 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2142 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2143 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2144 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2145 case AMDGPU::OPERAND_REG_IMM_INT16: 2146 case AMDGPU::OPERAND_REG_IMM_FP16: 2147 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2148 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2149 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2150 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2151 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2152 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2153 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2154 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2155 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2156 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2157 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2158 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2159 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2160 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2161 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2162 case AMDGPU::OPERAND_KIMM32: 2163 case AMDGPU::OPERAND_KIMM16: { 2164 bool lost; 2165 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2166 // Convert literal to single precision 2167 FPLiteral.convert(*getOpFltSemantics(OpTy), 2168 APFloat::rmNearestTiesToEven, &lost); 2169 // We allow precision lost but not overflow or underflow. This should be 2170 // checked earlier in isLiteralImm() 2171 2172 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2173 Inst.addOperand(MCOperand::createImm(ImmVal)); 2174 setImmKindLiteral(); 2175 return; 2176 } 2177 default: 2178 llvm_unreachable("invalid operand size"); 2179 } 2180 2181 return; 2182 } 2183 2184 // We got int literal token. 2185 // Only sign extend inline immediates. 
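// Illustrative examples for a 32-bit operand (assuming the usual inline
// constant rules checked below):
//   v_add_f32 v0, 64, v1   // 64 is inlinable -> emitted as an inline constant
//   v_add_f32 v0, 65, v1   // 65 is not inlinable -> emitted as a 32-bit literal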
2186 switch (OpTy) { 2187 case AMDGPU::OPERAND_REG_IMM_INT32: 2188 case AMDGPU::OPERAND_REG_IMM_FP32: 2189 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2190 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2191 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2192 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2193 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2194 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2195 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2196 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2197 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2198 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2199 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2200 if (isSafeTruncation(Val, 32) && 2201 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2202 AsmParser->hasInv2PiInlineImm())) { 2203 Inst.addOperand(MCOperand::createImm(Val)); 2204 setImmKindConst(); 2205 return; 2206 } 2207 2208 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2209 setImmKindLiteral(); 2210 return; 2211 2212 case AMDGPU::OPERAND_REG_IMM_INT64: 2213 case AMDGPU::OPERAND_REG_IMM_FP64: 2214 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2215 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2216 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2217 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2218 Inst.addOperand(MCOperand::createImm(Val)); 2219 setImmKindConst(); 2220 return; 2221 } 2222 2223 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2224 setImmKindLiteral(); 2225 return; 2226 2227 case AMDGPU::OPERAND_REG_IMM_INT16: 2228 case AMDGPU::OPERAND_REG_IMM_FP16: 2229 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2230 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2231 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2232 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2233 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2234 if (isSafeTruncation(Val, 16) && 2235 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2236 AsmParser->hasInv2PiInlineImm())) { 2237 Inst.addOperand(MCOperand::createImm(Val)); 2238 setImmKindConst(); 2239 return; 2240 } 2241 2242 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2243 setImmKindLiteral(); 2244 return; 2245 2246 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2247 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2248 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2249 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2250 assert(isSafeTruncation(Val, 16)); 2251 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2252 AsmParser->hasInv2PiInlineImm())); 2253 2254 Inst.addOperand(MCOperand::createImm(Val)); 2255 return; 2256 } 2257 case AMDGPU::OPERAND_KIMM32: 2258 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2259 setImmKindNone(); 2260 return; 2261 case AMDGPU::OPERAND_KIMM16: 2262 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2263 setImmKindNone(); 2264 return; 2265 default: 2266 llvm_unreachable("invalid operand size"); 2267 } 2268 } 2269 2270 template <unsigned Bitwidth> 2271 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2272 APInt Literal(64, Imm.Val); 2273 setImmKindNone(); 2274 2275 if (!Imm.IsFPImm) { 2276 // We got int literal token. 
2277 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2278 return; 2279 } 2280 2281 bool Lost; 2282 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2283 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2284 APFloat::rmNearestTiesToEven, &Lost); 2285 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2286 } 2287 2288 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2289 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2290 } 2291 2292 static bool isInlineValue(unsigned Reg) { 2293 switch (Reg) { 2294 case AMDGPU::SRC_SHARED_BASE: 2295 case AMDGPU::SRC_SHARED_LIMIT: 2296 case AMDGPU::SRC_PRIVATE_BASE: 2297 case AMDGPU::SRC_PRIVATE_LIMIT: 2298 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2299 return true; 2300 case AMDGPU::SRC_VCCZ: 2301 case AMDGPU::SRC_EXECZ: 2302 case AMDGPU::SRC_SCC: 2303 return true; 2304 case AMDGPU::SGPR_NULL: 2305 return true; 2306 default: 2307 return false; 2308 } 2309 } 2310 2311 bool AMDGPUOperand::isInlineValue() const { 2312 return isRegKind() && ::isInlineValue(getReg()); 2313 } 2314 2315 //===----------------------------------------------------------------------===// 2316 // AsmParser 2317 //===----------------------------------------------------------------------===// 2318 2319 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2320 if (Is == IS_VGPR) { 2321 switch (RegWidth) { 2322 default: return -1; 2323 case 32: 2324 return AMDGPU::VGPR_32RegClassID; 2325 case 64: 2326 return AMDGPU::VReg_64RegClassID; 2327 case 96: 2328 return AMDGPU::VReg_96RegClassID; 2329 case 128: 2330 return AMDGPU::VReg_128RegClassID; 2331 case 160: 2332 return AMDGPU::VReg_160RegClassID; 2333 case 192: 2334 return AMDGPU::VReg_192RegClassID; 2335 case 224: 2336 return AMDGPU::VReg_224RegClassID; 2337 case 256: 2338 return AMDGPU::VReg_256RegClassID; 2339 case 512: 2340 return AMDGPU::VReg_512RegClassID; 2341 case 1024: 2342 return AMDGPU::VReg_1024RegClassID; 2343 } 2344 } else if (Is == IS_TTMP) { 2345 switch (RegWidth) { 2346 default: return -1; 2347 case 32: 2348 return AMDGPU::TTMP_32RegClassID; 2349 case 64: 2350 return AMDGPU::TTMP_64RegClassID; 2351 case 128: 2352 return AMDGPU::TTMP_128RegClassID; 2353 case 256: 2354 return AMDGPU::TTMP_256RegClassID; 2355 case 512: 2356 return AMDGPU::TTMP_512RegClassID; 2357 } 2358 } else if (Is == IS_SGPR) { 2359 switch (RegWidth) { 2360 default: return -1; 2361 case 32: 2362 return AMDGPU::SGPR_32RegClassID; 2363 case 64: 2364 return AMDGPU::SGPR_64RegClassID; 2365 case 96: 2366 return AMDGPU::SGPR_96RegClassID; 2367 case 128: 2368 return AMDGPU::SGPR_128RegClassID; 2369 case 160: 2370 return AMDGPU::SGPR_160RegClassID; 2371 case 192: 2372 return AMDGPU::SGPR_192RegClassID; 2373 case 224: 2374 return AMDGPU::SGPR_224RegClassID; 2375 case 256: 2376 return AMDGPU::SGPR_256RegClassID; 2377 case 512: 2378 return AMDGPU::SGPR_512RegClassID; 2379 } 2380 } else if (Is == IS_AGPR) { 2381 switch (RegWidth) { 2382 default: return -1; 2383 case 32: 2384 return AMDGPU::AGPR_32RegClassID; 2385 case 64: 2386 return AMDGPU::AReg_64RegClassID; 2387 case 96: 2388 return AMDGPU::AReg_96RegClassID; 2389 case 128: 2390 return AMDGPU::AReg_128RegClassID; 2391 case 160: 2392 return AMDGPU::AReg_160RegClassID; 2393 case 192: 2394 return AMDGPU::AReg_192RegClassID; 2395 case 224: 2396 return AMDGPU::AReg_224RegClassID; 2397 case 256: 2398 return AMDGPU::AReg_256RegClassID; 2399 case 512: 2400 return AMDGPU::AReg_512RegClassID; 
2401 case 1024: 2402 return AMDGPU::AReg_1024RegClassID; 2403 } 2404 } 2405 return -1; 2406 } 2407 2408 static unsigned getSpecialRegForName(StringRef RegName) { 2409 return StringSwitch<unsigned>(RegName) 2410 .Case("exec", AMDGPU::EXEC) 2411 .Case("vcc", AMDGPU::VCC) 2412 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2413 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2414 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2415 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2416 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2417 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2418 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2419 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2420 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2421 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2422 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2423 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2424 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2425 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2426 .Case("m0", AMDGPU::M0) 2427 .Case("vccz", AMDGPU::SRC_VCCZ) 2428 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2429 .Case("execz", AMDGPU::SRC_EXECZ) 2430 .Case("src_execz", AMDGPU::SRC_EXECZ) 2431 .Case("scc", AMDGPU::SRC_SCC) 2432 .Case("src_scc", AMDGPU::SRC_SCC) 2433 .Case("tba", AMDGPU::TBA) 2434 .Case("tma", AMDGPU::TMA) 2435 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2436 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2437 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2438 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2439 .Case("vcc_lo", AMDGPU::VCC_LO) 2440 .Case("vcc_hi", AMDGPU::VCC_HI) 2441 .Case("exec_lo", AMDGPU::EXEC_LO) 2442 .Case("exec_hi", AMDGPU::EXEC_HI) 2443 .Case("tma_lo", AMDGPU::TMA_LO) 2444 .Case("tma_hi", AMDGPU::TMA_HI) 2445 .Case("tba_lo", AMDGPU::TBA_LO) 2446 .Case("tba_hi", AMDGPU::TBA_HI) 2447 .Case("pc", AMDGPU::PC_REG) 2448 .Case("null", AMDGPU::SGPR_NULL) 2449 .Default(AMDGPU::NoRegister); 2450 } 2451 2452 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2453 SMLoc &EndLoc, bool RestoreOnFailure) { 2454 auto R = parseRegister(); 2455 if (!R) return true; 2456 assert(R->isReg()); 2457 RegNo = R->getReg(); 2458 StartLoc = R->getStartLoc(); 2459 EndLoc = R->getEndLoc(); 2460 return false; 2461 } 2462 2463 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2464 SMLoc &EndLoc) { 2465 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2466 } 2467 2468 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2469 SMLoc &StartLoc, 2470 SMLoc &EndLoc) { 2471 bool Result = 2472 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2473 bool PendingErrors = getParser().hasPendingError(); 2474 getParser().clearPendingErrors(); 2475 if (PendingErrors) 2476 return MatchOperand_ParseFail; 2477 if (Result) 2478 return MatchOperand_NoMatch; 2479 return MatchOperand_Success; 2480 } 2481 2482 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2483 RegisterKind RegKind, unsigned Reg1, 2484 SMLoc Loc) { 2485 switch (RegKind) { 2486 case IS_SPECIAL: 2487 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2488 Reg = AMDGPU::EXEC; 2489 RegWidth = 64; 2490 return true; 2491 } 2492 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2493 Reg = AMDGPU::FLAT_SCR; 2494 RegWidth = 64; 2495 return true; 2496 } 2497 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2498 Reg = AMDGPU::XNACK_MASK; 2499 RegWidth = 64; 
2500 return true; 2501 } 2502 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2503 Reg = AMDGPU::VCC; 2504 RegWidth = 64; 2505 return true; 2506 } 2507 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2508 Reg = AMDGPU::TBA; 2509 RegWidth = 64; 2510 return true; 2511 } 2512 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2513 Reg = AMDGPU::TMA; 2514 RegWidth = 64; 2515 return true; 2516 } 2517 Error(Loc, "register does not fit in the list"); 2518 return false; 2519 case IS_VGPR: 2520 case IS_SGPR: 2521 case IS_AGPR: 2522 case IS_TTMP: 2523 if (Reg1 != Reg + RegWidth / 32) { 2524 Error(Loc, "registers in a list must have consecutive indices"); 2525 return false; 2526 } 2527 RegWidth += 32; 2528 return true; 2529 default: 2530 llvm_unreachable("unexpected register kind"); 2531 } 2532 } 2533 2534 struct RegInfo { 2535 StringLiteral Name; 2536 RegisterKind Kind; 2537 }; 2538 2539 static constexpr RegInfo RegularRegisters[] = { 2540 {{"v"}, IS_VGPR}, 2541 {{"s"}, IS_SGPR}, 2542 {{"ttmp"}, IS_TTMP}, 2543 {{"acc"}, IS_AGPR}, 2544 {{"a"}, IS_AGPR}, 2545 }; 2546 2547 static bool isRegularReg(RegisterKind Kind) { 2548 return Kind == IS_VGPR || 2549 Kind == IS_SGPR || 2550 Kind == IS_TTMP || 2551 Kind == IS_AGPR; 2552 } 2553 2554 static const RegInfo* getRegularRegInfo(StringRef Str) { 2555 for (const RegInfo &Reg : RegularRegisters) 2556 if (Str.startswith(Reg.Name)) 2557 return &Reg; 2558 return nullptr; 2559 } 2560 2561 static bool getRegNum(StringRef Str, unsigned& Num) { 2562 return !Str.getAsInteger(10, Num); 2563 } 2564 2565 bool 2566 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2567 const AsmToken &NextToken) const { 2568 2569 // A list of consecutive registers: [s0,s1,s2,s3] 2570 if (Token.is(AsmToken::LBrac)) 2571 return true; 2572 2573 if (!Token.is(AsmToken::Identifier)) 2574 return false; 2575 2576 // A single register like s0 or a range of registers like s[0:1] 2577 2578 StringRef Str = Token.getString(); 2579 const RegInfo *Reg = getRegularRegInfo(Str); 2580 if (Reg) { 2581 StringRef RegName = Reg->Name; 2582 StringRef RegSuffix = Str.substr(RegName.size()); 2583 if (!RegSuffix.empty()) { 2584 unsigned Num; 2585 // A single register with an index: rXX 2586 if (getRegNum(RegSuffix, Num)) 2587 return true; 2588 } else { 2589 // A range of registers: r[XX:YY]. 2590 if (NextToken.is(AsmToken::LBrac)) 2591 return true; 2592 } 2593 } 2594 2595 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2596 } 2597 2598 bool 2599 AMDGPUAsmParser::isRegister() 2600 { 2601 return isRegister(getToken(), peekToken()); 2602 } 2603 2604 unsigned 2605 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2606 unsigned RegNum, 2607 unsigned RegWidth, 2608 SMLoc Loc) { 2609 2610 assert(isRegularReg(RegKind)); 2611 2612 unsigned AlignSize = 1; 2613 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2614 // SGPR and TTMP registers must be aligned. 2615 // Max required alignment is 4 dwords. 
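// For example, given the check below:
//   s[2:3] is accepted (64-bit range, first index is a multiple of 2),
//   s[3:4] is rejected (64-bit range, first index is not a multiple of 2),
//   s[2:5] is rejected (128-bit range, first index is not a multiple of 4).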
2616 AlignSize = std::min(RegWidth / 32, 4u); 2617 } 2618 2619 if (RegNum % AlignSize != 0) { 2620 Error(Loc, "invalid register alignment"); 2621 return AMDGPU::NoRegister; 2622 } 2623 2624 unsigned RegIdx = RegNum / AlignSize; 2625 int RCID = getRegClass(RegKind, RegWidth); 2626 if (RCID == -1) { 2627 Error(Loc, "invalid or unsupported register size"); 2628 return AMDGPU::NoRegister; 2629 } 2630 2631 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2632 const MCRegisterClass RC = TRI->getRegClass(RCID); 2633 if (RegIdx >= RC.getNumRegs()) { 2634 Error(Loc, "register index is out of range"); 2635 return AMDGPU::NoRegister; 2636 } 2637 2638 return RC.getRegister(RegIdx); 2639 } 2640 2641 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) { 2642 int64_t RegLo, RegHi; 2643 if (!skipToken(AsmToken::LBrac, "missing register index")) 2644 return false; 2645 2646 SMLoc FirstIdxLoc = getLoc(); 2647 SMLoc SecondIdxLoc; 2648 2649 if (!parseExpr(RegLo)) 2650 return false; 2651 2652 if (trySkipToken(AsmToken::Colon)) { 2653 SecondIdxLoc = getLoc(); 2654 if (!parseExpr(RegHi)) 2655 return false; 2656 } else { 2657 RegHi = RegLo; 2658 } 2659 2660 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2661 return false; 2662 2663 if (!isUInt<32>(RegLo)) { 2664 Error(FirstIdxLoc, "invalid register index"); 2665 return false; 2666 } 2667 2668 if (!isUInt<32>(RegHi)) { 2669 Error(SecondIdxLoc, "invalid register index"); 2670 return false; 2671 } 2672 2673 if (RegLo > RegHi) { 2674 Error(FirstIdxLoc, "first register index should not exceed second index"); 2675 return false; 2676 } 2677 2678 Num = static_cast<unsigned>(RegLo); 2679 RegWidth = 32 * ((RegHi - RegLo) + 1); 2680 return true; 2681 } 2682 2683 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2684 unsigned &RegNum, unsigned &RegWidth, 2685 SmallVectorImpl<AsmToken> &Tokens) { 2686 assert(isToken(AsmToken::Identifier)); 2687 unsigned Reg = getSpecialRegForName(getTokenStr()); 2688 if (Reg) { 2689 RegNum = 0; 2690 RegWidth = 32; 2691 RegKind = IS_SPECIAL; 2692 Tokens.push_back(getToken()); 2693 lex(); // skip register name 2694 } 2695 return Reg; 2696 } 2697 2698 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2699 unsigned &RegNum, unsigned &RegWidth, 2700 SmallVectorImpl<AsmToken> &Tokens) { 2701 assert(isToken(AsmToken::Identifier)); 2702 StringRef RegName = getTokenStr(); 2703 auto Loc = getLoc(); 2704 2705 const RegInfo *RI = getRegularRegInfo(RegName); 2706 if (!RI) { 2707 Error(Loc, "invalid register name"); 2708 return AMDGPU::NoRegister; 2709 } 2710 2711 Tokens.push_back(getToken()); 2712 lex(); // skip register name 2713 2714 RegKind = RI->Kind; 2715 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2716 if (!RegSuffix.empty()) { 2717 // Single 32-bit register: vXX. 2718 if (!getRegNum(RegSuffix, RegNum)) { 2719 Error(Loc, "invalid register index"); 2720 return AMDGPU::NoRegister; 2721 } 2722 RegWidth = 32; 2723 } else { 2724 // Range of registers: v[XX:YY]. ":YY" is optional. 
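// For example, v[4:7] yields RegNum = 4 and RegWidth = 128, while v[5]
// (with the ":YY" part omitted) yields RegNum = 5 and RegWidth = 32.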
2725 if (!ParseRegRange(RegNum, RegWidth)) 2726 return AMDGPU::NoRegister; 2727 } 2728 2729 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2730 } 2731 2732 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2733 unsigned &RegWidth, 2734 SmallVectorImpl<AsmToken> &Tokens) { 2735 unsigned Reg = AMDGPU::NoRegister; 2736 auto ListLoc = getLoc(); 2737 2738 if (!skipToken(AsmToken::LBrac, 2739 "expected a register or a list of registers")) { 2740 return AMDGPU::NoRegister; 2741 } 2742 2743 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2744 2745 auto Loc = getLoc(); 2746 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2747 return AMDGPU::NoRegister; 2748 if (RegWidth != 32) { 2749 Error(Loc, "expected a single 32-bit register"); 2750 return AMDGPU::NoRegister; 2751 } 2752 2753 for (; trySkipToken(AsmToken::Comma); ) { 2754 RegisterKind NextRegKind; 2755 unsigned NextReg, NextRegNum, NextRegWidth; 2756 Loc = getLoc(); 2757 2758 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2759 NextRegNum, NextRegWidth, 2760 Tokens)) { 2761 return AMDGPU::NoRegister; 2762 } 2763 if (NextRegWidth != 32) { 2764 Error(Loc, "expected a single 32-bit register"); 2765 return AMDGPU::NoRegister; 2766 } 2767 if (NextRegKind != RegKind) { 2768 Error(Loc, "registers in a list must be of the same kind"); 2769 return AMDGPU::NoRegister; 2770 } 2771 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2772 return AMDGPU::NoRegister; 2773 } 2774 2775 if (!skipToken(AsmToken::RBrac, 2776 "expected a comma or a closing square bracket")) { 2777 return AMDGPU::NoRegister; 2778 } 2779 2780 if (isRegularReg(RegKind)) 2781 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2782 2783 return Reg; 2784 } 2785 2786 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2787 unsigned &RegNum, unsigned &RegWidth, 2788 SmallVectorImpl<AsmToken> &Tokens) { 2789 auto Loc = getLoc(); 2790 Reg = AMDGPU::NoRegister; 2791 2792 if (isToken(AsmToken::Identifier)) { 2793 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2794 if (Reg == AMDGPU::NoRegister) 2795 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2796 } else { 2797 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2798 } 2799 2800 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2801 if (Reg == AMDGPU::NoRegister) { 2802 assert(Parser.hasPendingError()); 2803 return false; 2804 } 2805 2806 if (!subtargetHasRegister(*TRI, Reg)) { 2807 if (Reg == AMDGPU::SGPR_NULL) { 2808 Error(Loc, "'null' operand is not supported on this GPU"); 2809 } else { 2810 Error(Loc, "register not available on this GPU"); 2811 } 2812 return false; 2813 } 2814 2815 return true; 2816 } 2817 2818 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2819 unsigned &RegNum, unsigned &RegWidth, 2820 bool RestoreOnFailure /*=false*/) { 2821 Reg = AMDGPU::NoRegister; 2822 2823 SmallVector<AsmToken, 1> Tokens; 2824 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2825 if (RestoreOnFailure) { 2826 while (!Tokens.empty()) { 2827 getLexer().UnLex(Tokens.pop_back_val()); 2828 } 2829 } 2830 return true; 2831 } 2832 return false; 2833 } 2834 2835 Optional<StringRef> 2836 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2837 switch (RegKind) { 2838 case IS_VGPR: 2839 return StringRef(".amdgcn.next_free_vgpr"); 2840 case IS_SGPR: 2841 return StringRef(".amdgcn.next_free_sgpr"); 2842 default: 2843 return None; 2844 } 2845 } 2846 2847 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2848 auto SymbolName = getGprCountSymbolName(RegKind); 2849 assert(SymbolName && "initializing invalid register kind"); 2850 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2851 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2852 } 2853 2854 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2855 unsigned DwordRegIndex, 2856 unsigned RegWidth) { 2857 // Symbols are only defined for GCN targets 2858 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2859 return true; 2860 2861 auto SymbolName = getGprCountSymbolName(RegKind); 2862 if (!SymbolName) 2863 return true; 2864 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2865 2866 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1; 2867 int64_t OldCount; 2868 2869 if (!Sym->isVariable()) 2870 return !Error(getLoc(), 2871 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2872 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2873 return !Error( 2874 getLoc(), 2875 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2876 2877 if (OldCount <= NewMax) 2878 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2879 2880 return true; 2881 } 2882 2883 std::unique_ptr<AMDGPUOperand> 2884 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2885 const auto &Tok = getToken(); 2886 SMLoc StartLoc = Tok.getLoc(); 2887 SMLoc EndLoc = Tok.getEndLoc(); 2888 RegisterKind RegKind; 2889 unsigned Reg, RegNum, RegWidth; 2890 2891 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2892 return nullptr; 2893 } 2894 if (isHsaAbiVersion3AndAbove(&getSTI())) { 2895 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2896 return nullptr; 2897 } else 2898 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2899 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2900 } 2901 2902 OperandMatchResultTy 2903 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2904 // TODO: add syntactic sugar for 1/(2*PI) 2905 2906 if (isRegister()) 2907 return MatchOperand_NoMatch; 2908 assert(!isModifier()); 2909 2910 const auto& Tok = getToken(); 2911 const auto& NextTok = peekToken(); 2912 bool IsReal = Tok.is(AsmToken::Real); 2913 SMLoc S = getLoc(); 2914 bool Negate = false; 2915 2916 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2917 lex(); 2918 IsReal = true; 2919 Negate = true; 2920 } 2921 2922 if (IsReal) { 2923 // Floating-point expressions are not supported. 2924 // Can only allow floating-point literals with an 2925 // optional sign. 
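// Illustrative examples (hypothetical operands):
//   v_add_f32 v0, -1.5, v1     // accepted: an fp literal with an optional sign
//   v_add_f32 v0, 1.5*2, v1    // not handled here: fp expressions are unsupported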
2926 2927 StringRef Num = getTokenStr(); 2928 lex(); 2929 2930 APFloat RealVal(APFloat::IEEEdouble()); 2931 auto roundMode = APFloat::rmNearestTiesToEven; 2932 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2933 return MatchOperand_ParseFail; 2934 } 2935 if (Negate) 2936 RealVal.changeSign(); 2937 2938 Operands.push_back( 2939 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2940 AMDGPUOperand::ImmTyNone, true)); 2941 2942 return MatchOperand_Success; 2943 2944 } else { 2945 int64_t IntVal; 2946 const MCExpr *Expr; 2947 SMLoc S = getLoc(); 2948 2949 if (HasSP3AbsModifier) { 2950 // This is a workaround for handling expressions 2951 // as arguments of SP3 'abs' modifier, for example: 2952 // |1.0| 2953 // |-1| 2954 // |1+x| 2955 // This syntax is not compatible with syntax of standard 2956 // MC expressions (due to the trailing '|'). 2957 SMLoc EndLoc; 2958 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2959 return MatchOperand_ParseFail; 2960 } else { 2961 if (Parser.parseExpression(Expr)) 2962 return MatchOperand_ParseFail; 2963 } 2964 2965 if (Expr->evaluateAsAbsolute(IntVal)) { 2966 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2967 } else { 2968 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2969 } 2970 2971 return MatchOperand_Success; 2972 } 2973 2974 return MatchOperand_NoMatch; 2975 } 2976 2977 OperandMatchResultTy 2978 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2979 if (!isRegister()) 2980 return MatchOperand_NoMatch; 2981 2982 if (auto R = parseRegister()) { 2983 assert(R->isReg()); 2984 Operands.push_back(std::move(R)); 2985 return MatchOperand_Success; 2986 } 2987 return MatchOperand_ParseFail; 2988 } 2989 2990 OperandMatchResultTy 2991 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2992 auto res = parseReg(Operands); 2993 if (res != MatchOperand_NoMatch) { 2994 return res; 2995 } else if (isModifier()) { 2996 return MatchOperand_NoMatch; 2997 } else { 2998 return parseImm(Operands, HasSP3AbsMod); 2999 } 3000 } 3001 3002 bool 3003 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3004 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 3005 const auto &str = Token.getString(); 3006 return str == "abs" || str == "neg" || str == "sext"; 3007 } 3008 return false; 3009 } 3010 3011 bool 3012 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 3013 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 3014 } 3015 3016 bool 3017 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3018 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 3019 } 3020 3021 bool 3022 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3023 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 3024 } 3025 3026 // Check if this is an operand modifier or an opcode modifier 3027 // which may look like an expression but it is not. We should 3028 // avoid parsing these modifiers as expressions. Currently 3029 // recognized sequences are: 3030 // |...| 3031 // abs(...) 3032 // neg(...) 3033 // sext(...) 3034 // -reg 3035 // -|...| 3036 // -abs(...) 3037 // name:... 3038 // Note that simple opcode modifiers like 'gds' may be parsed as 3039 // expressions; this is a special case. See getExpressionAsToken. 
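// For instance (illustrative), in "ds_add_u32 v0, v1 gds" the trailing 'gds'
// has no ':' or '(' and is therefore not treated as a modifier here; it may
// first be parsed as an expression and later re-interpreted as a token.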
3040 // 3041 bool 3042 AMDGPUAsmParser::isModifier() { 3043 3044 AsmToken Tok = getToken(); 3045 AsmToken NextToken[2]; 3046 peekTokens(NextToken); 3047 3048 return isOperandModifier(Tok, NextToken[0]) || 3049 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 3050 isOpcodeModifierWithVal(Tok, NextToken[0]); 3051 } 3052 3053 // Check if the current token is an SP3 'neg' modifier. 3054 // Currently this modifier is allowed in the following contexts: 3055 // 3056 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 3057 // 2. Before an 'abs' modifier: -abs(...) 3058 // 3. Before an SP3 'abs' modifier: -|...| 3059 // 3060 // In all other cases "-" is handled as a part 3061 // of an expression that follows the sign. 3062 // 3063 // Note: When "-" is followed by an integer literal, 3064 // this is interpreted as integer negation rather 3065 // than a floating-point NEG modifier applied to the literal. 3066 // Besides being counter-intuitive, such use of the floating-point 3067 // NEG modifier would have resulted in integer literals having 3068 // different meanings when used with VOP1/2/C and VOP3, 3069 // for example: 3070 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 3071 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 3072 // Negative fp literals with a preceding "-" are 3073 // handled likewise, for uniformity. 3074 // 3075 bool 3076 AMDGPUAsmParser::parseSP3NegModifier() { 3077 3078 AsmToken NextToken[2]; 3079 peekTokens(NextToken); 3080 3081 if (isToken(AsmToken::Minus) && 3082 (isRegister(NextToken[0], NextToken[1]) || 3083 NextToken[0].is(AsmToken::Pipe) || 3084 isId(NextToken[0], "abs"))) { 3085 lex(); 3086 return true; 3087 } 3088 3089 return false; 3090 } 3091 3092 OperandMatchResultTy 3093 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 3094 bool AllowImm) { 3095 bool Neg, SP3Neg; 3096 bool Abs, SP3Abs; 3097 SMLoc Loc; 3098 3099 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 3100 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 3101 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 3102 return MatchOperand_ParseFail; 3103 } 3104 3105 SP3Neg = parseSP3NegModifier(); 3106 3107 Loc = getLoc(); 3108 Neg = trySkipId("neg"); 3109 if (Neg && SP3Neg) { 3110 Error(Loc, "expected register or immediate"); 3111 return MatchOperand_ParseFail; 3112 } 3113 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 3114 return MatchOperand_ParseFail; 3115 3116 Abs = trySkipId("abs"); 3117 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 3118 return MatchOperand_ParseFail; 3119 3120 Loc = getLoc(); 3121 SP3Abs = trySkipToken(AsmToken::Pipe); 3122 if (Abs && SP3Abs) { 3123 Error(Loc, "expected register or immediate"); 3124 return MatchOperand_ParseFail; 3125 } 3126 3127 OperandMatchResultTy Res; 3128 if (AllowImm) { 3129 Res = parseRegOrImm(Operands, SP3Abs); 3130 } else { 3131 Res = parseReg(Operands); 3132 } 3133 if (Res != MatchOperand_Success) { 3134 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 3135 } 3136 3137 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 3138 return MatchOperand_ParseFail; 3139 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3140 return MatchOperand_ParseFail; 3141 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3142 return MatchOperand_ParseFail; 3143 3144 AMDGPUOperand::Modifiers Mods; 3145 Mods.Abs = Abs || SP3Abs; 3146 Mods.Neg = Neg || SP3Neg; 3147 3148 if (Mods.hasFPModifiers()) { 3149 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3150 if (Op.isExpr()) { 3151 Error(Op.getStartLoc(), "expected an absolute expression"); 3152 return MatchOperand_ParseFail; 3153 } 3154 Op.setModifiers(Mods); 3155 } 3156 return MatchOperand_Success; 3157 } 3158 3159 OperandMatchResultTy 3160 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3161 bool AllowImm) { 3162 bool Sext = trySkipId("sext"); 3163 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3164 return MatchOperand_ParseFail; 3165 3166 OperandMatchResultTy Res; 3167 if (AllowImm) { 3168 Res = parseRegOrImm(Operands); 3169 } else { 3170 Res = parseReg(Operands); 3171 } 3172 if (Res != MatchOperand_Success) { 3173 return Sext? MatchOperand_ParseFail : Res; 3174 } 3175 3176 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3177 return MatchOperand_ParseFail; 3178 3179 AMDGPUOperand::Modifiers Mods; 3180 Mods.Sext = Sext; 3181 3182 if (Mods.hasIntModifiers()) { 3183 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3184 if (Op.isExpr()) { 3185 Error(Op.getStartLoc(), "expected an absolute expression"); 3186 return MatchOperand_ParseFail; 3187 } 3188 Op.setModifiers(Mods); 3189 } 3190 3191 return MatchOperand_Success; 3192 } 3193 3194 OperandMatchResultTy 3195 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3196 return parseRegOrImmWithFPInputMods(Operands, false); 3197 } 3198 3199 OperandMatchResultTy 3200 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3201 return parseRegOrImmWithIntInputMods(Operands, false); 3202 } 3203 3204 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3205 auto Loc = getLoc(); 3206 if (trySkipId("off")) { 3207 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3208 AMDGPUOperand::ImmTyOff, false)); 3209 return MatchOperand_Success; 3210 } 3211 3212 if (!isRegister()) 3213 return MatchOperand_NoMatch; 3214 3215 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3216 if (Reg) { 3217 Operands.push_back(std::move(Reg)); 3218 return MatchOperand_Success; 3219 } 3220 3221 return MatchOperand_ParseFail; 3222 3223 } 3224 3225 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3226 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3227 3228 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3229 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3230 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3231 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3232 return Match_InvalidOperand; 3233 3234 if ((TSFlags & SIInstrFlags::VOP3) && 3235 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3236 getForcedEncodingSize() != 64) 3237 return Match_PreferE32; 3238 3239 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3240 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3241 // v_mac_f32/16 allow only dst_sel == DWORD; 3242 auto OpNum = 3243 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3244 const auto &Op = Inst.getOperand(OpNum); 3245 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3246 return Match_InvalidOperand; 3247 } 3248 } 3249 3250 return Match_Success; 3251 } 3252 3253 static ArrayRef<unsigned> getAllVariants() { 3254 static const unsigned Variants[] = { 3255 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3256 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, 3257 AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP 3258 }; 3259 3260 return makeArrayRef(Variants); 3261 } 3262 3263 // Which asm variants we should check 3264 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3265 if (isForcedDPP() && isForcedVOP3()) { 3266 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP}; 3267 return makeArrayRef(Variants); 3268 } 3269 if (getForcedEncodingSize() == 32) { 3270 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3271 return makeArrayRef(Variants); 3272 } 3273 3274 if (isForcedVOP3()) { 3275 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3276 return makeArrayRef(Variants); 3277 } 3278 3279 if (isForcedSDWA()) { 3280 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3281 AMDGPUAsmVariants::SDWA9}; 3282 return makeArrayRef(Variants); 3283 } 3284 3285 if (isForcedDPP()) { 3286 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3287 return makeArrayRef(Variants); 3288 } 3289 3290 return getAllVariants(); 3291 } 3292 3293 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3294 if (isForcedDPP() && isForcedVOP3()) 3295 return "e64_dpp"; 3296 3297 if (getForcedEncodingSize() == 32) 3298 return "e32"; 3299 3300 if (isForcedVOP3()) 3301 return "e64"; 3302 3303 if (isForcedSDWA()) 3304 return "sdwa"; 3305 3306 if (isForcedDPP()) 3307 return "dpp"; 3308 3309 return ""; 3310 } 3311 3312 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3313 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3314 const unsigned Num = Desc.getNumImplicitUses(); 3315 for (unsigned i = 0; i < Num; ++i) { 3316 unsigned Reg = Desc.ImplicitUses[i]; 3317 switch (Reg) { 3318 case AMDGPU::FLAT_SCR: 3319 case AMDGPU::VCC: 3320 case AMDGPU::VCC_LO: 3321 case AMDGPU::VCC_HI: 3322 case AMDGPU::M0: 3323 return Reg; 3324 default: 3325 break; 3326 } 3327 } 3328 return AMDGPU::NoRegister; 3329 } 3330 3331 // NB: This code is correct only when used to check constant 3332 // bus limitations, because GFX7 supports no f16 inline constants. 3333 // Note that there are no cases where a GFX7 opcode violates 3334 // constant bus limitations due to the use of an f16 constant.
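// A brief illustration of the constant bus checks that rely on this helper
// (assuming the limits computed by getConstantBusLimit below):
//   v_add_f32_e64 v0, s0, s1    // two SGPRs: rejected before GFX10 (limit 1),
//                               // accepted on GFX10+ (limit 2)
//   v_add_f32_e64 v0, s0, 1.0   // one SGPR plus an inline constant: accepted;
//                               // the inline constant does not use the bus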
3335 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3336 unsigned OpIdx) const { 3337 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3338 3339 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3340 return false; 3341 } 3342 3343 const MCOperand &MO = Inst.getOperand(OpIdx); 3344 3345 int64_t Val = MO.getImm(); 3346 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3347 3348 switch (OpSize) { // expected operand size 3349 case 8: 3350 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3351 case 4: 3352 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3353 case 2: { 3354 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3355 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3356 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3357 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3358 return AMDGPU::isInlinableIntLiteral(Val); 3359 3360 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3361 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3362 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3363 return AMDGPU::isInlinableIntLiteralV216(Val); 3364 3365 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3366 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3367 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3368 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3369 3370 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3371 } 3372 default: 3373 llvm_unreachable("invalid operand size"); 3374 } 3375 } 3376 3377 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3378 if (!isGFX10Plus()) 3379 return 1; 3380 3381 switch (Opcode) { 3382 // 64-bit shift instructions can use only one scalar value input 3383 case AMDGPU::V_LSHLREV_B64_e64: 3384 case AMDGPU::V_LSHLREV_B64_gfx10: 3385 case AMDGPU::V_LSHLREV_B64_e64_gfx11: 3386 case AMDGPU::V_LSHRREV_B64_e64: 3387 case AMDGPU::V_LSHRREV_B64_gfx10: 3388 case AMDGPU::V_LSHRREV_B64_e64_gfx11: 3389 case AMDGPU::V_ASHRREV_I64_e64: 3390 case AMDGPU::V_ASHRREV_I64_gfx10: 3391 case AMDGPU::V_ASHRREV_I64_e64_gfx11: 3392 case AMDGPU::V_LSHL_B64_e64: 3393 case AMDGPU::V_LSHR_B64_e64: 3394 case AMDGPU::V_ASHR_I64_e64: 3395 return 1; 3396 default: 3397 return 2; 3398 } 3399 } 3400 3401 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3402 const MCOperand &MO = Inst.getOperand(OpIdx); 3403 if (MO.isImm()) { 3404 return !isInlineConstant(Inst, OpIdx); 3405 } else if (MO.isReg()) { 3406 auto Reg = MO.getReg(); 3407 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3408 auto PReg = mc2PseudoReg(Reg); 3409 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3410 } else { 3411 return true; 3412 } 3413 } 3414 3415 bool 3416 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3417 const OperandVector &Operands) { 3418 const unsigned Opcode = Inst.getOpcode(); 3419 const MCInstrDesc &Desc = MII.get(Opcode); 3420 unsigned LastSGPR = AMDGPU::NoRegister; 3421 unsigned ConstantBusUseCount = 0; 3422 unsigned NumLiterals = 0; 3423 unsigned LiteralSize; 3424 3425 if (Desc.TSFlags & 3426 (SIInstrFlags::VOPC | 3427 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3428 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3429 SIInstrFlags::SDWA)) { 3430 // Check special imm operands (used by madmk, etc) 3431 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3432 ++NumLiterals; 3433 LiteralSize = 4; 3434 } 3435 3436 SmallDenseSet<unsigned> SGPRsUsed; 3437 unsigned SGPRUsed = 
findImplicitSGPRReadInVOP(Inst); 3438 if (SGPRUsed != AMDGPU::NoRegister) { 3439 SGPRsUsed.insert(SGPRUsed); 3440 ++ConstantBusUseCount; 3441 } 3442 3443 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3444 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3445 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3446 3447 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3448 3449 for (int OpIdx : OpIndices) { 3450 if (OpIdx == -1) break; 3451 3452 const MCOperand &MO = Inst.getOperand(OpIdx); 3453 if (usesConstantBus(Inst, OpIdx)) { 3454 if (MO.isReg()) { 3455 LastSGPR = mc2PseudoReg(MO.getReg()); 3456 // Pairs of registers with a partial intersection like these 3457 // s0, s[0:1] 3458 // flat_scratch_lo, flat_scratch 3459 // flat_scratch_lo, flat_scratch_hi 3460 // are theoretically valid but they are disabled anyway. 3461 // Note that this code mimics SIInstrInfo::verifyInstruction 3462 if (SGPRsUsed.insert(LastSGPR).second) { 3463 ++ConstantBusUseCount; 3464 } 3465 } else { // Expression or a literal 3466 3467 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3468 continue; // special operand like VINTERP attr_chan 3469 3470 // An instruction may use only one literal. 3471 // This has already been validated in a previous step. 3472 // See validateVOPLiteral. 3473 // This literal may be used as more than one operand. 3474 // If all these operands are of the same size, 3475 // this literal counts as one scalar value. 3476 // Otherwise it counts as 2 scalar values. 3477 // See "GFX10 Shader Programming", section 3.6.2.3. 3478 3479 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3480 if (Size < 4) Size = 4; 3481 3482 if (NumLiterals == 0) { 3483 NumLiterals = 1; 3484 LiteralSize = Size; 3485 } else if (LiteralSize != Size) { 3486 NumLiterals = 2; 3487 } 3488 } 3489 } 3490 } 3491 } 3492 ConstantBusUseCount += NumLiterals; 3493 3494 if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) 3495 return true; 3496 3497 SMLoc LitLoc = getLitLoc(Operands); 3498 SMLoc RegLoc = getRegLoc(LastSGPR, Operands); 3499 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ?
RegLoc : LitLoc; 3500 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3501 return false; 3502 } 3503 3504 bool 3505 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3506 const OperandVector &Operands) { 3507 const unsigned Opcode = Inst.getOpcode(); 3508 const MCInstrDesc &Desc = MII.get(Opcode); 3509 3510 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3511 if (DstIdx == -1 || 3512 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3513 return true; 3514 } 3515 3516 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3517 3518 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3519 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3520 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3521 3522 assert(DstIdx != -1); 3523 const MCOperand &Dst = Inst.getOperand(DstIdx); 3524 assert(Dst.isReg()); 3525 3526 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3527 3528 for (int SrcIdx : SrcIndices) { 3529 if (SrcIdx == -1) break; 3530 const MCOperand &Src = Inst.getOperand(SrcIdx); 3531 if (Src.isReg()) { 3532 if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) { 3533 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3534 Error(getRegLoc(SrcReg, Operands), 3535 "destination must be different than all sources"); 3536 return false; 3537 } 3538 } 3539 } 3540 3541 return true; 3542 } 3543 3544 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3545 3546 const unsigned Opc = Inst.getOpcode(); 3547 const MCInstrDesc &Desc = MII.get(Opc); 3548 3549 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3550 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3551 assert(ClampIdx != -1); 3552 return Inst.getOperand(ClampIdx).getImm() == 0; 3553 } 3554 3555 return true; 3556 } 3557 3558 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3559 3560 const unsigned Opc = Inst.getOpcode(); 3561 const MCInstrDesc &Desc = MII.get(Opc); 3562 3563 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3564 return None; 3565 3566 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3567 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3568 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3569 3570 assert(VDataIdx != -1); 3571 3572 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3573 return None; 3574 3575 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3576 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3577 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3578 if (DMask == 0) 3579 DMask = 1; 3580 3581 bool isPackedD16 = false; 3582 unsigned DataSize = 3583 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 3584 if (hasPackedD16()) { 3585 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3586 isPackedD16 = D16Idx >= 0; 3587 if (isPackedD16 && Inst.getOperand(D16Idx).getImm()) 3588 DataSize = (DataSize + 1) / 2; 3589 } 3590 3591 if ((VDataSize / 4) == DataSize + TFESize) 3592 return None; 3593 3594 return StringRef(isPackedD16 3595 ? 
"image data size does not match dmask, d16 and tfe" 3596 : "image data size does not match dmask and tfe"); 3597 } 3598 3599 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3600 const unsigned Opc = Inst.getOpcode(); 3601 const MCInstrDesc &Desc = MII.get(Opc); 3602 3603 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3604 return true; 3605 3606 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3607 3608 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3609 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3610 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3611 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3612 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3613 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3614 3615 assert(VAddr0Idx != -1); 3616 assert(SrsrcIdx != -1); 3617 assert(SrsrcIdx > VAddr0Idx); 3618 3619 if (DimIdx == -1) 3620 return true; // intersect_ray 3621 3622 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3623 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3624 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3625 unsigned ActualAddrSize = 3626 IsNSA ? SrsrcIdx - VAddr0Idx 3627 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3628 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3629 3630 unsigned ExpectedAddrSize = 3631 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3632 3633 if (!IsNSA) { 3634 if (ExpectedAddrSize > 8) 3635 ExpectedAddrSize = 16; 3636 3637 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3638 // This provides backward compatibility for assembly created 3639 // before 160b/192b/224b types were directly supported. 3640 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3641 return true; 3642 } 3643 3644 return ActualAddrSize == ExpectedAddrSize; 3645 } 3646 3647 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3648 3649 const unsigned Opc = Inst.getOpcode(); 3650 const MCInstrDesc &Desc = MII.get(Opc); 3651 3652 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3653 return true; 3654 if (!Desc.mayLoad() || !Desc.mayStore()) 3655 return true; // Not atomic 3656 3657 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3658 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3659 3660 // This is an incomplete check because image_atomic_cmpswap 3661 // may only use 0x3 and 0xf while other atomic operations 3662 // may use 0x1 and 0x3. However these limitations are 3663 // verified when we check that dmask matches dst size. 3664 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3665 } 3666 3667 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3668 3669 const unsigned Opc = Inst.getOpcode(); 3670 const MCInstrDesc &Desc = MII.get(Opc); 3671 3672 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3673 return true; 3674 3675 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3676 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3677 3678 // GATHER4 instructions use dmask in a different fashion compared to 3679 // other MIMG instructions. The only useful DMASK values are 3680 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3681 // (red,red,red,red) etc.) The ISA document doesn't mention 3682 // this. 
3683 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3684 } 3685 3686 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3687 const unsigned Opc = Inst.getOpcode(); 3688 const MCInstrDesc &Desc = MII.get(Opc); 3689 3690 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3691 return true; 3692 3693 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3694 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3695 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3696 3697 if (!BaseOpcode->MSAA) 3698 return true; 3699 3700 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3701 assert(DimIdx != -1); 3702 3703 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3704 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3705 3706 return DimInfo->MSAA; 3707 } 3708 3709 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3710 { 3711 switch (Opcode) { 3712 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3713 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3714 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3715 return true; 3716 default: 3717 return false; 3718 } 3719 } 3720 3721 // movrels* opcodes should only allow VGPRS as src0. 3722 // This is specified in .td description for vop1/vop3, 3723 // but sdwa is handled differently. See isSDWAOperand. 3724 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3725 const OperandVector &Operands) { 3726 3727 const unsigned Opc = Inst.getOpcode(); 3728 const MCInstrDesc &Desc = MII.get(Opc); 3729 3730 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3731 return true; 3732 3733 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3734 assert(Src0Idx != -1); 3735 3736 SMLoc ErrLoc; 3737 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3738 if (Src0.isReg()) { 3739 auto Reg = mc2PseudoReg(Src0.getReg()); 3740 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3741 if (!isSGPR(Reg, TRI)) 3742 return true; 3743 ErrLoc = getRegLoc(Reg, Operands); 3744 } else { 3745 ErrLoc = getConstLoc(Operands); 3746 } 3747 3748 Error(ErrLoc, "source operand must be a VGPR"); 3749 return false; 3750 } 3751 3752 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3753 const OperandVector &Operands) { 3754 3755 const unsigned Opc = Inst.getOpcode(); 3756 3757 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3758 return true; 3759 3760 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3761 assert(Src0Idx != -1); 3762 3763 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3764 if (!Src0.isReg()) 3765 return true; 3766 3767 auto Reg = mc2PseudoReg(Src0.getReg()); 3768 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3769 if (!isGFX90A() && isSGPR(Reg, TRI)) { 3770 Error(getRegLoc(Reg, Operands), 3771 "source operand must be either a VGPR or an inline constant"); 3772 return false; 3773 } 3774 3775 return true; 3776 } 3777 3778 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 3779 const OperandVector &Operands) { 3780 const unsigned Opc = Inst.getOpcode(); 3781 const MCInstrDesc &Desc = MII.get(Opc); 3782 3783 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 3784 return true; 3785 3786 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 3787 if (Src2Idx == -1) 3788 return true; 3789 3790 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 3791 if (!Src2.isReg()) 3792 return true; 3793 3794 MCRegister Src2Reg = Src2.getReg(); 3795 MCRegister DstReg = Inst.getOperand(0).getReg(); 3796 if (Src2Reg == DstReg) 3797 return 
true; 3798 3799 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3800 if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128) 3801 return true; 3802 3803 if (TRI->regsOverlap(Src2Reg, DstReg)) { 3804 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands), 3805 "source 2 operand must not partially overlap with dst"); 3806 return false; 3807 } 3808 3809 return true; 3810 } 3811 3812 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3813 switch (Inst.getOpcode()) { 3814 default: 3815 return true; 3816 case V_DIV_SCALE_F32_gfx6_gfx7: 3817 case V_DIV_SCALE_F32_vi: 3818 case V_DIV_SCALE_F32_gfx10: 3819 case V_DIV_SCALE_F64_gfx6_gfx7: 3820 case V_DIV_SCALE_F64_vi: 3821 case V_DIV_SCALE_F64_gfx10: 3822 break; 3823 } 3824 3825 // TODO: Check that src0 = src1 or src2. 3826 3827 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3828 AMDGPU::OpName::src1_modifiers, 3829 AMDGPU::OpName::src2_modifiers}) { 3830 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3831 .getImm() & 3832 SISrcMods::ABS) { 3833 return false; 3834 } 3835 } 3836 3837 return true; 3838 } 3839 3840 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3841 3842 const unsigned Opc = Inst.getOpcode(); 3843 const MCInstrDesc &Desc = MII.get(Opc); 3844 3845 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3846 return true; 3847 3848 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3849 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3850 if (isCI() || isSI()) 3851 return false; 3852 } 3853 3854 return true; 3855 } 3856 3857 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3858 const unsigned Opc = Inst.getOpcode(); 3859 const MCInstrDesc &Desc = MII.get(Opc); 3860 3861 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3862 return true; 3863 3864 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3865 if (DimIdx < 0) 3866 return true; 3867 3868 long Imm = Inst.getOperand(DimIdx).getImm(); 3869 if (Imm < 0 || Imm >= 8) 3870 return false; 3871 3872 return true; 3873 } 3874 3875 static bool IsRevOpcode(const unsigned Opcode) 3876 { 3877 switch (Opcode) { 3878 case AMDGPU::V_SUBREV_F32_e32: 3879 case AMDGPU::V_SUBREV_F32_e64: 3880 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3881 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3882 case AMDGPU::V_SUBREV_F32_e32_vi: 3883 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3884 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3885 case AMDGPU::V_SUBREV_F32_e64_vi: 3886 3887 case AMDGPU::V_SUBREV_CO_U32_e32: 3888 case AMDGPU::V_SUBREV_CO_U32_e64: 3889 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3890 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3891 3892 case AMDGPU::V_SUBBREV_U32_e32: 3893 case AMDGPU::V_SUBBREV_U32_e64: 3894 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3895 case AMDGPU::V_SUBBREV_U32_e32_vi: 3896 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3897 case AMDGPU::V_SUBBREV_U32_e64_vi: 3898 3899 case AMDGPU::V_SUBREV_U32_e32: 3900 case AMDGPU::V_SUBREV_U32_e64: 3901 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3902 case AMDGPU::V_SUBREV_U32_e32_vi: 3903 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3904 case AMDGPU::V_SUBREV_U32_e64_vi: 3905 3906 case AMDGPU::V_SUBREV_F16_e32: 3907 case AMDGPU::V_SUBREV_F16_e64: 3908 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3909 case AMDGPU::V_SUBREV_F16_e32_vi: 3910 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3911 case AMDGPU::V_SUBREV_F16_e64_vi: 3912 3913 case AMDGPU::V_SUBREV_U16_e32: 3914 case AMDGPU::V_SUBREV_U16_e64: 3915 case AMDGPU::V_SUBREV_U16_e32_vi: 3916 case AMDGPU::V_SUBREV_U16_e64_vi: 3917 3918 case
AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3919 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3920 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3921 3922 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3923 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3924 3925 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3926 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3927 3928 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3929 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3930 3931 case AMDGPU::V_LSHRREV_B32_e32: 3932 case AMDGPU::V_LSHRREV_B32_e64: 3933 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3934 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3935 case AMDGPU::V_LSHRREV_B32_e32_vi: 3936 case AMDGPU::V_LSHRREV_B32_e64_vi: 3937 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3938 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3939 3940 case AMDGPU::V_ASHRREV_I32_e32: 3941 case AMDGPU::V_ASHRREV_I32_e64: 3942 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3943 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3944 case AMDGPU::V_ASHRREV_I32_e32_vi: 3945 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3946 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3947 case AMDGPU::V_ASHRREV_I32_e64_vi: 3948 3949 case AMDGPU::V_LSHLREV_B32_e32: 3950 case AMDGPU::V_LSHLREV_B32_e64: 3951 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3952 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3953 case AMDGPU::V_LSHLREV_B32_e32_vi: 3954 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3955 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3956 case AMDGPU::V_LSHLREV_B32_e64_vi: 3957 3958 case AMDGPU::V_LSHLREV_B16_e32: 3959 case AMDGPU::V_LSHLREV_B16_e64: 3960 case AMDGPU::V_LSHLREV_B16_e32_vi: 3961 case AMDGPU::V_LSHLREV_B16_e64_vi: 3962 case AMDGPU::V_LSHLREV_B16_gfx10: 3963 3964 case AMDGPU::V_LSHRREV_B16_e32: 3965 case AMDGPU::V_LSHRREV_B16_e64: 3966 case AMDGPU::V_LSHRREV_B16_e32_vi: 3967 case AMDGPU::V_LSHRREV_B16_e64_vi: 3968 case AMDGPU::V_LSHRREV_B16_gfx10: 3969 3970 case AMDGPU::V_ASHRREV_I16_e32: 3971 case AMDGPU::V_ASHRREV_I16_e64: 3972 case AMDGPU::V_ASHRREV_I16_e32_vi: 3973 case AMDGPU::V_ASHRREV_I16_e64_vi: 3974 case AMDGPU::V_ASHRREV_I16_gfx10: 3975 3976 case AMDGPU::V_LSHLREV_B64_e64: 3977 case AMDGPU::V_LSHLREV_B64_gfx10: 3978 case AMDGPU::V_LSHLREV_B64_vi: 3979 3980 case AMDGPU::V_LSHRREV_B64_e64: 3981 case AMDGPU::V_LSHRREV_B64_gfx10: 3982 case AMDGPU::V_LSHRREV_B64_vi: 3983 3984 case AMDGPU::V_ASHRREV_I64_e64: 3985 case AMDGPU::V_ASHRREV_I64_gfx10: 3986 case AMDGPU::V_ASHRREV_I64_vi: 3987 3988 case AMDGPU::V_PK_LSHLREV_B16: 3989 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3990 case AMDGPU::V_PK_LSHLREV_B16_vi: 3991 3992 case AMDGPU::V_PK_LSHRREV_B16: 3993 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3994 case AMDGPU::V_PK_LSHRREV_B16_vi: 3995 case AMDGPU::V_PK_ASHRREV_I16: 3996 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3997 case AMDGPU::V_PK_ASHRREV_I16_vi: 3998 return true; 3999 default: 4000 return false; 4001 } 4002 } 4003 4004 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 4005 4006 using namespace SIInstrFlags; 4007 const unsigned Opcode = Inst.getOpcode(); 4008 const MCInstrDesc &Desc = MII.get(Opcode); 4009 4010 // lds_direct register is defined so that it can be used 4011 // with 9-bit operands only. Ignore encodings which do not accept these. 
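// For example (illustrative): 'v_mov_b32 v0, lds_direct' reads lds_direct as
// src0 and is accepted, while reading it as src1/src2 or using it with a
// *rev opcode is rejected by the checks below.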
4012 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 4013 if ((Desc.TSFlags & Enc) == 0) 4014 return None; 4015 4016 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 4017 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 4018 if (SrcIdx == -1) 4019 break; 4020 const auto &Src = Inst.getOperand(SrcIdx); 4021 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 4022 4023 if (isGFX90A() || isGFX11Plus()) 4024 return StringRef("lds_direct is not supported on this GPU"); 4025 4026 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 4027 return StringRef("lds_direct cannot be used with this instruction"); 4028 4029 if (SrcName != OpName::src0) 4030 return StringRef("lds_direct may be used as src0 only"); 4031 } 4032 } 4033 4034 return None; 4035 } 4036 4037 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 4038 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4039 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4040 if (Op.isFlatOffset()) 4041 return Op.getStartLoc(); 4042 } 4043 return getLoc(); 4044 } 4045 4046 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 4047 const OperandVector &Operands) { 4048 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4049 if ((TSFlags & SIInstrFlags::FLAT) == 0) 4050 return true; 4051 4052 auto Opcode = Inst.getOpcode(); 4053 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4054 assert(OpNum != -1); 4055 4056 const auto &Op = Inst.getOperand(OpNum); 4057 if (!hasFlatOffsets() && Op.getImm() != 0) { 4058 Error(getFlatOffsetLoc(Operands), 4059 "flat offset modifier is not supported on this GPU"); 4060 return false; 4061 } 4062 4063 // For FLAT segment the offset must be positive; 4064 // MSB is ignored and forced to zero. 4065 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 4066 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 4067 if (!isIntN(OffsetSize, Op.getImm())) { 4068 Error(getFlatOffsetLoc(Operands), 4069 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 4070 return false; 4071 } 4072 } else { 4073 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 4074 if (!isUIntN(OffsetSize, Op.getImm())) { 4075 Error(getFlatOffsetLoc(Operands), 4076 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 4077 return false; 4078 } 4079 } 4080 4081 return true; 4082 } 4083 4084 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 4085 // Start with second operand because SMEM Offset cannot be dst or src0. 
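// (Operands[0] is the mnemonic token, so the scan below starts at index 2 to
// skip the mnemonic and the destination.)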
4086 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 4087 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4088 if (Op.isSMEMOffset()) 4089 return Op.getStartLoc(); 4090 } 4091 return getLoc(); 4092 } 4093 4094 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 4095 const OperandVector &Operands) { 4096 if (isCI() || isSI()) 4097 return true; 4098 4099 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4100 if ((TSFlags & SIInstrFlags::SMRD) == 0) 4101 return true; 4102 4103 auto Opcode = Inst.getOpcode(); 4104 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4105 if (OpNum == -1) 4106 return true; 4107 4108 const auto &Op = Inst.getOperand(OpNum); 4109 if (!Op.isImm()) 4110 return true; 4111 4112 uint64_t Offset = Op.getImm(); 4113 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 4114 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 4115 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 4116 return true; 4117 4118 Error(getSMEMOffsetLoc(Operands), 4119 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 4120 "expected a 21-bit signed offset"); 4121 4122 return false; 4123 } 4124 4125 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 4126 unsigned Opcode = Inst.getOpcode(); 4127 const MCInstrDesc &Desc = MII.get(Opcode); 4128 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 4129 return true; 4130 4131 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4132 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4133 4134 const int OpIndices[] = { Src0Idx, Src1Idx }; 4135 4136 unsigned NumExprs = 0; 4137 unsigned NumLiterals = 0; 4138 uint32_t LiteralValue; 4139 4140 for (int OpIdx : OpIndices) { 4141 if (OpIdx == -1) break; 4142 4143 const MCOperand &MO = Inst.getOperand(OpIdx); 4144 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 4145 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 4146 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4147 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4148 if (NumLiterals == 0 || LiteralValue != Value) { 4149 LiteralValue = Value; 4150 ++NumLiterals; 4151 } 4152 } else if (MO.isExpr()) { 4153 ++NumExprs; 4154 } 4155 } 4156 } 4157 4158 return NumLiterals + NumExprs <= 1; 4159 } 4160 4161 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4162 const unsigned Opc = Inst.getOpcode(); 4163 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 4164 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 4165 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4166 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4167 4168 if (OpSel & ~3) 4169 return false; 4170 } 4171 4172 if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) { 4173 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4174 if (OpSelIdx != -1) { 4175 if (Inst.getOperand(OpSelIdx).getImm() != 0) 4176 return false; 4177 } 4178 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 4179 if (OpSelHiIdx != -1) { 4180 if (Inst.getOperand(OpSelHiIdx).getImm() != -1) 4181 return false; 4182 } 4183 } 4184 4185 return true; 4186 } 4187 4188 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4189 const OperandVector &Operands) { 4190 const unsigned Opc = Inst.getOpcode(); 4191 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4192 if (DppCtrlIdx < 0) 4193 return true; 4194 unsigned DppCtrl = 
Inst.getOperand(DppCtrlIdx).getImm(); 4195 4196 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 4197 // DPP64 is supported for row_newbcast only. 4198 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4199 if (Src0Idx >= 0 && 4200 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 4201 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4202 Error(S, "64 bit dpp only supports row_newbcast"); 4203 return false; 4204 } 4205 } 4206 4207 return true; 4208 } 4209 4210 // Check if VCC register matches wavefront size 4211 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4212 auto FB = getFeatureBits(); 4213 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4214 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4215 } 4216 4217 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4218 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4219 const OperandVector &Operands) { 4220 unsigned Opcode = Inst.getOpcode(); 4221 const MCInstrDesc &Desc = MII.get(Opcode); 4222 const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); 4223 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4224 ImmIdx == -1) 4225 return true; 4226 4227 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4228 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4229 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 4230 4231 const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx}; 4232 4233 unsigned NumExprs = 0; 4234 unsigned NumLiterals = 0; 4235 uint32_t LiteralValue; 4236 4237 for (int OpIdx : OpIndices) { 4238 if (OpIdx == -1) 4239 continue; 4240 4241 const MCOperand &MO = Inst.getOperand(OpIdx); 4242 if (!MO.isImm() && !MO.isExpr()) 4243 continue; 4244 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 4245 continue; 4246 4247 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 4248 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 4249 Error(getConstLoc(Operands), 4250 "inline constants are not allowed for this operand"); 4251 return false; 4252 } 4253 4254 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4255 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4256 if (NumLiterals == 0 || LiteralValue != Value) { 4257 LiteralValue = Value; 4258 ++NumLiterals; 4259 } 4260 } else if (MO.isExpr()) { 4261 ++NumExprs; 4262 } 4263 } 4264 NumLiterals += NumExprs; 4265 4266 if (!NumLiterals) 4267 return true; 4268 4269 if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4270 Error(getLitLoc(Operands), "literal operands are not supported"); 4271 return false; 4272 } 4273 4274 if (NumLiterals > 1) { 4275 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4276 return false; 4277 } 4278 4279 return true; 4280 } 4281 4282 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4283 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4284 const MCRegisterInfo *MRI) { 4285 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4286 if (OpIdx < 0) 4287 return -1; 4288 4289 const MCOperand &Op = Inst.getOperand(OpIdx); 4290 if (!Op.isReg()) 4291 return -1; 4292 4293 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4294 auto Reg = Sub ? Sub : Op.getReg(); 4295 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4296 return AGPR32.contains(Reg) ? 
1 : 0; 4297 } 4298 4299 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4300 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4301 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4302 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4303 SIInstrFlags::DS)) == 0) 4304 return true; 4305 4306 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4307 : AMDGPU::OpName::vdata; 4308 4309 const MCRegisterInfo *MRI = getMRI(); 4310 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4311 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4312 4313 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4314 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4315 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4316 return false; 4317 } 4318 4319 auto FB = getFeatureBits(); 4320 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4321 if (DataAreg < 0 || DstAreg < 0) 4322 return true; 4323 return DstAreg == DataAreg; 4324 } 4325 4326 return DstAreg < 1 && DataAreg < 1; 4327 } 4328 4329 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4330 auto FB = getFeatureBits(); 4331 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4332 return true; 4333 4334 const MCRegisterInfo *MRI = getMRI(); 4335 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4336 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4337 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4338 const MCOperand &Op = Inst.getOperand(I); 4339 if (!Op.isReg()) 4340 continue; 4341 4342 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4343 if (!Sub) 4344 continue; 4345 4346 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4347 return false; 4348 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4349 return false; 4350 } 4351 4352 return true; 4353 } 4354 4355 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const { 4356 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4357 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4358 if (Op.isBLGP()) 4359 return Op.getStartLoc(); 4360 } 4361 return SMLoc(); 4362 } 4363 4364 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst, 4365 const OperandVector &Operands) { 4366 unsigned Opc = Inst.getOpcode(); 4367 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp); 4368 if (BlgpIdx == -1) 4369 return true; 4370 SMLoc BLGPLoc = getBLGPLoc(Operands); 4371 if (!BLGPLoc.isValid()) 4372 return true; 4373 bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:"); 4374 auto FB = getFeatureBits(); 4375 bool UsesNeg = false; 4376 if (FB[AMDGPU::FeatureGFX940Insts]) { 4377 switch (Opc) { 4378 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: 4379 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: 4380 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: 4381 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: 4382 UsesNeg = true; 4383 } 4384 } 4385 4386 if (IsNeg == UsesNeg) 4387 return true; 4388 4389 Error(BLGPLoc, 4390 UsesNeg ? "invalid modifier: blgp is not supported" 4391 : "invalid modifier: neg is not supported"); 4392 4393 return false; 4394 } 4395 4396 // gfx90a has an undocumented limitation: 4397 // DS_GWS opcodes must use even aligned registers. 
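// For example (illustrative), a data0 operand in v2 passes the check below,
// while v3 would be rejected because its register index is odd.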
4398 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4399 const OperandVector &Operands) { 4400 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4401 return true; 4402 4403 int Opc = Inst.getOpcode(); 4404 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4405 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4406 return true; 4407 4408 const MCRegisterInfo *MRI = getMRI(); 4409 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4410 int Data0Pos = 4411 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4412 assert(Data0Pos != -1); 4413 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4414 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0); 4415 if (RegIdx & 1) { 4416 SMLoc RegLoc = getRegLoc(Reg, Operands); 4417 Error(RegLoc, "vgpr must be even aligned"); 4418 return false; 4419 } 4420 4421 return true; 4422 } 4423 4424 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4425 const OperandVector &Operands, 4426 const SMLoc &IDLoc) { 4427 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4428 AMDGPU::OpName::cpol); 4429 if (CPolPos == -1) 4430 return true; 4431 4432 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4433 4434 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4435 if (TSFlags & SIInstrFlags::SMRD) { 4436 if (CPol && (isSI() || isCI())) { 4437 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4438 Error(S, "cache policy is not supported for SMRD instructions"); 4439 return false; 4440 } 4441 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) { 4442 Error(IDLoc, "invalid cache policy for SMEM instruction"); 4443 return false; 4444 } 4445 } 4446 4447 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { 4448 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4449 StringRef CStr(S.getPointer()); 4450 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4451 Error(S, "scc is not supported on this GPU"); 4452 return false; 4453 } 4454 4455 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4456 return true; 4457 4458 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4459 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4460 Error(IDLoc, isGFX940() ? "instruction must use sc0" 4461 : "instruction must use glc"); 4462 return false; 4463 } 4464 } else { 4465 if (CPol & CPol::GLC) { 4466 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4467 StringRef CStr(S.getPointer()); 4468 S = SMLoc::getFromPointer( 4469 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]); 4470 Error(S, isGFX940() ? "instruction must not use sc0" 4471 : "instruction must not use glc"); 4472 return false; 4473 } 4474 } 4475 4476 return true; 4477 } 4478 4479 bool AMDGPUAsmParser::validateFlatLdsDMA(const MCInst &Inst, 4480 const OperandVector &Operands, 4481 const SMLoc &IDLoc) { 4482 if (isGFX940()) 4483 return true; 4484 4485 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4486 if ((TSFlags & (SIInstrFlags::VALU | SIInstrFlags::FLAT)) != 4487 (SIInstrFlags::VALU | SIInstrFlags::FLAT)) 4488 return true; 4489 // This is FLAT LDS DMA. 4490 4491 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyLDS, Operands); 4492 StringRef CStr(S.getPointer()); 4493 if (!CStr.startswith("lds")) { 4494 // This is incorrectly selected LDS DMA version of a FLAT load opcode. 4495 // And LDS version should have 'lds' modifier, but it follows optional 4496 // operands so its absense is ignored by the matcher. 
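// Report a generic error at the instruction location: the missing 'lds'
// token has no operand location of its own to point at.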
4497 Error(IDLoc, "invalid operands for instruction"); 4498 return false; 4499 } 4500 4501 return true; 4502 } 4503 4504 bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) { 4505 if (!isGFX11Plus()) 4506 return true; 4507 for (auto &Operand : Operands) { 4508 if (!Operand->isReg()) 4509 continue; 4510 unsigned Reg = Operand->getReg(); 4511 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) { 4512 Error(getRegLoc(Reg, Operands), 4513 "execz and vccz are not supported on this GPU"); 4514 return false; 4515 } 4516 } 4517 return true; 4518 } 4519 4520 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4521 const SMLoc &IDLoc, 4522 const OperandVector &Operands) { 4523 if (auto ErrMsg = validateLdsDirect(Inst)) { 4524 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4525 return false; 4526 } 4527 if (!validateSOPLiteral(Inst)) { 4528 Error(getLitLoc(Operands), 4529 "only one literal operand is allowed"); 4530 return false; 4531 } 4532 if (!validateVOPLiteral(Inst, Operands)) { 4533 return false; 4534 } 4535 if (!validateConstantBusLimitations(Inst, Operands)) { 4536 return false; 4537 } 4538 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4539 return false; 4540 } 4541 if (!validateIntClampSupported(Inst)) { 4542 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4543 "integer clamping is not supported on this GPU"); 4544 return false; 4545 } 4546 if (!validateOpSel(Inst)) { 4547 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4548 "invalid op_sel operand"); 4549 return false; 4550 } 4551 if (!validateDPP(Inst, Operands)) { 4552 return false; 4553 } 4554 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 4555 if (!validateMIMGD16(Inst)) { 4556 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4557 "d16 modifier is not supported on this GPU"); 4558 return false; 4559 } 4560 if (!validateMIMGDim(Inst)) { 4561 Error(IDLoc, "dim modifier is required on this GPU"); 4562 return false; 4563 } 4564 if (!validateMIMGMSAA(Inst)) { 4565 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4566 "invalid dim; must be MSAA type"); 4567 return false; 4568 } 4569 if (auto ErrMsg = validateMIMGDataSize(Inst)) { 4570 Error(IDLoc, *ErrMsg); 4571 return false; 4572 } 4573 if (!validateMIMGAddrSize(Inst)) { 4574 Error(IDLoc, 4575 "image address size does not match dim and a16"); 4576 return false; 4577 } 4578 if (!validateMIMGAtomicDMask(Inst)) { 4579 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4580 "invalid atomic image dmask"); 4581 return false; 4582 } 4583 if (!validateMIMGGatherDMask(Inst)) { 4584 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4585 "invalid image_gather dmask: only one bit must be set"); 4586 return false; 4587 } 4588 if (!validateMovrels(Inst, Operands)) { 4589 return false; 4590 } 4591 if (!validateFlatOffset(Inst, Operands)) { 4592 return false; 4593 } 4594 if (!validateSMEMOffset(Inst, Operands)) { 4595 return false; 4596 } 4597 if (!validateMAIAccWrite(Inst, Operands)) { 4598 return false; 4599 } 4600 if (!validateMFMA(Inst, Operands)) { 4601 return false; 4602 } 4603 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4604 return false; 4605 } 4606 4607 if (!validateAGPRLdSt(Inst)) { 4608 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4609 ? 
"invalid register class: data and dst should be all VGPR or AGPR" 4610 : "invalid register class: agpr loads and stores not supported on this GPU" 4611 ); 4612 return false; 4613 } 4614 if (!validateVGPRAlign(Inst)) { 4615 Error(IDLoc, 4616 "invalid register class: vgpr tuples must be 64 bit aligned"); 4617 return false; 4618 } 4619 if (!validateGWS(Inst, Operands)) { 4620 return false; 4621 } 4622 4623 if (!validateBLGP(Inst, Operands)) { 4624 return false; 4625 } 4626 4627 if (!validateDivScale(Inst)) { 4628 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4629 return false; 4630 } 4631 if (!validateExeczVcczOperands(Operands)) { 4632 return false; 4633 } 4634 4635 if (!validateFlatLdsDMA(Inst, Operands, IDLoc)) { 4636 return false; 4637 } 4638 4639 return true; 4640 } 4641 4642 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4643 const FeatureBitset &FBS, 4644 unsigned VariantID = 0); 4645 4646 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4647 const FeatureBitset &AvailableFeatures, 4648 unsigned VariantID); 4649 4650 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4651 const FeatureBitset &FBS) { 4652 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4653 } 4654 4655 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4656 const FeatureBitset &FBS, 4657 ArrayRef<unsigned> Variants) { 4658 for (auto Variant : Variants) { 4659 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4660 return true; 4661 } 4662 4663 return false; 4664 } 4665 4666 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4667 const SMLoc &IDLoc) { 4668 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4669 4670 // Check if requested instruction variant is supported. 4671 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4672 return false; 4673 4674 // This instruction is not supported. 4675 // Clear any other pending errors because they are no longer relevant. 4676 getParser().clearPendingErrors(); 4677 4678 // Requested instruction variant is not supported. 4679 // Check if any other variants are supported. 4680 StringRef VariantName = getMatchedVariantName(); 4681 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4682 return Error(IDLoc, 4683 Twine(VariantName, 4684 " variant of this instruction is not supported")); 4685 } 4686 4687 // Finally check if this instruction is supported on any other GPU. 4688 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4689 return Error(IDLoc, "instruction not supported on this GPU"); 4690 } 4691 4692 // Instruction not supported on any GPU. Probably a typo. 4693 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4694 return Error(IDLoc, "invalid instruction" + Suggestion); 4695 } 4696 4697 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4698 OperandVector &Operands, 4699 MCStreamer &Out, 4700 uint64_t &ErrorInfo, 4701 bool MatchingInlineAsm) { 4702 MCInst Inst; 4703 unsigned Result = Match_Success; 4704 for (auto Variant : getMatchedVariants()) { 4705 uint64_t EI; 4706 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4707 Variant); 4708 // We order match statuses from least to most specific. 
We use most specific 4709 // status as resulting 4710 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4711 if ((R == Match_Success) || 4712 (R == Match_PreferE32) || 4713 (R == Match_MissingFeature && Result != Match_PreferE32) || 4714 (R == Match_InvalidOperand && Result != Match_MissingFeature 4715 && Result != Match_PreferE32) || 4716 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4717 && Result != Match_MissingFeature 4718 && Result != Match_PreferE32)) { 4719 Result = R; 4720 ErrorInfo = EI; 4721 } 4722 if (R == Match_Success) 4723 break; 4724 } 4725 4726 if (Result == Match_Success) { 4727 if (!validateInstruction(Inst, IDLoc, Operands)) { 4728 return true; 4729 } 4730 Inst.setLoc(IDLoc); 4731 Out.emitInstruction(Inst, getSTI()); 4732 return false; 4733 } 4734 4735 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4736 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4737 return true; 4738 } 4739 4740 switch (Result) { 4741 default: break; 4742 case Match_MissingFeature: 4743 // It has been verified that the specified instruction 4744 // mnemonic is valid. A match was found but it requires 4745 // features which are not supported on this GPU. 4746 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4747 4748 case Match_InvalidOperand: { 4749 SMLoc ErrorLoc = IDLoc; 4750 if (ErrorInfo != ~0ULL) { 4751 if (ErrorInfo >= Operands.size()) { 4752 return Error(IDLoc, "too few operands for instruction"); 4753 } 4754 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4755 if (ErrorLoc == SMLoc()) 4756 ErrorLoc = IDLoc; 4757 } 4758 return Error(ErrorLoc, "invalid operand for instruction"); 4759 } 4760 4761 case Match_PreferE32: 4762 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4763 "should be encoded as e32"); 4764 case Match_MnemonicFail: 4765 llvm_unreachable("Invalid instructions should have been handled already"); 4766 } 4767 llvm_unreachable("Implement any new match types added!"); 4768 } 4769 4770 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4771 int64_t Tmp = -1; 4772 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4773 return true; 4774 } 4775 if (getParser().parseAbsoluteExpression(Tmp)) { 4776 return true; 4777 } 4778 Ret = static_cast<uint32_t>(Tmp); 4779 return false; 4780 } 4781 4782 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4783 uint32_t &Minor) { 4784 if (ParseAsAbsoluteExpression(Major)) 4785 return TokError("invalid major version"); 4786 4787 if (!trySkipToken(AsmToken::Comma)) 4788 return TokError("minor version number required, comma expected"); 4789 4790 if (ParseAsAbsoluteExpression(Minor)) 4791 return TokError("invalid minor version"); 4792 4793 return false; 4794 } 4795 4796 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4797 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4798 return TokError("directive only supported for amdgcn architecture"); 4799 4800 std::string TargetIDDirective; 4801 SMLoc TargetStart = getTok().getLoc(); 4802 if (getParser().parseEscapedString(TargetIDDirective)) 4803 return true; 4804 4805 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4806 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4807 return getParser().Error(TargetRange.Start, 4808 (Twine(".amdgcn_target directive's target id ") + 4809 Twine(TargetIDDirective) + 4810 Twine(" does not match the specified target id ") + 4811 
Twine(getTargetStreamer().getTargetID()->toString())).str()); 4812 4813 return false; 4814 } 4815 4816 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4817 return Error(Range.Start, "value out of range", Range); 4818 } 4819 4820 bool AMDGPUAsmParser::calculateGPRBlocks( 4821 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4822 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4823 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4824 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4825 // TODO(scott.linder): These calculations are duplicated from 4826 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 4827 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4828 4829 unsigned NumVGPRs = NextFreeVGPR; 4830 unsigned NumSGPRs = NextFreeSGPR; 4831 4832 if (Version.Major >= 10) 4833 NumSGPRs = 0; 4834 else { 4835 unsigned MaxAddressableNumSGPRs = 4836 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4837 4838 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4839 NumSGPRs > MaxAddressableNumSGPRs) 4840 return OutOfRangeError(SGPRRange); 4841 4842 NumSGPRs += 4843 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4844 4845 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4846 NumSGPRs > MaxAddressableNumSGPRs) 4847 return OutOfRangeError(SGPRRange); 4848 4849 if (Features.test(FeatureSGPRInitBug)) 4850 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4851 } 4852 4853 VGPRBlocks = 4854 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4855 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4856 4857 return false; 4858 } 4859 4860 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4861 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4862 return TokError("directive only supported for amdgcn architecture"); 4863 4864 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4865 return TokError("directive only supported for amdhsa OS"); 4866 4867 StringRef KernelName; 4868 if (getParser().parseIdentifier(KernelName)) 4869 return true; 4870 4871 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4872 4873 StringSet<> Seen; 4874 4875 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4876 4877 SMRange VGPRRange; 4878 uint64_t NextFreeVGPR = 0; 4879 uint64_t AccumOffset = 0; 4880 uint64_t SharedVGPRCount = 0; 4881 SMRange SGPRRange; 4882 uint64_t NextFreeSGPR = 0; 4883 4884 // Count the number of user SGPRs implied from the enabled feature bits. 4885 unsigned ImpliedUserSGPRCount = 0; 4886 4887 // Track if the asm explicitly contains the directive for the user SGPR 4888 // count. 
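// (For example, an explicit '.amdhsa_user_sgpr_count 12' takes precedence
// over the implied count as long as it is not smaller; the value 12 is
// illustrative. See the check after the directive loop.)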
4889 Optional<unsigned> ExplicitUserSGPRCount; 4890 bool ReserveVCC = true; 4891 bool ReserveFlatScr = true; 4892 Optional<bool> EnableWavefrontSize32; 4893 4894 while (true) { 4895 while (trySkipToken(AsmToken::EndOfStatement)); 4896 4897 StringRef ID; 4898 SMRange IDRange = getTok().getLocRange(); 4899 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4900 return true; 4901 4902 if (ID == ".end_amdhsa_kernel") 4903 break; 4904 4905 if (!Seen.insert(ID).second) 4906 return TokError(".amdhsa_ directives cannot be repeated"); 4907 4908 SMLoc ValStart = getLoc(); 4909 int64_t IVal; 4910 if (getParser().parseAbsoluteExpression(IVal)) 4911 return true; 4912 SMLoc ValEnd = getLoc(); 4913 SMRange ValRange = SMRange(ValStart, ValEnd); 4914 4915 if (IVal < 0) 4916 return OutOfRangeError(ValRange); 4917 4918 uint64_t Val = IVal; 4919 4920 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4921 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4922 return OutOfRangeError(RANGE); \ 4923 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4924 4925 if (ID == ".amdhsa_group_segment_fixed_size") { 4926 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4927 return OutOfRangeError(ValRange); 4928 KD.group_segment_fixed_size = Val; 4929 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4930 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4931 return OutOfRangeError(ValRange); 4932 KD.private_segment_fixed_size = Val; 4933 } else if (ID == ".amdhsa_kernarg_size") { 4934 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4935 return OutOfRangeError(ValRange); 4936 KD.kernarg_size = Val; 4937 } else if (ID == ".amdhsa_user_sgpr_count") { 4938 ExplicitUserSGPRCount = Val; 4939 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4940 if (hasArchitectedFlatScratch()) 4941 return Error(IDRange.Start, 4942 "directive is not supported with architected flat scratch", 4943 IDRange); 4944 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4945 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4946 Val, ValRange); 4947 if (Val) 4948 ImpliedUserSGPRCount += 4; 4949 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4950 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4951 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4952 ValRange); 4953 if (Val) 4954 ImpliedUserSGPRCount += 2; 4955 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4956 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4957 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4958 ValRange); 4959 if (Val) 4960 ImpliedUserSGPRCount += 2; 4961 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4962 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4963 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4964 Val, ValRange); 4965 if (Val) 4966 ImpliedUserSGPRCount += 2; 4967 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4968 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4969 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4970 ValRange); 4971 if (Val) 4972 ImpliedUserSGPRCount += 2; 4973 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4974 if (hasArchitectedFlatScratch()) 4975 return Error(IDRange.Start, 4976 "directive is not supported with architected flat scratch", 4977 IDRange); 4978 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4979 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4980 ValRange); 4981 if (Val) 4982 ImpliedUserSGPRCount += 2; 4983 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4984 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4985 
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4986 Val, ValRange); 4987 if (Val) 4988 ImpliedUserSGPRCount += 1; 4989 } else if (ID == ".amdhsa_wavefront_size32") { 4990 if (IVersion.Major < 10) 4991 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4992 EnableWavefrontSize32 = Val; 4993 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4994 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4995 Val, ValRange); 4996 } else if (ID == ".amdhsa_uses_dynamic_stack") { 4997 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4998 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange); 4999 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 5000 if (hasArchitectedFlatScratch()) 5001 return Error(IDRange.Start, 5002 "directive is not supported with architected flat scratch", 5003 IDRange); 5004 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5005 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 5006 } else if (ID == ".amdhsa_enable_private_segment") { 5007 if (!hasArchitectedFlatScratch()) 5008 return Error( 5009 IDRange.Start, 5010 "directive is not supported without architected flat scratch", 5011 IDRange); 5012 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5013 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 5014 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 5015 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5016 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 5017 ValRange); 5018 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 5019 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5020 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 5021 ValRange); 5022 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 5023 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5024 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 5025 ValRange); 5026 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 5027 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5028 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 5029 ValRange); 5030 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 5031 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5032 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 5033 ValRange); 5034 } else if (ID == ".amdhsa_next_free_vgpr") { 5035 VGPRRange = ValRange; 5036 NextFreeVGPR = Val; 5037 } else if (ID == ".amdhsa_next_free_sgpr") { 5038 SGPRRange = ValRange; 5039 NextFreeSGPR = Val; 5040 } else if (ID == ".amdhsa_accum_offset") { 5041 if (!isGFX90A()) 5042 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5043 AccumOffset = Val; 5044 } else if (ID == ".amdhsa_reserve_vcc") { 5045 if (!isUInt<1>(Val)) 5046 return OutOfRangeError(ValRange); 5047 ReserveVCC = Val; 5048 } else if (ID == ".amdhsa_reserve_flat_scratch") { 5049 if (IVersion.Major < 7) 5050 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 5051 if (hasArchitectedFlatScratch()) 5052 return Error(IDRange.Start, 5053 "directive is not supported with architected flat scratch", 5054 IDRange); 5055 if (!isUInt<1>(Val)) 5056 return OutOfRangeError(ValRange); 5057 ReserveFlatScr = Val; 5058 } else if (ID == ".amdhsa_reserve_xnack_mask") { 5059 if (IVersion.Major < 8) 5060 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 5061 if (!isUInt<1>(Val)) 5062 return OutOfRangeError(ValRange); 5063 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 5064 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 5065 IDRange); 5066 } else if (ID == ".amdhsa_float_round_mode_32") { 5067 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 
5068 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 5069 } else if (ID == ".amdhsa_float_round_mode_16_64") { 5070 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5071 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 5072 } else if (ID == ".amdhsa_float_denorm_mode_32") { 5073 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5074 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 5075 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 5076 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5077 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 5078 ValRange); 5079 } else if (ID == ".amdhsa_dx10_clamp") { 5080 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5081 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 5082 } else if (ID == ".amdhsa_ieee_mode") { 5083 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 5084 Val, ValRange); 5085 } else if (ID == ".amdhsa_fp16_overflow") { 5086 if (IVersion.Major < 9) 5087 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 5088 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 5089 ValRange); 5090 } else if (ID == ".amdhsa_tg_split") { 5091 if (!isGFX90A()) 5092 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5093 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 5094 ValRange); 5095 } else if (ID == ".amdhsa_workgroup_processor_mode") { 5096 if (IVersion.Major < 10) 5097 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5098 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 5099 ValRange); 5100 } else if (ID == ".amdhsa_memory_ordered") { 5101 if (IVersion.Major < 10) 5102 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5103 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 5104 ValRange); 5105 } else if (ID == ".amdhsa_forward_progress") { 5106 if (IVersion.Major < 10) 5107 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5108 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 5109 ValRange); 5110 } else if (ID == ".amdhsa_shared_vgpr_count") { 5111 if (IVersion.Major < 10) 5112 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5113 SharedVGPRCount = Val; 5114 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, 5115 COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT, Val, 5116 ValRange); 5117 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 5118 PARSE_BITS_ENTRY( 5119 KD.compute_pgm_rsrc2, 5120 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 5121 ValRange); 5122 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 5123 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5124 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 5125 Val, ValRange); 5126 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 5127 PARSE_BITS_ENTRY( 5128 KD.compute_pgm_rsrc2, 5129 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 5130 ValRange); 5131 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 5132 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5133 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 5134 Val, ValRange); 5135 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 5136 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5137 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 5138 Val, ValRange); 5139 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 5140 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5141 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 5142 Val, ValRange); 5143 
} else if (ID == ".amdhsa_exception_int_div_zero") { 5144 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5145 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 5146 Val, ValRange); 5147 } else { 5148 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 5149 } 5150 5151 #undef PARSE_BITS_ENTRY 5152 } 5153 5154 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 5155 return TokError(".amdhsa_next_free_vgpr directive is required"); 5156 5157 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 5158 return TokError(".amdhsa_next_free_sgpr directive is required"); 5159 5160 unsigned VGPRBlocks; 5161 unsigned SGPRBlocks; 5162 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 5163 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 5164 EnableWavefrontSize32, NextFreeVGPR, 5165 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 5166 SGPRBlocks)) 5167 return true; 5168 5169 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 5170 VGPRBlocks)) 5171 return OutOfRangeError(VGPRRange); 5172 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5173 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 5174 5175 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 5176 SGPRBlocks)) 5177 return OutOfRangeError(SGPRRange); 5178 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5179 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 5180 SGPRBlocks); 5181 5182 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) 5183 return TokError(".amdhsa_user_sgpr_count smaller than implied by " 5184 "enabled user SGPRs"); 5185 5186 unsigned UserSGPRCount = 5187 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount; 5188 5189 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 5190 return TokError("too many user SGPRs enabled"); 5191 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 5192 UserSGPRCount); 5193 5194 if (isGFX90A()) { 5195 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 5196 return TokError(".amdhsa_accum_offset directive is required"); 5197 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 5198 return TokError("accum_offset should be in range [4..256] in " 5199 "increments of 4"); 5200 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 5201 return TokError("accum_offset exceeds total VGPR allocation"); 5202 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 5203 (AccumOffset / 4 - 1)); 5204 } 5205 5206 if (IVersion.Major == 10) { 5207 // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY 5208 if (SharedVGPRCount && EnableWavefrontSize32) { 5209 return TokError("shared_vgpr_count directive not valid on " 5210 "wavefront size 32"); 5211 } 5212 if (SharedVGPRCount * 2 + VGPRBlocks > 63) { 5213 return TokError("shared_vgpr_count*2 + " 5214 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot " 5215 "exceed 63"); 5216 } 5217 } 5218 5219 getTargetStreamer().EmitAmdhsaKernelDescriptor( 5220 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 5221 ReserveFlatScr); 5222 return false; 5223 } 5224 5225 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 5226 uint32_t Major; 5227 uint32_t Minor; 5228 5229 if (ParseDirectiveMajorMinor(Major, Minor)) 5230 return true; 5231 5232 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 5233 return false; 5234 } 5235 5236 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 5237 uint32_t Major; 5238 uint32_t Minor; 5239 uint32_t
Stepping; 5240 StringRef VendorName; 5241 StringRef ArchName; 5242 5243 // If this directive has no arguments, then use the ISA version for the 5244 // targeted GPU. 5245 if (isToken(AsmToken::EndOfStatement)) { 5246 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5247 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 5248 ISA.Stepping, 5249 "AMD", "AMDGPU"); 5250 return false; 5251 } 5252 5253 if (ParseDirectiveMajorMinor(Major, Minor)) 5254 return true; 5255 5256 if (!trySkipToken(AsmToken::Comma)) 5257 return TokError("stepping version number required, comma expected"); 5258 5259 if (ParseAsAbsoluteExpression(Stepping)) 5260 return TokError("invalid stepping version"); 5261 5262 if (!trySkipToken(AsmToken::Comma)) 5263 return TokError("vendor name required, comma expected"); 5264 5265 if (!parseString(VendorName, "invalid vendor name")) 5266 return true; 5267 5268 if (!trySkipToken(AsmToken::Comma)) 5269 return TokError("arch name required, comma expected"); 5270 5271 if (!parseString(ArchName, "invalid arch name")) 5272 return true; 5273 5274 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 5275 VendorName, ArchName); 5276 return false; 5277 } 5278 5279 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 5280 amd_kernel_code_t &Header) { 5281 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 5282 // assembly for backwards compatibility. 5283 if (ID == "max_scratch_backing_memory_byte_size") { 5284 Parser.eatToEndOfStatement(); 5285 return false; 5286 } 5287 5288 SmallString<40> ErrStr; 5289 raw_svector_ostream Err(ErrStr); 5290 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 5291 return TokError(Err.str()); 5292 } 5293 Lex(); 5294 5295 if (ID == "enable_wavefront_size32") { 5296 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 5297 if (!isGFX10Plus()) 5298 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 5299 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5300 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 5301 } else { 5302 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5303 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 5304 } 5305 } 5306 5307 if (ID == "wavefront_size") { 5308 if (Header.wavefront_size == 5) { 5309 if (!isGFX10Plus()) 5310 return TokError("wavefront_size=5 is only allowed on GFX10+"); 5311 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5312 return TokError("wavefront_size=5 requires +WavefrontSize32"); 5313 } else if (Header.wavefront_size == 6) { 5314 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5315 return TokError("wavefront_size=6 requires +WavefrontSize64"); 5316 } 5317 } 5318 5319 if (ID == "enable_wgp_mode") { 5320 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 5321 !isGFX10Plus()) 5322 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 5323 } 5324 5325 if (ID == "enable_mem_ordered") { 5326 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 5327 !isGFX10Plus()) 5328 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 5329 } 5330 5331 if (ID == "enable_fwd_progress") { 5332 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 5333 !isGFX10Plus()) 5334 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 5335 } 5336 5337 return false; 5338 } 5339 5340 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5341 
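  // Illustrative sketch of the block this directive opens (field names are
  // among those checked in ParseAMDKernelCodeTValue above; the values are
  // placeholders, not a complete or verified kernel header):
  //
  //   .amd_kernel_code_t
  //     wavefront_size = 6
  //     enable_wgp_mode = 0
  //   .end_amd_kernel_code_t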
amd_kernel_code_t Header; 5342 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 5343 5344 while (true) { 5345 // Lex EndOfStatement. This is in a while loop, because lexing a comment 5346 // will set the current token to EndOfStatement. 5347 while(trySkipToken(AsmToken::EndOfStatement)); 5348 5349 StringRef ID; 5350 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5351 return true; 5352 5353 if (ID == ".end_amd_kernel_code_t") 5354 break; 5355 5356 if (ParseAMDKernelCodeTValue(ID, Header)) 5357 return true; 5358 } 5359 5360 getTargetStreamer().EmitAMDKernelCodeT(Header); 5361 5362 return false; 5363 } 5364 5365 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5366 StringRef KernelName; 5367 if (!parseId(KernelName, "expected symbol name")) 5368 return true; 5369 5370 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5371 ELF::STT_AMDGPU_HSA_KERNEL); 5372 5373 KernelScope.initialize(getContext()); 5374 return false; 5375 } 5376 5377 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5378 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5379 return Error(getLoc(), 5380 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5381 "architectures"); 5382 } 5383 5384 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5385 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5386 return Error(getParser().getTok().getLoc(), "target id must match options"); 5387 5388 getTargetStreamer().EmitISAVersion(); 5389 Lex(); 5390 5391 return false; 5392 } 5393 5394 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5395 const char *AssemblerDirectiveBegin; 5396 const char *AssemblerDirectiveEnd; 5397 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5398 isHsaAbiVersion3AndAbove(&getSTI()) 5399 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 5400 HSAMD::V3::AssemblerDirectiveEnd) 5401 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 5402 HSAMD::AssemblerDirectiveEnd); 5403 5404 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5405 return Error(getLoc(), 5406 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5407 "not available on non-amdhsa OSes")).str()); 5408 } 5409 5410 std::string HSAMetadataString; 5411 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5412 HSAMetadataString)) 5413 return true; 5414 5415 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5416 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5417 return Error(getLoc(), "invalid HSA metadata"); 5418 } else { 5419 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5420 return Error(getLoc(), "invalid HSA metadata"); 5421 } 5422 5423 return false; 5424 } 5425 5426 /// Common code to parse out a block of text (typically YAML) between start and 5427 /// end directives. 
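/// The HSA metadata and MsgPack PAL metadata parsers hand this helper their
/// respective begin/end directive strings and collect the raw text in
/// between; nothing in the collected text is interpreted here.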
5428 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5429 const char *AssemblerDirectiveEnd, 5430 std::string &CollectString) { 5431 5432 raw_string_ostream CollectStream(CollectString); 5433 5434 getLexer().setSkipSpace(false); 5435 5436 bool FoundEnd = false; 5437 while (!isToken(AsmToken::Eof)) { 5438 while (isToken(AsmToken::Space)) { 5439 CollectStream << getTokenStr(); 5440 Lex(); 5441 } 5442 5443 if (trySkipId(AssemblerDirectiveEnd)) { 5444 FoundEnd = true; 5445 break; 5446 } 5447 5448 CollectStream << Parser.parseStringToEndOfStatement() 5449 << getContext().getAsmInfo()->getSeparatorString(); 5450 5451 Parser.eatToEndOfStatement(); 5452 } 5453 5454 getLexer().setSkipSpace(true); 5455 5456 if (isToken(AsmToken::Eof) && !FoundEnd) { 5457 return TokError(Twine("expected directive ") + 5458 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5459 } 5460 5461 CollectStream.flush(); 5462 return false; 5463 } 5464 5465 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5466 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5467 std::string String; 5468 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5469 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5470 return true; 5471 5472 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5473 if (!PALMetadata->setFromString(String)) 5474 return Error(getLoc(), "invalid PAL metadata"); 5475 return false; 5476 } 5477 5478 /// Parse the assembler directive for old linear-format PAL metadata. 5479 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5480 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5481 return Error(getLoc(), 5482 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5483 "not available on non-amdpal OSes")).str()); 5484 } 5485 5486 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5487 PALMetadata->setLegacy(); 5488 for (;;) { 5489 uint32_t Key, Value; 5490 if (ParseAsAbsoluteExpression(Key)) { 5491 return TokError(Twine("invalid value in ") + 5492 Twine(PALMD::AssemblerDirective)); 5493 } 5494 if (!trySkipToken(AsmToken::Comma)) { 5495 return TokError(Twine("expected an even number of values in ") + 5496 Twine(PALMD::AssemblerDirective)); 5497 } 5498 if (ParseAsAbsoluteExpression(Value)) { 5499 return TokError(Twine("invalid value in ") + 5500 Twine(PALMD::AssemblerDirective)); 5501 } 5502 PALMetadata->setRegister(Key, Value); 5503 if (!trySkipToken(AsmToken::Comma)) 5504 break; 5505 } 5506 return false; 5507 } 5508 5509 /// ParseDirectiveAMDGPULDS 5510 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5511 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5512 if (getParser().checkForValidSection()) 5513 return true; 5514 5515 StringRef Name; 5516 SMLoc NameLoc = getLoc(); 5517 if (getParser().parseIdentifier(Name)) 5518 return TokError("expected identifier in directive"); 5519 5520 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5521 if (parseToken(AsmToken::Comma, "expected ','")) 5522 return true; 5523 5524 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5525 5526 int64_t Size; 5527 SMLoc SizeLoc = getLoc(); 5528 if (getParser().parseAbsoluteExpression(Size)) 5529 return true; 5530 if (Size < 0) 5531 return Error(SizeLoc, "size must be non-negative"); 5532 if (Size > LocalMemorySize) 5533 return Error(SizeLoc, "size is too large"); 5534 5535 int64_t Alignment = 4; 5536 if (trySkipToken(AsmToken::Comma)) { 5537 SMLoc AlignLoc = getLoc(); 5538 if 
(getParser().parseAbsoluteExpression(Alignment)) 5539 return true; 5540 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5541 return Error(AlignLoc, "alignment must be a power of two"); 5542 5543 // Alignment larger than the size of LDS is possible in theory, as long 5544 // as the linker manages to place to symbol at address 0, but we do want 5545 // to make sure the alignment fits nicely into a 32-bit integer. 5546 if (Alignment >= 1u << 31) 5547 return Error(AlignLoc, "alignment is too large"); 5548 } 5549 5550 if (parseEOL()) 5551 return true; 5552 5553 Symbol->redefineIfPossible(); 5554 if (!Symbol->isUndefined()) 5555 return Error(NameLoc, "invalid symbol redefinition"); 5556 5557 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5558 return false; 5559 } 5560 5561 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5562 StringRef IDVal = DirectiveID.getString(); 5563 5564 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5565 if (IDVal == ".amdhsa_kernel") 5566 return ParseDirectiveAMDHSAKernel(); 5567 5568 // TODO: Restructure/combine with PAL metadata directive. 5569 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5570 return ParseDirectiveHSAMetadata(); 5571 } else { 5572 if (IDVal == ".hsa_code_object_version") 5573 return ParseDirectiveHSACodeObjectVersion(); 5574 5575 if (IDVal == ".hsa_code_object_isa") 5576 return ParseDirectiveHSACodeObjectISA(); 5577 5578 if (IDVal == ".amd_kernel_code_t") 5579 return ParseDirectiveAMDKernelCodeT(); 5580 5581 if (IDVal == ".amdgpu_hsa_kernel") 5582 return ParseDirectiveAMDGPUHsaKernel(); 5583 5584 if (IDVal == ".amd_amdgpu_isa") 5585 return ParseDirectiveISAVersion(); 5586 5587 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5588 return ParseDirectiveHSAMetadata(); 5589 } 5590 5591 if (IDVal == ".amdgcn_target") 5592 return ParseDirectiveAMDGCNTarget(); 5593 5594 if (IDVal == ".amdgpu_lds") 5595 return ParseDirectiveAMDGPULDS(); 5596 5597 if (IDVal == PALMD::AssemblerDirectiveBegin) 5598 return ParseDirectivePALMetadataBegin(); 5599 5600 if (IDVal == PALMD::AssemblerDirective) 5601 return ParseDirectivePALMetadata(); 5602 5603 return true; 5604 } 5605 5606 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5607 unsigned RegNo) { 5608 5609 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo)) 5610 return isGFX9Plus(); 5611 5612 // GFX10+ has 2 more SGPRs 104 and 105. 5613 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo)) 5614 return hasSGPR104_SGPR105(); 5615 5616 switch (RegNo) { 5617 case AMDGPU::SRC_SHARED_BASE: 5618 case AMDGPU::SRC_SHARED_LIMIT: 5619 case AMDGPU::SRC_PRIVATE_BASE: 5620 case AMDGPU::SRC_PRIVATE_LIMIT: 5621 return isGFX9Plus(); 5622 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5623 return isGFX9Plus() && !isGFX11Plus(); 5624 case AMDGPU::TBA: 5625 case AMDGPU::TBA_LO: 5626 case AMDGPU::TBA_HI: 5627 case AMDGPU::TMA: 5628 case AMDGPU::TMA_LO: 5629 case AMDGPU::TMA_HI: 5630 return !isGFX9Plus(); 5631 case AMDGPU::XNACK_MASK: 5632 case AMDGPU::XNACK_MASK_LO: 5633 case AMDGPU::XNACK_MASK_HI: 5634 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5635 case AMDGPU::SGPR_NULL: 5636 return isGFX10Plus(); 5637 default: 5638 break; 5639 } 5640 5641 if (isCI()) 5642 return true; 5643 5644 if (isSI() || isGFX10Plus()) { 5645 // No flat_scr on SI. 5646 // On GFX10Plus flat scratch is not a valid register operand and can only be 5647 // accessed with s_setreg/s_getreg. 
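    // For example (illustrative), an operand spelled "flat_scratch_lo" would
    // be rejected by the check below on SI and on GFX10+.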
    switch (RegNo) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::FLAT_SCR_LO:
    case AMDGPU::FLAT_SCR_HI:
      return false;
    default:
      return true;
    }
  }

  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
  // SI/CI have.
  if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
    return hasSGPR102_SGPR103();

  return true;
}

OperandMatchResultTy
AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
                              OperandMode Mode) {
  OperandMatchResultTy ResTy = parseVOPD(Operands);
  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
      isToken(AsmToken::EndOfStatement))
    return ResTy;

  // Try to parse with a custom parser
  ResTy = MatchOperandParserImpl(Operands, Mnemonic);

  // If we successfully parsed the operand or if there was an error parsing,
  // we are done.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list. This is only done
  // by custom parser, so we shouldn't continue on to the generic parsing.
  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
      isToken(AsmToken::EndOfStatement))
    return ResTy;

  SMLoc RBraceLoc;
  SMLoc LBraceLoc = getLoc();
  if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
    unsigned Prefix = Operands.size();

    for (;;) {
      auto Loc = getLoc();
      ResTy = parseReg(Operands);
      if (ResTy == MatchOperand_NoMatch)
        Error(Loc, "expected a register");
      if (ResTy != MatchOperand_Success)
        return MatchOperand_ParseFail;

      RBraceLoc = getLoc();
      if (trySkipToken(AsmToken::RBrac))
        break;

      if (!skipToken(AsmToken::Comma,
                     "expected a comma or a closing square bracket")) {
        return MatchOperand_ParseFail;
      }
    }

    if (Operands.size() - Prefix > 1) {
      Operands.insert(Operands.begin() + Prefix,
                      AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
      Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
    }

    return MatchOperand_Success;
  }

  return parseRegOrImm(Operands);
}

StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
  // Clear any forced encodings from the previous instruction.
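  // Illustrative examples of the suffix handling below: "v_add_f32_e64" is
  // returned as "v_add_f32" with the 64-bit encoding forced, and
  // "v_mov_b32_sdwa" as "v_mov_b32" with the SDWA form forced.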
5724 setForcedEncodingSize(0); 5725 setForcedDPP(false); 5726 setForcedSDWA(false); 5727 5728 if (Name.endswith("_e64_dpp")) { 5729 setForcedDPP(true); 5730 setForcedEncodingSize(64); 5731 return Name.substr(0, Name.size() - 8); 5732 } else if (Name.endswith("_e64")) { 5733 setForcedEncodingSize(64); 5734 return Name.substr(0, Name.size() - 4); 5735 } else if (Name.endswith("_e32")) { 5736 setForcedEncodingSize(32); 5737 return Name.substr(0, Name.size() - 4); 5738 } else if (Name.endswith("_dpp")) { 5739 setForcedDPP(true); 5740 return Name.substr(0, Name.size() - 4); 5741 } else if (Name.endswith("_sdwa")) { 5742 setForcedSDWA(true); 5743 return Name.substr(0, Name.size() - 5); 5744 } 5745 return Name; 5746 } 5747 5748 static void applyMnemonicAliases(StringRef &Mnemonic, 5749 const FeatureBitset &Features, 5750 unsigned VariantID); 5751 5752 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5753 StringRef Name, 5754 SMLoc NameLoc, OperandVector &Operands) { 5755 // Add the instruction mnemonic 5756 Name = parseMnemonicSuffix(Name); 5757 5758 // If the target architecture uses MnemonicAlias, call it here to parse 5759 // operands correctly. 5760 applyMnemonicAliases(Name, getAvailableFeatures(), 0); 5761 5762 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5763 5764 bool IsMIMG = Name.startswith("image_"); 5765 5766 while (!trySkipToken(AsmToken::EndOfStatement)) { 5767 OperandMode Mode = OperandMode_Default; 5768 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5769 Mode = OperandMode_NSA; 5770 CPolSeen = 0; 5771 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5772 5773 if (Res != MatchOperand_Success) { 5774 checkUnsupportedInstruction(Name, NameLoc); 5775 if (!Parser.hasPendingError()) { 5776 // FIXME: use real operand location rather than the current location. 5777 StringRef Msg = 5778 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5779 "not a valid operand."; 5780 Error(getLoc(), Msg); 5781 } 5782 while (!trySkipToken(AsmToken::EndOfStatement)) { 5783 lex(); 5784 } 5785 return true; 5786 } 5787 5788 // Eat the comma or space if there is one. 5789 trySkipToken(AsmToken::Comma); 5790 } 5791 5792 return false; 5793 } 5794 5795 //===----------------------------------------------------------------------===// 5796 // Utility functions 5797 //===----------------------------------------------------------------------===// 5798 5799 OperandMatchResultTy 5800 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5801 5802 if (!trySkipId(Prefix, AsmToken::Colon)) 5803 return MatchOperand_NoMatch; 5804 5805 return parseExpr(IntVal) ? 
MatchOperand_Success : MatchOperand_ParseFail; 5806 } 5807 5808 OperandMatchResultTy 5809 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5810 AMDGPUOperand::ImmTy ImmTy, 5811 bool (*ConvertResult)(int64_t&)) { 5812 SMLoc S = getLoc(); 5813 int64_t Value = 0; 5814 5815 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5816 if (Res != MatchOperand_Success) 5817 return Res; 5818 5819 if (ConvertResult && !ConvertResult(Value)) { 5820 Error(S, "invalid " + StringRef(Prefix) + " value."); 5821 } 5822 5823 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5824 return MatchOperand_Success; 5825 } 5826 5827 OperandMatchResultTy 5828 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5829 OperandVector &Operands, 5830 AMDGPUOperand::ImmTy ImmTy, 5831 bool (*ConvertResult)(int64_t&)) { 5832 SMLoc S = getLoc(); 5833 if (!trySkipId(Prefix, AsmToken::Colon)) 5834 return MatchOperand_NoMatch; 5835 5836 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5837 return MatchOperand_ParseFail; 5838 5839 unsigned Val = 0; 5840 const unsigned MaxSize = 4; 5841 5842 // FIXME: How to verify the number of elements matches the number of src 5843 // operands? 5844 for (int I = 0; ; ++I) { 5845 int64_t Op; 5846 SMLoc Loc = getLoc(); 5847 if (!parseExpr(Op)) 5848 return MatchOperand_ParseFail; 5849 5850 if (Op != 0 && Op != 1) { 5851 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5852 return MatchOperand_ParseFail; 5853 } 5854 5855 Val |= (Op << I); 5856 5857 if (trySkipToken(AsmToken::RBrac)) 5858 break; 5859 5860 if (I + 1 == MaxSize) { 5861 Error(getLoc(), "expected a closing square bracket"); 5862 return MatchOperand_ParseFail; 5863 } 5864 5865 if (!skipToken(AsmToken::Comma, "expected a comma")) 5866 return MatchOperand_ParseFail; 5867 } 5868 5869 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5870 return MatchOperand_Success; 5871 } 5872 5873 OperandMatchResultTy 5874 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5875 AMDGPUOperand::ImmTy ImmTy) { 5876 int64_t Bit; 5877 SMLoc S = getLoc(); 5878 5879 if (trySkipId(Name)) { 5880 Bit = 1; 5881 } else if (trySkipId("no", Name)) { 5882 Bit = 0; 5883 } else { 5884 return MatchOperand_NoMatch; 5885 } 5886 5887 if (Name == "r128" && !hasMIMG_R128()) { 5888 Error(S, "r128 modifier is not supported on this GPU"); 5889 return MatchOperand_ParseFail; 5890 } 5891 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5892 Error(S, "a16 modifier is not supported on this GPU"); 5893 return MatchOperand_ParseFail; 5894 } 5895 5896 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5897 ImmTy = AMDGPUOperand::ImmTyR128A16; 5898 5899 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5900 return MatchOperand_Success; 5901 } 5902 5903 OperandMatchResultTy 5904 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5905 unsigned CPolOn = 0; 5906 unsigned CPolOff = 0; 5907 SMLoc S = getLoc(); 5908 5909 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 5910 if (isGFX940() && !Mnemo.startswith("s_")) { 5911 if (trySkipId("sc0")) 5912 CPolOn = AMDGPU::CPol::SC0; 5913 else if (trySkipId("nosc0")) 5914 CPolOff = AMDGPU::CPol::SC0; 5915 else if (trySkipId("nt")) 5916 CPolOn = AMDGPU::CPol::NT; 5917 else if (trySkipId("nont")) 5918 CPolOff = AMDGPU::CPol::NT; 5919 else if (trySkipId("sc1")) 5920 CPolOn = AMDGPU::CPol::SC1; 5921 else if (trySkipId("nosc1")) 5922 CPolOff = AMDGPU::CPol::SC1; 5923 else 5924 return 
MatchOperand_NoMatch; 5925 } 5926 else if (trySkipId("glc")) 5927 CPolOn = AMDGPU::CPol::GLC; 5928 else if (trySkipId("noglc")) 5929 CPolOff = AMDGPU::CPol::GLC; 5930 else if (trySkipId("slc")) 5931 CPolOn = AMDGPU::CPol::SLC; 5932 else if (trySkipId("noslc")) 5933 CPolOff = AMDGPU::CPol::SLC; 5934 else if (trySkipId("dlc")) 5935 CPolOn = AMDGPU::CPol::DLC; 5936 else if (trySkipId("nodlc")) 5937 CPolOff = AMDGPU::CPol::DLC; 5938 else if (trySkipId("scc")) 5939 CPolOn = AMDGPU::CPol::SCC; 5940 else if (trySkipId("noscc")) 5941 CPolOff = AMDGPU::CPol::SCC; 5942 else 5943 return MatchOperand_NoMatch; 5944 5945 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5946 Error(S, "dlc modifier is not supported on this GPU"); 5947 return MatchOperand_ParseFail; 5948 } 5949 5950 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5951 Error(S, "scc modifier is not supported on this GPU"); 5952 return MatchOperand_ParseFail; 5953 } 5954 5955 if (CPolSeen & (CPolOn | CPolOff)) { 5956 Error(S, "duplicate cache policy modifier"); 5957 return MatchOperand_ParseFail; 5958 } 5959 5960 CPolSeen |= (CPolOn | CPolOff); 5961 5962 for (unsigned I = 1; I != Operands.size(); ++I) { 5963 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5964 if (Op.isCPol()) { 5965 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5966 return MatchOperand_Success; 5967 } 5968 } 5969 5970 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5971 AMDGPUOperand::ImmTyCPol)); 5972 5973 return MatchOperand_Success; 5974 } 5975 5976 static void addOptionalImmOperand( 5977 MCInst& Inst, const OperandVector& Operands, 5978 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5979 AMDGPUOperand::ImmTy ImmT, 5980 int64_t Default = 0) { 5981 auto i = OptionalIdx.find(ImmT); 5982 if (i != OptionalIdx.end()) { 5983 unsigned Idx = i->second; 5984 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5985 } else { 5986 Inst.addOperand(MCOperand::createImm(Default)); 5987 } 5988 } 5989 5990 OperandMatchResultTy 5991 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5992 StringRef &Value, 5993 SMLoc &StringLoc) { 5994 if (!trySkipId(Prefix, AsmToken::Colon)) 5995 return MatchOperand_NoMatch; 5996 5997 StringLoc = getLoc(); 5998 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5999 : MatchOperand_ParseFail; 6000 } 6001 6002 //===----------------------------------------------------------------------===// 6003 // MTBUF format 6004 //===----------------------------------------------------------------------===// 6005 6006 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 6007 int64_t MaxVal, 6008 int64_t &Fmt) { 6009 int64_t Val; 6010 SMLoc Loc = getLoc(); 6011 6012 auto Res = parseIntWithPrefix(Pref, Val); 6013 if (Res == MatchOperand_ParseFail) 6014 return false; 6015 if (Res == MatchOperand_NoMatch) 6016 return true; 6017 6018 if (Val < 0 || Val > MaxVal) { 6019 Error(Loc, Twine("out of range ", StringRef(Pref))); 6020 return false; 6021 } 6022 6023 Fmt = Val; 6024 return true; 6025 } 6026 6027 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 6028 // values to live in a joint format operand in the MCInst encoding. 6029 OperandMatchResultTy 6030 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 6031 using namespace llvm::AMDGPU::MTBUFFormat; 6032 6033 int64_t Dfmt = DFMT_UNDEF; 6034 int64_t Nfmt = NFMT_UNDEF; 6035 6036 // dfmt and nfmt can appear in either order, and each is optional. 
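  // For example (illustrative), both "dfmt:5, nfmt:7" and "nfmt:7, dfmt:5"
  // are accepted, and either field may be omitted, in which case its default
  // is substituted below.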
6037 for (int I = 0; I < 2; ++I) { 6038 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 6039 return MatchOperand_ParseFail; 6040 6041 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 6042 return MatchOperand_ParseFail; 6043 } 6044 // Skip optional comma between dfmt/nfmt 6045 // but guard against 2 commas following each other. 6046 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 6047 !peekToken().is(AsmToken::Comma)) { 6048 trySkipToken(AsmToken::Comma); 6049 } 6050 } 6051 6052 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 6053 return MatchOperand_NoMatch; 6054 6055 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6056 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt; 6057 6058 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6059 return MatchOperand_Success; 6060 } 6061 6062 OperandMatchResultTy 6063 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 6064 using namespace llvm::AMDGPU::MTBUFFormat; 6065 6066 int64_t Fmt = UFMT_UNDEF; 6067 6068 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 6069 return MatchOperand_ParseFail; 6070 6071 if (Fmt == UFMT_UNDEF) 6072 return MatchOperand_NoMatch; 6073 6074 Format = Fmt; 6075 return MatchOperand_Success; 6076 } 6077 6078 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 6079 int64_t &Nfmt, 6080 StringRef FormatStr, 6081 SMLoc Loc) { 6082 using namespace llvm::AMDGPU::MTBUFFormat; 6083 int64_t Format; 6084 6085 Format = getDfmt(FormatStr); 6086 if (Format != DFMT_UNDEF) { 6087 Dfmt = Format; 6088 return true; 6089 } 6090 6091 Format = getNfmt(FormatStr, getSTI()); 6092 if (Format != NFMT_UNDEF) { 6093 Nfmt = Format; 6094 return true; 6095 } 6096 6097 Error(Loc, "unsupported format"); 6098 return false; 6099 } 6100 6101 OperandMatchResultTy 6102 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 6103 SMLoc FormatLoc, 6104 int64_t &Format) { 6105 using namespace llvm::AMDGPU::MTBUFFormat; 6106 6107 int64_t Dfmt = DFMT_UNDEF; 6108 int64_t Nfmt = NFMT_UNDEF; 6109 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 6110 return MatchOperand_ParseFail; 6111 6112 if (trySkipToken(AsmToken::Comma)) { 6113 StringRef Str; 6114 SMLoc Loc = getLoc(); 6115 if (!parseId(Str, "expected a format string") || 6116 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 6117 return MatchOperand_ParseFail; 6118 } 6119 if (Dfmt == DFMT_UNDEF) { 6120 Error(Loc, "duplicate numeric format"); 6121 return MatchOperand_ParseFail; 6122 } else if (Nfmt == NFMT_UNDEF) { 6123 Error(Loc, "duplicate data format"); 6124 return MatchOperand_ParseFail; 6125 } 6126 } 6127 6128 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6129 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 6130 6131 if (isGFX10Plus()) { 6132 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI()); 6133 if (Ufmt == UFMT_UNDEF) { 6134 Error(FormatLoc, "unsupported format"); 6135 return MatchOperand_ParseFail; 6136 } 6137 Format = Ufmt; 6138 } else { 6139 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6140 } 6141 6142 return MatchOperand_Success; 6143 } 6144 6145 OperandMatchResultTy 6146 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 6147 SMLoc Loc, 6148 int64_t &Format) { 6149 using namespace llvm::AMDGPU::MTBUFFormat; 6150 6151 auto Id = getUnifiedFormat(FormatStr, getSTI()); 6152 if (Id == UFMT_UNDEF) 6153 return MatchOperand_NoMatch; 6154 6155 if (!isGFX10Plus()) { 6156 Error(Loc, "unified format is not supported on this GPU"); 6157 return MatchOperand_ParseFail; 6158 } 6159 6160 Format = Id; 6161 return MatchOperand_Success; 6162 } 6163 6164 OperandMatchResultTy 6165 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 6166 using namespace llvm::AMDGPU::MTBUFFormat; 6167 SMLoc Loc = getLoc(); 6168 6169 if (!parseExpr(Format)) 6170 return MatchOperand_ParseFail; 6171 if (!isValidFormatEncoding(Format, getSTI())) { 6172 Error(Loc, "out of range format"); 6173 return MatchOperand_ParseFail; 6174 } 6175 6176 return MatchOperand_Success; 6177 } 6178 6179 OperandMatchResultTy 6180 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 6181 using namespace llvm::AMDGPU::MTBUFFormat; 6182 6183 if (!trySkipId("format", AsmToken::Colon)) 6184 return MatchOperand_NoMatch; 6185 6186 if (trySkipToken(AsmToken::LBrac)) { 6187 StringRef FormatStr; 6188 SMLoc Loc = getLoc(); 6189 if (!parseId(FormatStr, "expected a format string")) 6190 return MatchOperand_ParseFail; 6191 6192 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 6193 if (Res == MatchOperand_NoMatch) 6194 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 6195 if (Res != MatchOperand_Success) 6196 return Res; 6197 6198 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 6199 return MatchOperand_ParseFail; 6200 6201 return MatchOperand_Success; 6202 } 6203 6204 return parseNumericFormat(Format); 6205 } 6206 6207 OperandMatchResultTy 6208 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 6209 using namespace llvm::AMDGPU::MTBUFFormat; 6210 6211 int64_t Format = getDefaultFormatEncoding(getSTI()); 6212 OperandMatchResultTy Res; 6213 SMLoc Loc = getLoc(); 6214 6215 // Parse legacy format syntax. 6216 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 6217 if (Res == MatchOperand_ParseFail) 6218 return Res; 6219 6220 bool FormatFound = (Res == MatchOperand_Success); 6221 6222 Operands.push_back( 6223 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 6224 6225 if (FormatFound) 6226 trySkipToken(AsmToken::Comma); 6227 6228 if (isToken(AsmToken::EndOfStatement)) { 6229 // We are expecting an soffset operand, 6230 // but let matcher handle the error. 6231 return MatchOperand_Success; 6232 } 6233 6234 // Parse soffset. 
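  // A typical MTBUF operand sequence at this point (illustrative only; the
  // format name is an assumed GFX10-style symbolic format):
  //   tbuffer_load_format_x v0, off, s[4:7], s1 format:[BUF_FMT_32_FLOAT]
  // where "s1" is the soffset parsed here and the format modifier may appear
  // on either side of it.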
6235 Res = parseRegOrImm(Operands); 6236 if (Res != MatchOperand_Success) 6237 return Res; 6238 6239 trySkipToken(AsmToken::Comma); 6240 6241 if (!FormatFound) { 6242 Res = parseSymbolicOrNumericFormat(Format); 6243 if (Res == MatchOperand_ParseFail) 6244 return Res; 6245 if (Res == MatchOperand_Success) { 6246 auto Size = Operands.size(); 6247 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 6248 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 6249 Op.setImm(Format); 6250 } 6251 return MatchOperand_Success; 6252 } 6253 6254 if (isId("format") && peekToken().is(AsmToken::Colon)) { 6255 Error(getLoc(), "duplicate format"); 6256 return MatchOperand_ParseFail; 6257 } 6258 return MatchOperand_Success; 6259 } 6260 6261 //===----------------------------------------------------------------------===// 6262 // ds 6263 //===----------------------------------------------------------------------===// 6264 6265 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 6266 const OperandVector &Operands) { 6267 OptionalImmIndexMap OptionalIdx; 6268 6269 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6270 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6271 6272 // Add the register arguments 6273 if (Op.isReg()) { 6274 Op.addRegOperands(Inst, 1); 6275 continue; 6276 } 6277 6278 // Handle optional arguments 6279 OptionalIdx[Op.getImmTy()] = i; 6280 } 6281 6282 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 6283 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 6284 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6285 6286 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6287 } 6288 6289 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 6290 bool IsGdsHardcoded) { 6291 OptionalImmIndexMap OptionalIdx; 6292 AMDGPUOperand::ImmTy OffsetType = AMDGPUOperand::ImmTyOffset; 6293 6294 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6295 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6296 6297 // Add the register arguments 6298 if (Op.isReg()) { 6299 Op.addRegOperands(Inst, 1); 6300 continue; 6301 } 6302 6303 if (Op.isToken() && Op.getToken() == "gds") { 6304 IsGdsHardcoded = true; 6305 continue; 6306 } 6307 6308 // Handle optional arguments 6309 OptionalIdx[Op.getImmTy()] = i; 6310 6311 if (Op.getImmTy() == AMDGPUOperand::ImmTySwizzle) 6312 OffsetType = AMDGPUOperand::ImmTySwizzle; 6313 } 6314 6315 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 6316 6317 if (!IsGdsHardcoded) { 6318 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6319 } 6320 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6321 } 6322 6323 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 6324 OptionalImmIndexMap OptionalIdx; 6325 6326 unsigned OperandIdx[4]; 6327 unsigned EnMask = 0; 6328 int SrcIdx = 0; 6329 6330 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6331 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6332 6333 // Add the register arguments 6334 if (Op.isReg()) { 6335 assert(SrcIdx < 4); 6336 OperandIdx[SrcIdx] = Inst.size(); 6337 Op.addRegOperands(Inst, 1); 6338 ++SrcIdx; 6339 continue; 6340 } 6341 6342 if (Op.isOff()) { 6343 assert(SrcIdx < 4); 6344 OperandIdx[SrcIdx] = Inst.size(); 6345 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 6346 ++SrcIdx; 6347 continue; 6348 } 6349 6350 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) 
{ 6351 Op.addImmOperands(Inst, 1); 6352 continue; 6353 } 6354 6355 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en")) 6356 continue; 6357 6358 // Handle optional arguments 6359 OptionalIdx[Op.getImmTy()] = i; 6360 } 6361 6362 assert(SrcIdx == 4); 6363 6364 bool Compr = false; 6365 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 6366 Compr = true; 6367 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 6368 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 6369 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 6370 } 6371 6372 for (auto i = 0; i < SrcIdx; ++i) { 6373 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 6374 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i); 6375 } 6376 } 6377 6378 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6379 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6380 6381 Inst.addOperand(MCOperand::createImm(EnMask)); 6382 } 6383 6384 //===----------------------------------------------------------------------===// 6385 // s_waitcnt 6386 //===----------------------------------------------------------------------===// 6387 6388 static bool 6389 encodeCnt( 6390 const AMDGPU::IsaVersion ISA, 6391 int64_t &IntVal, 6392 int64_t CntVal, 6393 bool Saturate, 6394 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6395 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6396 { 6397 bool Failed = false; 6398 6399 IntVal = encode(ISA, IntVal, CntVal); 6400 if (CntVal != decode(ISA, IntVal)) { 6401 if (Saturate) { 6402 IntVal = encode(ISA, IntVal, -1); 6403 } else { 6404 Failed = true; 6405 } 6406 } 6407 return Failed; 6408 } 6409 6410 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6411 6412 SMLoc CntLoc = getLoc(); 6413 StringRef CntName = getTokenStr(); 6414 6415 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6416 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6417 return false; 6418 6419 int64_t CntVal; 6420 SMLoc ValLoc = getLoc(); 6421 if (!parseExpr(CntVal)) 6422 return false; 6423 6424 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6425 6426 bool Failed = true; 6427 bool Sat = CntName.endswith("_sat"); 6428 6429 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6430 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6431 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6432 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6433 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6434 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6435 } else { 6436 Error(CntLoc, "invalid counter name " + CntName); 6437 return false; 6438 } 6439 6440 if (Failed) { 6441 Error(ValLoc, "too large value for " + CntName); 6442 return false; 6443 } 6444 6445 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6446 return false; 6447 6448 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6449 if (isToken(AsmToken::EndOfStatement)) { 6450 Error(getLoc(), "expected a counter name"); 6451 return false; 6452 } 6453 } 6454 6455 return true; 6456 } 6457 6458 OperandMatchResultTy 6459 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 6460 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6461 int64_t Waitcnt = getWaitcntBitMask(ISA); 6462 SMLoc S = getLoc(); 6463 6464 if (isToken(AsmToken::Identifier) 
&& peekToken().is(AsmToken::LParen)) { 6465 while (!isToken(AsmToken::EndOfStatement)) { 6466 if (!parseCnt(Waitcnt)) 6467 return MatchOperand_ParseFail; 6468 } 6469 } else { 6470 if (!parseExpr(Waitcnt)) 6471 return MatchOperand_ParseFail; 6472 } 6473 6474 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6475 return MatchOperand_Success; 6476 } 6477 6478 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) { 6479 SMLoc FieldLoc = getLoc(); 6480 StringRef FieldName = getTokenStr(); 6481 if (!skipToken(AsmToken::Identifier, "expected a field name") || 6482 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6483 return false; 6484 6485 SMLoc ValueLoc = getLoc(); 6486 StringRef ValueName = getTokenStr(); 6487 if (!skipToken(AsmToken::Identifier, "expected a value name") || 6488 !skipToken(AsmToken::RParen, "expected a right parenthesis")) 6489 return false; 6490 6491 unsigned Shift; 6492 if (FieldName == "instid0") { 6493 Shift = 0; 6494 } else if (FieldName == "instskip") { 6495 Shift = 4; 6496 } else if (FieldName == "instid1") { 6497 Shift = 7; 6498 } else { 6499 Error(FieldLoc, "invalid field name " + FieldName); 6500 return false; 6501 } 6502 6503 int Value; 6504 if (Shift == 4) { 6505 // Parse values for instskip. 6506 Value = StringSwitch<int>(ValueName) 6507 .Case("SAME", 0) 6508 .Case("NEXT", 1) 6509 .Case("SKIP_1", 2) 6510 .Case("SKIP_2", 3) 6511 .Case("SKIP_3", 4) 6512 .Case("SKIP_4", 5) 6513 .Default(-1); 6514 } else { 6515 // Parse values for instid0 and instid1. 6516 Value = StringSwitch<int>(ValueName) 6517 .Case("NO_DEP", 0) 6518 .Case("VALU_DEP_1", 1) 6519 .Case("VALU_DEP_2", 2) 6520 .Case("VALU_DEP_3", 3) 6521 .Case("VALU_DEP_4", 4) 6522 .Case("TRANS32_DEP_1", 5) 6523 .Case("TRANS32_DEP_2", 6) 6524 .Case("TRANS32_DEP_3", 7) 6525 .Case("FMA_ACCUM_CYCLE_1", 8) 6526 .Case("SALU_CYCLE_1", 9) 6527 .Case("SALU_CYCLE_2", 10) 6528 .Case("SALU_CYCLE_3", 11) 6529 .Default(-1); 6530 } 6531 if (Value < 0) { 6532 Error(ValueLoc, "invalid value name " + ValueName); 6533 return false; 6534 } 6535 6536 Delay |= Value << Shift; 6537 return true; 6538 } 6539 6540 OperandMatchResultTy 6541 AMDGPUAsmParser::parseSDelayAluOps(OperandVector &Operands) { 6542 int64_t Delay = 0; 6543 SMLoc S = getLoc(); 6544 6545 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6546 do { 6547 if (!parseDelay(Delay)) 6548 return MatchOperand_ParseFail; 6549 } while (trySkipToken(AsmToken::Pipe)); 6550 } else { 6551 if (!parseExpr(Delay)) 6552 return MatchOperand_ParseFail; 6553 } 6554 6555 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S)); 6556 return MatchOperand_Success; 6557 } 6558 6559 bool 6560 AMDGPUOperand::isSWaitCnt() const { 6561 return isImm(); 6562 } 6563 6564 bool AMDGPUOperand::isSDelayAlu() const { return isImm(); } 6565 6566 //===----------------------------------------------------------------------===// 6567 // DepCtr 6568 //===----------------------------------------------------------------------===// 6569 6570 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId, 6571 StringRef DepCtrName) { 6572 switch (ErrorId) { 6573 case OPR_ID_UNKNOWN: 6574 Error(Loc, Twine("invalid counter name ", DepCtrName)); 6575 return; 6576 case OPR_ID_UNSUPPORTED: 6577 Error(Loc, Twine(DepCtrName, " is not supported on this GPU")); 6578 return; 6579 case OPR_ID_DUPLICATE: 6580 Error(Loc, Twine("duplicate counter name ", DepCtrName)); 6581 return; 6582 case OPR_VAL_INVALID: 6583 Error(Loc, Twine("invalid value for ", DepCtrName)); 6584 return; 6585 default: 
6586 assert(false); 6587 } 6588 } 6589 6590 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) { 6591 6592 using namespace llvm::AMDGPU::DepCtr; 6593 6594 SMLoc DepCtrLoc = getLoc(); 6595 StringRef DepCtrName = getTokenStr(); 6596 6597 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6598 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6599 return false; 6600 6601 int64_t ExprVal; 6602 if (!parseExpr(ExprVal)) 6603 return false; 6604 6605 unsigned PrevOprMask = UsedOprMask; 6606 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI()); 6607 6608 if (CntVal < 0) { 6609 depCtrError(DepCtrLoc, CntVal, DepCtrName); 6610 return false; 6611 } 6612 6613 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6614 return false; 6615 6616 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6617 if (isToken(AsmToken::EndOfStatement)) { 6618 Error(getLoc(), "expected a counter name"); 6619 return false; 6620 } 6621 } 6622 6623 unsigned CntValMask = PrevOprMask ^ UsedOprMask; 6624 DepCtr = (DepCtr & ~CntValMask) | CntVal; 6625 return true; 6626 } 6627 6628 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) { 6629 using namespace llvm::AMDGPU::DepCtr; 6630 6631 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI()); 6632 SMLoc Loc = getLoc(); 6633 6634 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6635 unsigned UsedOprMask = 0; 6636 while (!isToken(AsmToken::EndOfStatement)) { 6637 if (!parseDepCtr(DepCtr, UsedOprMask)) 6638 return MatchOperand_ParseFail; 6639 } 6640 } else { 6641 if (!parseExpr(DepCtr)) 6642 return MatchOperand_ParseFail; 6643 } 6644 6645 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc)); 6646 return MatchOperand_Success; 6647 } 6648 6649 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); } 6650 6651 //===----------------------------------------------------------------------===// 6652 // hwreg 6653 //===----------------------------------------------------------------------===// 6654 6655 bool 6656 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6657 OperandInfoTy &Offset, 6658 OperandInfoTy &Width) { 6659 using namespace llvm::AMDGPU::Hwreg; 6660 6661 // The register may be specified by name or using a numeric code 6662 HwReg.Loc = getLoc(); 6663 if (isToken(AsmToken::Identifier) && 6664 (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6665 HwReg.IsSymbolic = true; 6666 lex(); // skip register name 6667 } else if (!parseExpr(HwReg.Id, "a register name")) { 6668 return false; 6669 } 6670 6671 if (trySkipToken(AsmToken::RParen)) 6672 return true; 6673 6674 // parse optional params 6675 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6676 return false; 6677 6678 Offset.Loc = getLoc(); 6679 if (!parseExpr(Offset.Id)) 6680 return false; 6681 6682 if (!skipToken(AsmToken::Comma, "expected a comma")) 6683 return false; 6684 6685 Width.Loc = getLoc(); 6686 return parseExpr(Width.Id) && 6687 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6688 } 6689 6690 bool 6691 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6692 const OperandInfoTy &Offset, 6693 const OperandInfoTy &Width) { 6694 6695 using namespace llvm::AMDGPU::Hwreg; 6696 6697 if (HwReg.IsSymbolic) { 6698 if (HwReg.Id == OPR_ID_UNSUPPORTED) { 6699 Error(HwReg.Loc, 6700 "specified hardware register is not supported on this GPU"); 6701 return false; 6702 } 6703 } else { 6704 if 
(!isValidHwreg(HwReg.Id)) { 6705 Error(HwReg.Loc, 6706 "invalid code of hardware register: only 6-bit values are legal"); 6707 return false; 6708 } 6709 } 6710 if (!isValidHwregOffset(Offset.Id)) { 6711 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6712 return false; 6713 } 6714 if (!isValidHwregWidth(Width.Id)) { 6715 Error(Width.Loc, 6716 "invalid bitfield width: only values from 1 to 32 are legal"); 6717 return false; 6718 } 6719 return true; 6720 } 6721 6722 OperandMatchResultTy 6723 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6724 using namespace llvm::AMDGPU::Hwreg; 6725 6726 int64_t ImmVal = 0; 6727 SMLoc Loc = getLoc(); 6728 6729 if (trySkipId("hwreg", AsmToken::LParen)) { 6730 OperandInfoTy HwReg(OPR_ID_UNKNOWN); 6731 OperandInfoTy Offset(OFFSET_DEFAULT_); 6732 OperandInfoTy Width(WIDTH_DEFAULT_); 6733 if (parseHwregBody(HwReg, Offset, Width) && 6734 validateHwreg(HwReg, Offset, Width)) { 6735 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6736 } else { 6737 return MatchOperand_ParseFail; 6738 } 6739 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6740 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6741 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6742 return MatchOperand_ParseFail; 6743 } 6744 } else { 6745 return MatchOperand_ParseFail; 6746 } 6747 6748 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6749 return MatchOperand_Success; 6750 } 6751 6752 bool AMDGPUOperand::isHwreg() const { 6753 return isImmTy(ImmTyHwreg); 6754 } 6755 6756 //===----------------------------------------------------------------------===// 6757 // sendmsg 6758 //===----------------------------------------------------------------------===// 6759 6760 bool 6761 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6762 OperandInfoTy &Op, 6763 OperandInfoTy &Stream) { 6764 using namespace llvm::AMDGPU::SendMsg; 6765 6766 Msg.Loc = getLoc(); 6767 if (isToken(AsmToken::Identifier) && 6768 (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6769 Msg.IsSymbolic = true; 6770 lex(); // skip message name 6771 } else if (!parseExpr(Msg.Id, "a message name")) { 6772 return false; 6773 } 6774 6775 if (trySkipToken(AsmToken::Comma)) { 6776 Op.IsDefined = true; 6777 Op.Loc = getLoc(); 6778 if (isToken(AsmToken::Identifier) && 6779 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6780 lex(); // skip operation name 6781 } else if (!parseExpr(Op.Id, "an operation name")) { 6782 return false; 6783 } 6784 6785 if (trySkipToken(AsmToken::Comma)) { 6786 Stream.IsDefined = true; 6787 Stream.Loc = getLoc(); 6788 if (!parseExpr(Stream.Id)) 6789 return false; 6790 } 6791 } 6792 6793 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6794 } 6795 6796 bool 6797 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6798 const OperandInfoTy &Op, 6799 const OperandInfoTy &Stream) { 6800 using namespace llvm::AMDGPU::SendMsg; 6801 6802 // Validation strictness depends on whether message is specified 6803 // in a symbolic or in a numeric form. In the latter case 6804 // only encoding possibility is checked. 
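  // For example (illustrative), "sendmsg(MSG_INTERRUPT)" is validated
  // symbolically, while a numeric form such as "sendmsg(1)" only has to be
  // encodable.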
6805 bool Strict = Msg.IsSymbolic; 6806 6807 if (Strict) { 6808 if (Msg.Id == OPR_ID_UNSUPPORTED) { 6809 Error(Msg.Loc, "specified message id is not supported on this GPU"); 6810 return false; 6811 } 6812 } else { 6813 if (!isValidMsgId(Msg.Id, getSTI())) { 6814 Error(Msg.Loc, "invalid message id"); 6815 return false; 6816 } 6817 } 6818 if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) { 6819 if (Op.IsDefined) { 6820 Error(Op.Loc, "message does not support operations"); 6821 } else { 6822 Error(Msg.Loc, "missing message operation"); 6823 } 6824 return false; 6825 } 6826 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6827 Error(Op.Loc, "invalid operation id"); 6828 return false; 6829 } 6830 if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) && 6831 Stream.IsDefined) { 6832 Error(Stream.Loc, "message operation does not support streams"); 6833 return false; 6834 } 6835 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6836 Error(Stream.Loc, "invalid message stream id"); 6837 return false; 6838 } 6839 return true; 6840 } 6841 6842 OperandMatchResultTy 6843 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6844 using namespace llvm::AMDGPU::SendMsg; 6845 6846 int64_t ImmVal = 0; 6847 SMLoc Loc = getLoc(); 6848 6849 if (trySkipId("sendmsg", AsmToken::LParen)) { 6850 OperandInfoTy Msg(OPR_ID_UNKNOWN); 6851 OperandInfoTy Op(OP_NONE_); 6852 OperandInfoTy Stream(STREAM_ID_NONE_); 6853 if (parseSendMsgBody(Msg, Op, Stream) && 6854 validateSendMsg(Msg, Op, Stream)) { 6855 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6856 } else { 6857 return MatchOperand_ParseFail; 6858 } 6859 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6860 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6861 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6862 return MatchOperand_ParseFail; 6863 } 6864 } else { 6865 return MatchOperand_ParseFail; 6866 } 6867 6868 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6869 return MatchOperand_Success; 6870 } 6871 6872 bool AMDGPUOperand::isSendMsg() const { 6873 return isImmTy(ImmTySendMsg); 6874 } 6875 6876 //===----------------------------------------------------------------------===// 6877 // v_interp 6878 //===----------------------------------------------------------------------===// 6879 6880 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6881 StringRef Str; 6882 SMLoc S = getLoc(); 6883 6884 if (!parseId(Str)) 6885 return MatchOperand_NoMatch; 6886 6887 int Slot = StringSwitch<int>(Str) 6888 .Case("p10", 0) 6889 .Case("p20", 1) 6890 .Case("p0", 2) 6891 .Default(-1); 6892 6893 if (Slot == -1) { 6894 Error(S, "invalid interpolation slot"); 6895 return MatchOperand_ParseFail; 6896 } 6897 6898 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6899 AMDGPUOperand::ImmTyInterpSlot)); 6900 return MatchOperand_Success; 6901 } 6902 6903 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6904 StringRef Str; 6905 SMLoc S = getLoc(); 6906 6907 if (!parseId(Str)) 6908 return MatchOperand_NoMatch; 6909 6910 if (!Str.startswith("attr")) { 6911 Error(S, "invalid interpolation attribute"); 6912 return MatchOperand_ParseFail; 6913 } 6914 6915 StringRef Chan = Str.take_back(2); 6916 int AttrChan = StringSwitch<int>(Chan) 6917 .Case(".x", 0) 6918 .Case(".y", 1) 6919 .Case(".z", 2) 6920 .Case(".w", 3) 6921 .Default(-1); 6922 if (AttrChan == -1) { 6923 Error(S, "invalid or missing interpolation attribute channel"); 
6924 return MatchOperand_ParseFail; 6925 } 6926 6927 Str = Str.drop_back(2).drop_front(4); 6928 6929 uint8_t Attr; 6930 if (Str.getAsInteger(10, Attr)) { 6931 Error(S, "invalid or missing interpolation attribute number"); 6932 return MatchOperand_ParseFail; 6933 } 6934 6935 if (Attr > 63) { 6936 Error(S, "out of bounds interpolation attribute number"); 6937 return MatchOperand_ParseFail; 6938 } 6939 6940 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6941 6942 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6943 AMDGPUOperand::ImmTyInterpAttr)); 6944 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6945 AMDGPUOperand::ImmTyAttrChan)); 6946 return MatchOperand_Success; 6947 } 6948 6949 //===----------------------------------------------------------------------===// 6950 // exp 6951 //===----------------------------------------------------------------------===// 6952 6953 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6954 using namespace llvm::AMDGPU::Exp; 6955 6956 StringRef Str; 6957 SMLoc S = getLoc(); 6958 6959 if (!parseId(Str)) 6960 return MatchOperand_NoMatch; 6961 6962 unsigned Id = getTgtId(Str); 6963 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6964 Error(S, (Id == ET_INVALID) ? 6965 "invalid exp target" : 6966 "exp target is not supported on this GPU"); 6967 return MatchOperand_ParseFail; 6968 } 6969 6970 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6971 AMDGPUOperand::ImmTyExpTgt)); 6972 return MatchOperand_Success; 6973 } 6974 6975 //===----------------------------------------------------------------------===// 6976 // parser helpers 6977 //===----------------------------------------------------------------------===// 6978 6979 bool 6980 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6981 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6982 } 6983 6984 bool 6985 AMDGPUAsmParser::isId(const StringRef Id) const { 6986 return isId(getToken(), Id); 6987 } 6988 6989 bool 6990 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6991 return getTokenKind() == Kind; 6992 } 6993 6994 bool 6995 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6996 if (isId(Id)) { 6997 lex(); 6998 return true; 6999 } 7000 return false; 7001 } 7002 7003 bool 7004 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 7005 if (isToken(AsmToken::Identifier)) { 7006 StringRef Tok = getTokenStr(); 7007 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 7008 lex(); 7009 return true; 7010 } 7011 } 7012 return false; 7013 } 7014 7015 bool 7016 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 7017 if (isId(Id) && peekToken().is(Kind)) { 7018 lex(); 7019 lex(); 7020 return true; 7021 } 7022 return false; 7023 } 7024 7025 bool 7026 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 7027 if (isToken(Kind)) { 7028 lex(); 7029 return true; 7030 } 7031 return false; 7032 } 7033 7034 bool 7035 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 7036 const StringRef ErrMsg) { 7037 if (!trySkipToken(Kind)) { 7038 Error(getLoc(), ErrMsg); 7039 return false; 7040 } 7041 return true; 7042 } 7043 7044 bool 7045 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 7046 SMLoc S = getLoc(); 7047 7048 const MCExpr *Expr; 7049 if (Parser.parseExpression(Expr)) 7050 return false; 7051 7052 if (Expr->evaluateAsAbsolute(Imm)) 7053 return true; 7054 7055 if (Expected.empty()) { 7056 Error(S, "expected 
absolute expression"); 7057 } else { 7058 Error(S, Twine("expected ", Expected) + 7059 Twine(" or an absolute expression")); 7060 } 7061 return false; 7062 } 7063 7064 bool 7065 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 7066 SMLoc S = getLoc(); 7067 7068 const MCExpr *Expr; 7069 if (Parser.parseExpression(Expr)) 7070 return false; 7071 7072 int64_t IntVal; 7073 if (Expr->evaluateAsAbsolute(IntVal)) { 7074 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 7075 } else { 7076 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 7077 } 7078 return true; 7079 } 7080 7081 bool 7082 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 7083 if (isToken(AsmToken::String)) { 7084 Val = getToken().getStringContents(); 7085 lex(); 7086 return true; 7087 } else { 7088 Error(getLoc(), ErrMsg); 7089 return false; 7090 } 7091 } 7092 7093 bool 7094 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 7095 if (isToken(AsmToken::Identifier)) { 7096 Val = getTokenStr(); 7097 lex(); 7098 return true; 7099 } else { 7100 if (!ErrMsg.empty()) 7101 Error(getLoc(), ErrMsg); 7102 return false; 7103 } 7104 } 7105 7106 AsmToken 7107 AMDGPUAsmParser::getToken() const { 7108 return Parser.getTok(); 7109 } 7110 7111 AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) { 7112 return isToken(AsmToken::EndOfStatement) 7113 ? getToken() 7114 : getLexer().peekTok(ShouldSkipSpace); 7115 } 7116 7117 void 7118 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 7119 auto TokCount = getLexer().peekTokens(Tokens); 7120 7121 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 7122 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 7123 } 7124 7125 AsmToken::TokenKind 7126 AMDGPUAsmParser::getTokenKind() const { 7127 return getLexer().getKind(); 7128 } 7129 7130 SMLoc 7131 AMDGPUAsmParser::getLoc() const { 7132 return getToken().getLoc(); 7133 } 7134 7135 StringRef 7136 AMDGPUAsmParser::getTokenStr() const { 7137 return getToken().getString(); 7138 } 7139 7140 void 7141 AMDGPUAsmParser::lex() { 7142 Parser.Lex(); 7143 } 7144 7145 SMLoc 7146 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 7147 const OperandVector &Operands) const { 7148 for (unsigned i = Operands.size() - 1; i > 0; --i) { 7149 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7150 if (Test(Op)) 7151 return Op.getStartLoc(); 7152 } 7153 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 7154 } 7155 7156 SMLoc 7157 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 7158 const OperandVector &Operands) const { 7159 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 7160 return getOperandLoc(Test, Operands); 7161 } 7162 7163 SMLoc 7164 AMDGPUAsmParser::getRegLoc(unsigned Reg, 7165 const OperandVector &Operands) const { 7166 auto Test = [=](const AMDGPUOperand& Op) { 7167 return Op.isRegKind() && Op.getReg() == Reg; 7168 }; 7169 return getOperandLoc(Test, Operands); 7170 } 7171 7172 SMLoc 7173 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 7174 auto Test = [](const AMDGPUOperand& Op) { 7175 return Op.IsImmKindLiteral() || Op.isExpr(); 7176 }; 7177 return getOperandLoc(Test, Operands); 7178 } 7179 7180 SMLoc 7181 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 7182 auto Test = [](const AMDGPUOperand& Op) { 7183 return Op.isImmKindConst(); 7184 }; 7185 return getOperandLoc(Test, Operands); 7186 } 7187 7188 //===----------------------------------------------------------------------===// 7189 // 
swizzle 7190 //===----------------------------------------------------------------------===// 7191 7192 LLVM_READNONE 7193 static unsigned 7194 encodeBitmaskPerm(const unsigned AndMask, 7195 const unsigned OrMask, 7196 const unsigned XorMask) { 7197 using namespace llvm::AMDGPU::Swizzle; 7198 7199 return BITMASK_PERM_ENC | 7200 (AndMask << BITMASK_AND_SHIFT) | 7201 (OrMask << BITMASK_OR_SHIFT) | 7202 (XorMask << BITMASK_XOR_SHIFT); 7203 } 7204 7205 bool 7206 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 7207 const unsigned MinVal, 7208 const unsigned MaxVal, 7209 const StringRef ErrMsg, 7210 SMLoc &Loc) { 7211 if (!skipToken(AsmToken::Comma, "expected a comma")) { 7212 return false; 7213 } 7214 Loc = getLoc(); 7215 if (!parseExpr(Op)) { 7216 return false; 7217 } 7218 if (Op < MinVal || Op > MaxVal) { 7219 Error(Loc, ErrMsg); 7220 return false; 7221 } 7222 7223 return true; 7224 } 7225 7226 bool 7227 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 7228 const unsigned MinVal, 7229 const unsigned MaxVal, 7230 const StringRef ErrMsg) { 7231 SMLoc Loc; 7232 for (unsigned i = 0; i < OpNum; ++i) { 7233 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 7234 return false; 7235 } 7236 7237 return true; 7238 } 7239 7240 bool 7241 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 7242 using namespace llvm::AMDGPU::Swizzle; 7243 7244 int64_t Lane[LANE_NUM]; 7245 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 7246 "expected a 2-bit lane id")) { 7247 Imm = QUAD_PERM_ENC; 7248 for (unsigned I = 0; I < LANE_NUM; ++I) { 7249 Imm |= Lane[I] << (LANE_SHIFT * I); 7250 } 7251 return true; 7252 } 7253 return false; 7254 } 7255 7256 bool 7257 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 7258 using namespace llvm::AMDGPU::Swizzle; 7259 7260 SMLoc Loc; 7261 int64_t GroupSize; 7262 int64_t LaneIdx; 7263 7264 if (!parseSwizzleOperand(GroupSize, 7265 2, 32, 7266 "group size must be in the interval [2,32]", 7267 Loc)) { 7268 return false; 7269 } 7270 if (!isPowerOf2_64(GroupSize)) { 7271 Error(Loc, "group size must be a power of two"); 7272 return false; 7273 } 7274 if (parseSwizzleOperand(LaneIdx, 7275 0, GroupSize - 1, 7276 "lane id must be in the interval [0,group size - 1]", 7277 Loc)) { 7278 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 7279 return true; 7280 } 7281 return false; 7282 } 7283 7284 bool 7285 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 7286 using namespace llvm::AMDGPU::Swizzle; 7287 7288 SMLoc Loc; 7289 int64_t GroupSize; 7290 7291 if (!parseSwizzleOperand(GroupSize, 7292 2, 32, 7293 "group size must be in the interval [2,32]", 7294 Loc)) { 7295 return false; 7296 } 7297 if (!isPowerOf2_64(GroupSize)) { 7298 Error(Loc, "group size must be a power of two"); 7299 return false; 7300 } 7301 7302 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 7303 return true; 7304 } 7305 7306 bool 7307 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 7308 using namespace llvm::AMDGPU::Swizzle; 7309 7310 SMLoc Loc; 7311 int64_t GroupSize; 7312 7313 if (!parseSwizzleOperand(GroupSize, 7314 1, 16, 7315 "group size must be in the interval [1,16]", 7316 Loc)) { 7317 return false; 7318 } 7319 if (!isPowerOf2_64(GroupSize)) { 7320 Error(Loc, "group size must be a power of two"); 7321 return false; 7322 } 7323 7324 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 7325 return true; 7326 } 7327 7328 bool 7329 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 7330 using namespace llvm::AMDGPU::Swizzle; 7331 7332 if 
(!skipToken(AsmToken::Comma, "expected a comma")) { 7333 return false; 7334 } 7335 7336 StringRef Ctl; 7337 SMLoc StrLoc = getLoc(); 7338 if (!parseString(Ctl)) { 7339 return false; 7340 } 7341 if (Ctl.size() != BITMASK_WIDTH) { 7342 Error(StrLoc, "expected a 5-character mask"); 7343 return false; 7344 } 7345 7346 unsigned AndMask = 0; 7347 unsigned OrMask = 0; 7348 unsigned XorMask = 0; 7349 7350 for (size_t i = 0; i < Ctl.size(); ++i) { 7351 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 7352 switch(Ctl[i]) { 7353 default: 7354 Error(StrLoc, "invalid mask"); 7355 return false; 7356 case '0': 7357 break; 7358 case '1': 7359 OrMask |= Mask; 7360 break; 7361 case 'p': 7362 AndMask |= Mask; 7363 break; 7364 case 'i': 7365 AndMask |= Mask; 7366 XorMask |= Mask; 7367 break; 7368 } 7369 } 7370 7371 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 7372 return true; 7373 } 7374 7375 bool 7376 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 7377 7378 SMLoc OffsetLoc = getLoc(); 7379 7380 if (!parseExpr(Imm, "a swizzle macro")) { 7381 return false; 7382 } 7383 if (!isUInt<16>(Imm)) { 7384 Error(OffsetLoc, "expected a 16-bit offset"); 7385 return false; 7386 } 7387 return true; 7388 } 7389 7390 bool 7391 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 7392 using namespace llvm::AMDGPU::Swizzle; 7393 7394 if (skipToken(AsmToken::LParen, "expected a left parenthesis")) { 7395 7396 SMLoc ModeLoc = getLoc(); 7397 bool Ok = false; 7398 7399 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 7400 Ok = parseSwizzleQuadPerm(Imm); 7401 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 7402 Ok = parseSwizzleBitmaskPerm(Imm); 7403 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 7404 Ok = parseSwizzleBroadcast(Imm); 7405 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 7406 Ok = parseSwizzleSwap(Imm); 7407 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 7408 Ok = parseSwizzleReverse(Imm); 7409 } else { 7410 Error(ModeLoc, "expected a swizzle mode"); 7411 } 7412 7413 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis"); 7414 } 7415 7416 return false; 7417 } 7418 7419 OperandMatchResultTy 7420 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 7421 SMLoc S = getLoc(); 7422 int64_t Imm = 0; 7423 7424 if (trySkipId("offset")) { 7425 7426 bool Ok = false; 7427 if (skipToken(AsmToken::Colon, "expected a colon")) { 7428 if (trySkipId("swizzle")) { 7429 Ok = parseSwizzleMacro(Imm); 7430 } else { 7431 Ok = parseSwizzleOffset(Imm); 7432 } 7433 } 7434 7435 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 7436 7437 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 7438 } else { 7439 // Swizzle "offset" operand is optional. 7440 // If it is omitted, try parsing other optional operands.
7441 return parseOptionalOpr(Operands); 7442 } 7443 } 7444 7445 bool 7446 AMDGPUOperand::isSwizzle() const { 7447 return isImmTy(ImmTySwizzle); 7448 } 7449 7450 //===----------------------------------------------------------------------===// 7451 // VGPR Index Mode 7452 //===----------------------------------------------------------------------===// 7453 7454 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 7455 7456 using namespace llvm::AMDGPU::VGPRIndexMode; 7457 7458 if (trySkipToken(AsmToken::RParen)) { 7459 return OFF; 7460 } 7461 7462 int64_t Imm = 0; 7463 7464 while (true) { 7465 unsigned Mode = 0; 7466 SMLoc S = getLoc(); 7467 7468 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 7469 if (trySkipId(IdSymbolic[ModeId])) { 7470 Mode = 1 << ModeId; 7471 break; 7472 } 7473 } 7474 7475 if (Mode == 0) { 7476 Error(S, (Imm == 0)? 7477 "expected a VGPR index mode or a closing parenthesis" : 7478 "expected a VGPR index mode"); 7479 return UNDEF; 7480 } 7481 7482 if (Imm & Mode) { 7483 Error(S, "duplicate VGPR index mode"); 7484 return UNDEF; 7485 } 7486 Imm |= Mode; 7487 7488 if (trySkipToken(AsmToken::RParen)) 7489 break; 7490 if (!skipToken(AsmToken::Comma, 7491 "expected a comma or a closing parenthesis")) 7492 return UNDEF; 7493 } 7494 7495 return Imm; 7496 } 7497 7498 OperandMatchResultTy 7499 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 7500 7501 using namespace llvm::AMDGPU::VGPRIndexMode; 7502 7503 int64_t Imm = 0; 7504 SMLoc S = getLoc(); 7505 7506 if (trySkipId("gpr_idx", AsmToken::LParen)) { 7507 Imm = parseGPRIdxMacro(); 7508 if (Imm == UNDEF) 7509 return MatchOperand_ParseFail; 7510 } else { 7511 if (getParser().parseAbsoluteExpression(Imm)) 7512 return MatchOperand_ParseFail; 7513 if (Imm < 0 || !isUInt<4>(Imm)) { 7514 Error(S, "invalid immediate: only 4-bit values are legal"); 7515 return MatchOperand_ParseFail; 7516 } 7517 } 7518 7519 Operands.push_back( 7520 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 7521 return MatchOperand_Success; 7522 } 7523 7524 bool AMDGPUOperand::isGPRIdxMode() const { 7525 return isImmTy(ImmTyGprIdxMode); 7526 } 7527 7528 //===----------------------------------------------------------------------===// 7529 // sopp branch targets 7530 //===----------------------------------------------------------------------===// 7531 7532 OperandMatchResultTy 7533 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 7534 7535 // Make sure we are not parsing something 7536 // that looks like a label or an expression but is not. 7537 // This will improve error messages. 7538 if (isRegister() || isModifier()) 7539 return MatchOperand_NoMatch; 7540 7541 if (!parseExpr(Operands)) 7542 return MatchOperand_ParseFail; 7543 7544 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 7545 assert(Opr.isImm() || Opr.isExpr()); 7546 SMLoc Loc = Opr.getStartLoc(); 7547 7548 // Currently we do not support arbitrary expressions as branch targets. 7549 // Only labels and absolute expressions are accepted. 
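// Illustrative examples (assumed asm syntax): 's_branch loop_end' (a label)
// and 's_branch 8' (an absolute expression) pass these checks, while a
// relocatable expression such as 'loop_end + 4' is rejected below.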
7550 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 7551 Error(Loc, "expected an absolute expression or a label"); 7552 } else if (Opr.isImm() && !Opr.isS16Imm()) { 7553 Error(Loc, "expected a 16-bit signed jump offset"); 7554 } 7555 7556 return MatchOperand_Success; 7557 } 7558 7559 //===----------------------------------------------------------------------===// 7560 // Boolean holding registers 7561 //===----------------------------------------------------------------------===// 7562 7563 OperandMatchResultTy 7564 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 7565 return parseReg(Operands); 7566 } 7567 7568 //===----------------------------------------------------------------------===// 7569 // mubuf 7570 //===----------------------------------------------------------------------===// 7571 7572 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 7573 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 7574 } 7575 7576 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 7577 const OperandVector &Operands, 7578 bool IsAtomic, 7579 bool IsLds) { 7580 OptionalImmIndexMap OptionalIdx; 7581 unsigned FirstOperandIdx = 1; 7582 bool IsAtomicReturn = false; 7583 7584 if (IsAtomic) { 7585 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7586 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7587 if (!Op.isCPol()) 7588 continue; 7589 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7590 break; 7591 } 7592 7593 if (!IsAtomicReturn) { 7594 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7595 if (NewOpc != -1) 7596 Inst.setOpcode(NewOpc); 7597 } 7598 7599 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7600 SIInstrFlags::IsAtomicRet; 7601 } 7602 7603 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7604 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7605 7606 // Add the register arguments 7607 if (Op.isReg()) { 7608 Op.addRegOperands(Inst, 1); 7609 // Insert a tied src for atomic return dst. 7610 // This cannot be postponed as subsequent calls to 7611 // addImmOperands rely on correct number of MC operands. 7612 if (IsAtomicReturn && i == FirstOperandIdx) 7613 Op.addRegOperands(Inst, 1); 7614 continue; 7615 } 7616 7617 // Handle the case where soffset is an immediate 7618 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7619 Op.addImmOperands(Inst, 1); 7620 continue; 7621 } 7622 7623 // Handle tokens like 'offen' which are sometimes hard-coded into the 7624 // asm string. There are no MCInst operands for these. 
7625 if (Op.isToken()) { 7626 continue; 7627 } 7628 assert(Op.isImm()); 7629 7630 // Handle optional arguments 7631 OptionalIdx[Op.getImmTy()] = i; 7632 } 7633 7634 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7635 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7636 7637 if (!IsLds) { // tfe is not legal with lds opcodes 7638 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7639 } 7640 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7641 } 7642 7643 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7644 OptionalImmIndexMap OptionalIdx; 7645 7646 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7647 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7648 7649 // Add the register arguments 7650 if (Op.isReg()) { 7651 Op.addRegOperands(Inst, 1); 7652 continue; 7653 } 7654 7655 // Handle the case where soffset is an immediate 7656 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7657 Op.addImmOperands(Inst, 1); 7658 continue; 7659 } 7660 7661 // Handle tokens like 'offen' which are sometimes hard-coded into the 7662 // asm string. There are no MCInst operands for these. 7663 if (Op.isToken()) { 7664 continue; 7665 } 7666 assert(Op.isImm()); 7667 7668 // Handle optional arguments 7669 OptionalIdx[Op.getImmTy()] = i; 7670 } 7671 7672 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7673 AMDGPUOperand::ImmTyOffset); 7674 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7675 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7676 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7677 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7678 } 7679 7680 //===----------------------------------------------------------------------===// 7681 // mimg 7682 //===----------------------------------------------------------------------===// 7683 7684 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7685 bool IsAtomic) { 7686 unsigned I = 1; 7687 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7688 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7689 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7690 } 7691 7692 if (IsAtomic) { 7693 // Add src, same as dst 7694 assert(Desc.getNumDefs() == 1); 7695 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7696 } 7697 7698 OptionalImmIndexMap OptionalIdx; 7699 7700 for (unsigned E = Operands.size(); I != E; ++I) { 7701 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7702 7703 // Add the register arguments 7704 if (Op.isReg()) { 7705 Op.addRegOperands(Inst, 1); 7706 } else if (Op.isImmModifier()) { 7707 OptionalIdx[Op.getImmTy()] = I; 7708 } else if (!Op.isToken()) { 7709 llvm_unreachable("unexpected operand type"); 7710 } 7711 } 7712 7713 bool IsGFX10Plus = isGFX10Plus(); 7714 7715 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7716 if (IsGFX10Plus) 7717 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7718 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7719 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7720 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7721 if (IsGFX10Plus) 7722 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7723 
if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 7724 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7725 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7726 if (!IsGFX10Plus) 7727 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7728 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7729 } 7730 7731 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7732 cvtMIMG(Inst, Operands, true); 7733 } 7734 7735 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7736 OptionalImmIndexMap OptionalIdx; 7737 bool IsAtomicReturn = false; 7738 7739 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7740 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7741 if (!Op.isCPol()) 7742 continue; 7743 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7744 break; 7745 } 7746 7747 if (!IsAtomicReturn) { 7748 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7749 if (NewOpc != -1) 7750 Inst.setOpcode(NewOpc); 7751 } 7752 7753 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7754 SIInstrFlags::IsAtomicRet; 7755 7756 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7757 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7758 7759 // Add the register arguments 7760 if (Op.isReg()) { 7761 Op.addRegOperands(Inst, 1); 7762 if (IsAtomicReturn && i == 1) 7763 Op.addRegOperands(Inst, 1); 7764 continue; 7765 } 7766 7767 // Handle the case where soffset is an immediate 7768 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7769 Op.addImmOperands(Inst, 1); 7770 continue; 7771 } 7772 7773 // Handle tokens like 'offen' which are sometimes hard-coded into the 7774 // asm string. There are no MCInst operands for these. 7775 if (Op.isToken()) { 7776 continue; 7777 } 7778 assert(Op.isImm()); 7779 7780 // Handle optional arguments 7781 OptionalIdx[Op.getImmTy()] = i; 7782 } 7783 7784 if ((int)Inst.getNumOperands() <= 7785 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7786 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7787 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7788 } 7789 7790 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7791 const OperandVector &Operands) { 7792 for (unsigned I = 1; I < Operands.size(); ++I) { 7793 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7794 if (Operand.isReg()) 7795 Operand.addRegOperands(Inst, 1); 7796 } 7797 7798 Inst.addOperand(MCOperand::createImm(1)); // a16 7799 } 7800 7801 //===----------------------------------------------------------------------===// 7802 // smrd 7803 //===----------------------------------------------------------------------===// 7804 7805 bool AMDGPUOperand::isSMRDOffset8() const { 7806 return isImm() && isUInt<8>(getImm()); 7807 } 7808 7809 bool AMDGPUOperand::isSMEMOffset() const { 7810 return isImmTy(ImmTyNone) || 7811 isImmTy(ImmTyOffset); // Offset range is checked later by validator. 7812 } 7813 7814 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7815 // 32-bit literals are only supported on CI and we only want to use them 7816 // when the offset is > 8-bits. 
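// e.g. (illustrative) an offset of 0x1ff fails isUInt<8> and therefore needs
// the 32-bit literal form checked below, while 0xff is still covered by
// isSMRDOffset8() above.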
7817 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7818 } 7819 7820 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7821 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7822 } 7823 7824 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7825 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7826 } 7827 7828 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7829 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7830 } 7831 7832 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7833 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7834 } 7835 7836 //===----------------------------------------------------------------------===// 7837 // vop3 7838 //===----------------------------------------------------------------------===// 7839 7840 static bool ConvertOmodMul(int64_t &Mul) { 7841 if (Mul != 1 && Mul != 2 && Mul != 4) 7842 return false; 7843 7844 Mul >>= 1; 7845 return true; 7846 } 7847 7848 static bool ConvertOmodDiv(int64_t &Div) { 7849 if (Div == 1) { 7850 Div = 0; 7851 return true; 7852 } 7853 7854 if (Div == 2) { 7855 Div = 3; 7856 return true; 7857 } 7858 7859 return false; 7860 } 7861 7862 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7863 // This is intentional and ensures compatibility with sp3. 7864 // See bug 35397 for details. 7865 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7866 if (BoundCtrl == 0 || BoundCtrl == 1) { 7867 BoundCtrl = 1; 7868 return true; 7869 } 7870 return false; 7871 } 7872 7873 // Note: the order in this table matches the order of operands in AsmString. 7874 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7875 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7876 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7877 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7878 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7879 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7880 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7881 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7882 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7883 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7884 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7885 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7886 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7887 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7888 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7889 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7890 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7891 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7892 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7893 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7894 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7895 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7896 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7897 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7898 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7899 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7900 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 7901 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 7902 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 7903 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 7904 {"vm", AMDGPUOperand::ImmTyExpVM, 
true, nullptr}, 7905 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 7906 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 7907 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 7908 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 7909 {"dpp8", AMDGPUOperand::ImmTyDPP8, false, nullptr}, 7910 {"dpp_ctrl", AMDGPUOperand::ImmTyDppCtrl, false, nullptr}, 7911 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7912 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7913 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7914 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7915 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 7916 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 7917 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}, 7918 {"wait_vdst", AMDGPUOperand::ImmTyWaitVDST, false, nullptr}, 7919 {"wait_exp", AMDGPUOperand::ImmTyWaitEXP, false, nullptr} 7920 }; 7921 7922 void AMDGPUAsmParser::onBeginOfFile() { 7923 if (!getParser().getStreamer().getTargetStreamer() || 7924 getSTI().getTargetTriple().getArch() == Triple::r600) 7925 return; 7926 7927 if (!getTargetStreamer().getTargetID()) 7928 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString()); 7929 7930 if (isHsaAbiVersion3AndAbove(&getSTI())) 7931 getTargetStreamer().EmitDirectiveAMDGCNTarget(); 7932 } 7933 7934 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 7935 7936 OperandMatchResultTy res = parseOptionalOpr(Operands); 7937 7938 // This is a hack to enable hardcoded mandatory operands which follow 7939 // optional operands. 7940 // 7941 // The current design assumes that all operands after the first optional operand 7942 // are also optional. However, the implementation of some instructions violates 7943 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 7944 // 7945 // To alleviate this problem, we have to (implicitly) parse extra operands 7946 // to make sure the autogenerated parser of custom operands never hits hardcoded 7947 // mandatory operands.
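// Schematic illustration (hypothetical source line, operand layout
// simplified): for an atomic such as 'flat_atomic_add v0, v[1:2], v3 glc'
// whose asm string hardcodes the trailing 'glc', the lookahead below keeps
// consuming optional operands so the generated matcher can still reach that
// mandatory token.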
7948 7949 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7950 if (res != MatchOperand_Success || 7951 isToken(AsmToken::EndOfStatement)) 7952 break; 7953 7954 trySkipToken(AsmToken::Comma); 7955 res = parseOptionalOpr(Operands); 7956 } 7957 7958 return res; 7959 } 7960 7961 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7962 OperandMatchResultTy res; 7963 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7964 // try to parse any optional operand here 7965 if (Op.IsBit) { 7966 res = parseNamedBit(Op.Name, Operands, Op.Type); 7967 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7968 res = parseOModOperand(Operands); 7969 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7970 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7971 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7972 res = parseSDWASel(Operands, Op.Name, Op.Type); 7973 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7974 res = parseSDWADstUnused(Operands); 7975 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7976 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7977 Op.Type == AMDGPUOperand::ImmTyNegLo || 7978 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7979 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7980 Op.ConvertResult); 7981 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7982 res = parseDim(Operands); 7983 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7984 res = parseCPol(Operands); 7985 } else if (Op.Type == AMDGPUOperand::ImmTyDPP8) { 7986 res = parseDPP8(Operands); 7987 } else if (Op.Type == AMDGPUOperand::ImmTyDppCtrl) { 7988 res = parseDPPCtrl(Operands); 7989 } else { 7990 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7991 if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) { 7992 res = parseOperandArrayWithPrefix("neg", Operands, 7993 AMDGPUOperand::ImmTyBLGP, 7994 nullptr); 7995 } 7996 } 7997 if (res != MatchOperand_NoMatch) { 7998 return res; 7999 } 8000 } 8001 return MatchOperand_NoMatch; 8002 } 8003 8004 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 8005 StringRef Name = getTokenStr(); 8006 if (Name == "mul") { 8007 return parseIntWithPrefix("mul", Operands, 8008 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 8009 } 8010 8011 if (Name == "div") { 8012 return parseIntWithPrefix("div", Operands, 8013 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 8014 } 8015 8016 return MatchOperand_NoMatch; 8017 } 8018 8019 // Determines which bit DST_OP_SEL occupies in the op_sel operand according to 8020 // the number of src operands present, then copies that bit into src0_modifiers. 
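// Illustrative example (assuming a two-source opcode): 'op_sel:[0,0,1]' parses
// to an op_sel immediate of 0b100; with SrcNum == 2 the DST bit is bit 2, so
// SISrcMods::DST_OP_SEL is OR'ed into src0_modifiers by the code below.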
8021 void cvtVOP3DstOpSelOnly(MCInst &Inst) { 8022 int Opc = Inst.getOpcode(); 8023 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8024 if (OpSelIdx == -1) 8025 return; 8026 8027 int SrcNum; 8028 const int Ops[] = { AMDGPU::OpName::src0, 8029 AMDGPU::OpName::src1, 8030 AMDGPU::OpName::src2 }; 8031 for (SrcNum = 0; 8032 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 8033 ++SrcNum); 8034 assert(SrcNum > 0); 8035 8036 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 8037 8038 if ((OpSel & (1 << SrcNum)) != 0) { 8039 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 8040 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 8041 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 8042 } 8043 } 8044 8045 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, 8046 const OperandVector &Operands) { 8047 cvtVOP3P(Inst, Operands); 8048 cvtVOP3DstOpSelOnly(Inst); 8049 } 8050 8051 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands, 8052 OptionalImmIndexMap &OptionalIdx) { 8053 cvtVOP3P(Inst, Operands, OptionalIdx); 8054 cvtVOP3DstOpSelOnly(Inst); 8055 } 8056 8057 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 8058 // 1. This operand is input modifiers 8059 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 8060 // 2. This is not last operand 8061 && Desc.NumOperands > (OpNum + 1) 8062 // 3. Next operand is register class 8063 && Desc.OpInfo[OpNum + 1].RegClass != -1 8064 // 4. Next register is not tied to any other operand 8065 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 8066 } 8067 8068 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 8069 { 8070 OptionalImmIndexMap OptionalIdx; 8071 unsigned Opc = Inst.getOpcode(); 8072 8073 unsigned I = 1; 8074 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8075 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8076 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8077 } 8078 8079 for (unsigned E = Operands.size(); I != E; ++I) { 8080 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8081 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8082 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8083 } else if (Op.isInterpSlot() || 8084 Op.isInterpAttr() || 8085 Op.isAttrChan()) { 8086 Inst.addOperand(MCOperand::createImm(Op.getImm())); 8087 } else if (Op.isImmModifier()) { 8088 OptionalIdx[Op.getImmTy()] = I; 8089 } else { 8090 llvm_unreachable("unhandled operand type"); 8091 } 8092 } 8093 8094 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 8095 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 8096 } 8097 8098 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8099 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8100 } 8101 8102 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8103 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8104 } 8105 } 8106 8107 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands) 8108 { 8109 OptionalImmIndexMap OptionalIdx; 8110 unsigned Opc = Inst.getOpcode(); 8111 8112 unsigned I = 1; 8113 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8114 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8115 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8116 } 8117 8118 for (unsigned E = Operands.size(); I != E; 
++I) { 8119 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8120 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8121 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8122 } else if (Op.isImmModifier()) { 8123 OptionalIdx[Op.getImmTy()] = I; 8124 } else { 8125 llvm_unreachable("unhandled operand type"); 8126 } 8127 } 8128 8129 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8130 8131 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8132 if (OpSelIdx != -1) 8133 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); 8134 8135 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP); 8136 8137 if (OpSelIdx == -1) 8138 return; 8139 8140 const int Ops[] = { AMDGPU::OpName::src0, 8141 AMDGPU::OpName::src1, 8142 AMDGPU::OpName::src2 }; 8143 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8144 AMDGPU::OpName::src1_modifiers, 8145 AMDGPU::OpName::src2_modifiers }; 8146 8147 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 8148 8149 for (int J = 0; J < 3; ++J) { 8150 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8151 if (OpIdx == -1) 8152 break; 8153 8154 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8155 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 8156 8157 if ((OpSel & (1 << J)) != 0) 8158 ModVal |= SISrcMods::OP_SEL_0; 8159 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && 8160 (OpSel & (1 << 3)) != 0) 8161 ModVal |= SISrcMods::DST_OP_SEL; 8162 8163 Inst.getOperand(ModIdx).setImm(ModVal); 8164 } 8165 } 8166 8167 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 8168 OptionalImmIndexMap &OptionalIdx) { 8169 unsigned Opc = Inst.getOpcode(); 8170 8171 unsigned I = 1; 8172 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8173 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8174 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8175 } 8176 8177 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 8178 // This instruction has src modifiers 8179 for (unsigned E = Operands.size(); I != E; ++I) { 8180 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8181 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8182 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8183 } else if (Op.isImmModifier()) { 8184 OptionalIdx[Op.getImmTy()] = I; 8185 } else if (Op.isRegOrImm()) { 8186 Op.addRegOrImmOperands(Inst, 1); 8187 } else { 8188 llvm_unreachable("unhandled operand type"); 8189 } 8190 } 8191 } else { 8192 // No src modifiers 8193 for (unsigned E = Operands.size(); I != E; ++I) { 8194 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8195 if (Op.isMod()) { 8196 OptionalIdx[Op.getImmTy()] = I; 8197 } else { 8198 Op.addRegOrImmOperands(Inst, 1); 8199 } 8200 } 8201 } 8202 8203 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8204 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8205 } 8206 8207 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8208 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8209 } 8210 8211 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 8212 // it has src2 register operand that is tied to dst operand 8213 // we don't allow modifiers for this operand in assembler so src2_modifiers 8214 // should be 0. 
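// e.g. (hypothetical source line) 'v_fmac_f32_e64 v5, v1, v2': a zero
// src2_modifiers immediate is inserted and the dst register (v5) is copied in
// as the tied src2 operand.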
8215 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 8216 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 8217 Opc == AMDGPU::V_MAC_F32_e64_vi || 8218 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 8219 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 8220 Opc == AMDGPU::V_MAC_F16_e64_vi || 8221 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 8222 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 8223 Opc == AMDGPU::V_FMAC_F32_e64_gfx11 || 8224 Opc == AMDGPU::V_FMAC_F32_e64_vi || 8225 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 8226 Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 || 8227 Opc == AMDGPU::V_FMAC_F16_e64_gfx10 || 8228 Opc == AMDGPU::V_FMAC_F16_e64_gfx11) { 8229 auto it = Inst.begin(); 8230 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 8231 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 8232 ++it; 8233 // Copy the operand to ensure it's not invalidated when Inst grows. 8234 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 8235 } 8236 } 8237 8238 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 8239 OptionalImmIndexMap OptionalIdx; 8240 cvtVOP3(Inst, Operands, OptionalIdx); 8241 } 8242 8243 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 8244 OptionalImmIndexMap &OptIdx) { 8245 const int Opc = Inst.getOpcode(); 8246 const MCInstrDesc &Desc = MII.get(Opc); 8247 8248 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 8249 8250 if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi || 8251 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi) { 8252 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods 8253 Inst.addOperand(Inst.getOperand(0)); 8254 } 8255 8256 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 8257 assert(!IsPacked); 8258 Inst.addOperand(Inst.getOperand(0)); 8259 } 8260 8261 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 8262 // instruction, and then figure out where to actually put the modifiers 8263 8264 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8265 if (OpSelIdx != -1) { 8266 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 8267 } 8268 8269 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 8270 if (OpSelHiIdx != -1) { 8271 int DefaultVal = IsPacked ? 
-1 : 0; 8272 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 8273 DefaultVal); 8274 } 8275 8276 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 8277 if (NegLoIdx != -1) { 8278 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 8279 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 8280 } 8281 8282 const int Ops[] = { AMDGPU::OpName::src0, 8283 AMDGPU::OpName::src1, 8284 AMDGPU::OpName::src2 }; 8285 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8286 AMDGPU::OpName::src1_modifiers, 8287 AMDGPU::OpName::src2_modifiers }; 8288 8289 unsigned OpSel = 0; 8290 unsigned OpSelHi = 0; 8291 unsigned NegLo = 0; 8292 unsigned NegHi = 0; 8293 8294 if (OpSelIdx != -1) 8295 OpSel = Inst.getOperand(OpSelIdx).getImm(); 8296 8297 if (OpSelHiIdx != -1) 8298 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 8299 8300 if (NegLoIdx != -1) { 8301 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 8302 NegLo = Inst.getOperand(NegLoIdx).getImm(); 8303 NegHi = Inst.getOperand(NegHiIdx).getImm(); 8304 } 8305 8306 for (int J = 0; J < 3; ++J) { 8307 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8308 if (OpIdx == -1) 8309 break; 8310 8311 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8312 8313 if (ModIdx == -1) 8314 continue; 8315 8316 uint32_t ModVal = 0; 8317 8318 if ((OpSel & (1 << J)) != 0) 8319 ModVal |= SISrcMods::OP_SEL_0; 8320 8321 if ((OpSelHi & (1 << J)) != 0) 8322 ModVal |= SISrcMods::OP_SEL_1; 8323 8324 if ((NegLo & (1 << J)) != 0) 8325 ModVal |= SISrcMods::NEG; 8326 8327 if ((NegHi & (1 << J)) != 0) 8328 ModVal |= SISrcMods::NEG_HI; 8329 8330 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 8331 } 8332 } 8333 8334 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 8335 OptionalImmIndexMap OptIdx; 8336 cvtVOP3(Inst, Operands, OptIdx); 8337 cvtVOP3P(Inst, Operands, OptIdx); 8338 } 8339 8340 //===----------------------------------------------------------------------===// 8341 // VOPD 8342 //===----------------------------------------------------------------------===// 8343 8344 OperandMatchResultTy AMDGPUAsmParser::parseVOPD(OperandVector &Operands) { 8345 if (!hasVOPD(getSTI())) 8346 return MatchOperand_NoMatch; 8347 8348 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) { 8349 SMLoc S = getLoc(); 8350 lex(); 8351 lex(); 8352 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S)); 8353 const MCExpr *Expr; 8354 if (isToken(AsmToken::Identifier) && !Parser.parseExpression(Expr)) { 8355 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 8356 return MatchOperand_Success; 8357 } 8358 Error(S, "invalid VOPD :: usage"); 8359 return MatchOperand_ParseFail; 8360 } 8361 return MatchOperand_NoMatch; 8362 } 8363 8364 // Create VOPD MCInst operands using parsed assembler operands. 
8365 // Parsed VOPD operands are ordered as follows: 8366 // OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::' 8367 // OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm] 8368 // If both OpX and OpY have an imm, the first imm has a different name: 8369 // OpXMnemo dstX src0X [vsrc1X|immDeferred vsrc1X|vsrc1X immDeferred] '::' 8370 // OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm] 8371 // MCInst operands have the following order: 8372 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands] 8373 void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) { 8374 auto addOp = [&](uint16_t i) { // NOLINT:function pointer 8375 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 8376 if (Op.isReg()) { 8377 Op.addRegOperands(Inst, 1); 8378 return; 8379 } 8380 if (Op.isImm()) { 8381 Op.addImmOperands(Inst, 1); 8382 return; 8383 } 8384 // Handle tokens like 'offen' which are sometimes hard-coded into the 8385 // asm string. There are no MCInst operands for these. 8386 if (Op.isToken()) { 8387 return; 8388 } 8389 llvm_unreachable("Unhandled operand type in cvtVOPD"); 8390 }; 8391 8392 // Indices into MCInst.Operands 8393 const auto FmamkOpXImmMCIndex = 3; // dstX, dstY, src0X, imm, ... 8394 const auto FmaakOpXImmMCIndex = 4; // dstX, dstY, src0X, src1X, imm, ... 8395 const auto MinOpYImmMCIndex = 4; // dstX, dstY, src0X, src0Y, imm, ... 8396 8397 unsigned Opc = Inst.getOpcode(); 8398 bool HasVsrc1X = 8399 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vsrc1X) != -1; 8400 bool HasImmX = 8401 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::immDeferred) != -1 || 8402 (HasVsrc1X && (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) == 8403 FmamkOpXImmMCIndex || 8404 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) == 8405 FmaakOpXImmMCIndex)); 8406 8407 bool HasVsrc1Y = 8408 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vsrc1Y) != -1; 8409 bool HasImmY = 8410 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::immDeferred) != -1 || 8411 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) >= 8412 MinOpYImmMCIndex + HasVsrc1X; 8413 8414 // Indices of parsed operands relative to dst 8415 const auto DstIdx = 0; 8416 const auto Src0Idx = 1; 8417 const auto Vsrc1OrImmIdx = 2; 8418 8419 const auto OpXOperandsSize = 2 + HasImmX + HasVsrc1X; 8420 const auto BridgeTokensSize = 2; // Special VOPD tokens ('::' and OpYMnemo) 8421 8422 // Offsets into parsed operands 8423 const auto OpXFirstOperandOffset = 1; 8424 const auto OpYFirstOperandOffset = 8425 OpXFirstOperandOffset + OpXOperandsSize + BridgeTokensSize; 8426 8427 // Order of addOp calls determines MC operand order 8428 addOp(OpXFirstOperandOffset + DstIdx); // vdstX 8429 addOp(OpYFirstOperandOffset + DstIdx); // vdstY 8430 8431 addOp(OpXFirstOperandOffset + Src0Idx); // src0X 8432 if (HasImmX) { 8433 // immX then vsrc1X for fmamk, vsrc1X then immX for fmaak 8434 addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx); 8435 addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx + 1); 8436 } else { 8437 if (HasVsrc1X) // all except v_mov 8438 addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx); // vsrc1X 8439 } 8440 8441 addOp(OpYFirstOperandOffset + Src0Idx); // src0Y 8442 if (HasImmY) { 8443 // immY then vsrc1Y for fmamk, vsrc1Y then immY for fmaak 8444 addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx); 8445 addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx + 1); 8446 } else { 8447 if (HasVsrc1Y) // all except v_mov 8448 addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx); // vsrc1Y 8449 } 8450 } 8451 8452 
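// Illustrative example of the conversion above (assumed GFX11 VOPD syntax):
//   v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
// is parsed as: mnemonic, v0, v2, '::', second mnemonic, v1, v3
// and produces MCInst operands ordered as: v0 (vdstX), v1 (vdstY),
// v2 (src0X), v3 (src0Y).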
//===----------------------------------------------------------------------===// 8453 // dpp 8454 //===----------------------------------------------------------------------===// 8455 8456 bool AMDGPUOperand::isDPP8() const { 8457 return isImmTy(ImmTyDPP8); 8458 } 8459 8460 bool AMDGPUOperand::isDPPCtrl() const { 8461 using namespace AMDGPU::DPP; 8462 8463 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 8464 if (result) { 8465 int64_t Imm = getImm(); 8466 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 8467 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 8468 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 8469 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 8470 (Imm == DppCtrl::WAVE_SHL1) || 8471 (Imm == DppCtrl::WAVE_ROL1) || 8472 (Imm == DppCtrl::WAVE_SHR1) || 8473 (Imm == DppCtrl::WAVE_ROR1) || 8474 (Imm == DppCtrl::ROW_MIRROR) || 8475 (Imm == DppCtrl::ROW_HALF_MIRROR) || 8476 (Imm == DppCtrl::BCAST15) || 8477 (Imm == DppCtrl::BCAST31) || 8478 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 8479 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 8480 } 8481 return false; 8482 } 8483 8484 //===----------------------------------------------------------------------===// 8485 // mAI 8486 //===----------------------------------------------------------------------===// 8487 8488 bool AMDGPUOperand::isBLGP() const { 8489 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 8490 } 8491 8492 bool AMDGPUOperand::isCBSZ() const { 8493 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 8494 } 8495 8496 bool AMDGPUOperand::isABID() const { 8497 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 8498 } 8499 8500 bool AMDGPUOperand::isS16Imm() const { 8501 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 8502 } 8503 8504 bool AMDGPUOperand::isU16Imm() const { 8505 return isImm() && isUInt<16>(getImm()); 8506 } 8507 8508 //===----------------------------------------------------------------------===// 8509 // dim 8510 //===----------------------------------------------------------------------===// 8511 8512 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 8513 // We want to allow "dim:1D" etc., 8514 // but the initial 1 is tokenized as an integer. 
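// e.g. (illustrative) 'dim:2D_ARRAY' arrives as the integer token '2'
// followed by the identifier 'D_ARRAY' and is re-joined below; the full
// 'SQ_RSRC_IMG_2D_ARRAY' spelling is also accepted and has its prefix stripped.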
8515 std::string Token; 8516 if (isToken(AsmToken::Integer)) { 8517 SMLoc Loc = getToken().getEndLoc(); 8518 Token = std::string(getTokenStr()); 8519 lex(); 8520 if (getLoc() != Loc) 8521 return false; 8522 } 8523 8524 StringRef Suffix; 8525 if (!parseId(Suffix)) 8526 return false; 8527 Token += Suffix; 8528 8529 StringRef DimId = Token; 8530 if (DimId.startswith("SQ_RSRC_IMG_")) 8531 DimId = DimId.drop_front(12); 8532 8533 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 8534 if (!DimInfo) 8535 return false; 8536 8537 Encoding = DimInfo->Encoding; 8538 return true; 8539 } 8540 8541 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 8542 if (!isGFX10Plus()) 8543 return MatchOperand_NoMatch; 8544 8545 SMLoc S = getLoc(); 8546 8547 if (!trySkipId("dim", AsmToken::Colon)) 8548 return MatchOperand_NoMatch; 8549 8550 unsigned Encoding; 8551 SMLoc Loc = getLoc(); 8552 if (!parseDimId(Encoding)) { 8553 Error(Loc, "invalid dim value"); 8554 return MatchOperand_ParseFail; 8555 } 8556 8557 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 8558 AMDGPUOperand::ImmTyDim)); 8559 return MatchOperand_Success; 8560 } 8561 8562 //===----------------------------------------------------------------------===// 8563 // dpp 8564 //===----------------------------------------------------------------------===// 8565 8566 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 8567 SMLoc S = getLoc(); 8568 8569 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 8570 return MatchOperand_NoMatch; 8571 8572 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 8573 8574 int64_t Sels[8]; 8575 8576 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8577 return MatchOperand_ParseFail; 8578 8579 for (size_t i = 0; i < 8; ++i) { 8580 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8581 return MatchOperand_ParseFail; 8582 8583 SMLoc Loc = getLoc(); 8584 if (getParser().parseAbsoluteExpression(Sels[i])) 8585 return MatchOperand_ParseFail; 8586 if (0 > Sels[i] || 7 < Sels[i]) { 8587 Error(Loc, "expected a 3-bit value"); 8588 return MatchOperand_ParseFail; 8589 } 8590 } 8591 8592 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8593 return MatchOperand_ParseFail; 8594 8595 unsigned DPP8 = 0; 8596 for (size_t i = 0; i < 8; ++i) 8597 DPP8 |= (Sels[i] << (i * 3)); 8598 8599 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 8600 return MatchOperand_Success; 8601 } 8602 8603 bool 8604 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 8605 const OperandVector &Operands) { 8606 if (Ctrl == "row_newbcast") 8607 return isGFX90A(); 8608 8609 if (Ctrl == "row_share" || 8610 Ctrl == "row_xmask") 8611 return isGFX10Plus(); 8612 8613 if (Ctrl == "wave_shl" || 8614 Ctrl == "wave_shr" || 8615 Ctrl == "wave_rol" || 8616 Ctrl == "wave_ror" || 8617 Ctrl == "row_bcast") 8618 return isVI() || isGFX9(); 8619 8620 return Ctrl == "row_mirror" || 8621 Ctrl == "row_half_mirror" || 8622 Ctrl == "quad_perm" || 8623 Ctrl == "row_shl" || 8624 Ctrl == "row_shr" || 8625 Ctrl == "row_ror"; 8626 } 8627 8628 int64_t 8629 AMDGPUAsmParser::parseDPPCtrlPerm() { 8630 // quad_perm:[%d,%d,%d,%d] 8631 8632 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8633 return -1; 8634 8635 int64_t Val = 0; 8636 for (int i = 0; i < 4; ++i) { 8637 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8638 return -1; 8639 8640 int64_t Temp; 8641 SMLoc Loc = getLoc(); 8642 if 
(getParser().parseAbsoluteExpression(Temp)) 8643 return -1; 8644 if (Temp < 0 || Temp > 3) { 8645 Error(Loc, "expected a 2-bit value"); 8646 return -1; 8647 } 8648 8649 Val += (Temp << i * 2); 8650 } 8651 8652 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8653 return -1; 8654 8655 return Val; 8656 } 8657 8658 int64_t 8659 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 8660 using namespace AMDGPU::DPP; 8661 8662 // sel:%d 8663 8664 int64_t Val; 8665 SMLoc Loc = getLoc(); 8666 8667 if (getParser().parseAbsoluteExpression(Val)) 8668 return -1; 8669 8670 struct DppCtrlCheck { 8671 int64_t Ctrl; 8672 int Lo; 8673 int Hi; 8674 }; 8675 8676 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 8677 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 8678 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 8679 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 8680 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 8681 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 8682 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 8683 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 8684 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 8685 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 8686 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 8687 .Default({-1, 0, 0}); 8688 8689 bool Valid; 8690 if (Check.Ctrl == -1) { 8691 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 8692 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 8693 } else { 8694 Valid = Check.Lo <= Val && Val <= Check.Hi; 8695 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val); 8696 } 8697 8698 if (!Valid) { 8699 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 8700 return -1; 8701 } 8702 8703 return Val; 8704 } 8705 8706 OperandMatchResultTy 8707 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 8708 using namespace AMDGPU::DPP; 8709 8710 if (!isToken(AsmToken::Identifier) || 8711 !isSupportedDPPCtrl(getTokenStr(), Operands)) 8712 return MatchOperand_NoMatch; 8713 8714 SMLoc S = getLoc(); 8715 int64_t Val = -1; 8716 StringRef Ctrl; 8717 8718 parseId(Ctrl); 8719 8720 if (Ctrl == "row_mirror") { 8721 Val = DppCtrl::ROW_MIRROR; 8722 } else if (Ctrl == "row_half_mirror") { 8723 Val = DppCtrl::ROW_HALF_MIRROR; 8724 } else { 8725 if (skipToken(AsmToken::Colon, "expected a colon")) { 8726 if (Ctrl == "quad_perm") { 8727 Val = parseDPPCtrlPerm(); 8728 } else { 8729 Val = parseDPPCtrlSel(Ctrl); 8730 } 8731 } 8732 } 8733 8734 if (Val == -1) 8735 return MatchOperand_ParseFail; 8736 8737 Operands.push_back( 8738 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 8739 return MatchOperand_Success; 8740 } 8741 8742 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 8743 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 8744 } 8745 8746 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 8747 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 8748 } 8749 8750 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 8751 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 8752 } 8753 8754 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 8755 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 8756 } 8757 8758 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 8759 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 8760 } 8761 8762 void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, 
const OperandVector &Operands, bool IsDPP8) { 8763 OptionalImmIndexMap OptionalIdx; 8764 unsigned Opc = Inst.getOpcode(); 8765 bool HasModifiers = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8766 unsigned I = 1; 8767 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8768 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8769 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8770 } 8771 8772 int Fi = 0; 8773 for (unsigned E = Operands.size(); I != E; ++I) { 8774 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8775 MCOI::TIED_TO); 8776 if (TiedTo != -1) { 8777 assert((unsigned)TiedTo < Inst.getNumOperands()); 8778 // handle tied old or src2 for MAC instructions 8779 Inst.addOperand(Inst.getOperand(TiedTo)); 8780 } 8781 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8782 // Add the register arguments 8783 if (IsDPP8 && Op.isFI()) { 8784 Fi = Op.getImm(); 8785 } else if (HasModifiers && 8786 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8787 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8788 } else if (Op.isReg()) { 8789 Op.addRegOperands(Inst, 1); 8790 } else if (Op.isImm() && 8791 Desc.OpInfo[Inst.getNumOperands()].RegClass != -1) { 8792 assert(!HasModifiers && "Case should be unreachable with modifiers"); 8793 assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP"); 8794 Op.addImmOperands(Inst, 1); 8795 } else if (Op.isImm()) { 8796 OptionalIdx[Op.getImmTy()] = I; 8797 } else { 8798 llvm_unreachable("unhandled operand type"); 8799 } 8800 } 8801 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8802 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8803 } 8804 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8805 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8806 } 8807 if (Desc.TSFlags & SIInstrFlags::VOP3P) 8808 cvtVOP3P(Inst, Operands, OptionalIdx); 8809 else if (Desc.TSFlags & SIInstrFlags::VOP3) 8810 cvtVOP3OpSel(Inst, Operands, OptionalIdx); 8811 else if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) { 8812 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); 8813 } 8814 8815 if (IsDPP8) { 8816 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8); 8817 using namespace llvm::AMDGPU::DPP; 8818 Inst.addOperand(MCOperand::createImm(Fi? 
DPP8_FI_1 : DPP8_FI_0)); 8819 } else { 8820 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4); 8821 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8822 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8823 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8824 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8825 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8826 } 8827 } 8828 } 8829 8830 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8831 OptionalImmIndexMap OptionalIdx; 8832 8833 unsigned Opc = Inst.getOpcode(); 8834 bool HasModifiers = 8835 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8836 unsigned I = 1; 8837 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8838 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8839 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8840 } 8841 8842 int Fi = 0; 8843 for (unsigned E = Operands.size(); I != E; ++I) { 8844 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8845 MCOI::TIED_TO); 8846 if (TiedTo != -1) { 8847 assert((unsigned)TiedTo < Inst.getNumOperands()); 8848 // handle tied old or src2 for MAC instructions 8849 Inst.addOperand(Inst.getOperand(TiedTo)); 8850 } 8851 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8852 // Add the register arguments 8853 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8854 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8855 // Skip it. 8856 continue; 8857 } 8858 8859 if (IsDPP8) { 8860 if (Op.isDPP8()) { 8861 Op.addImmOperands(Inst, 1); 8862 } else if (HasModifiers && 8863 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8864 Op.addRegWithFPInputModsOperands(Inst, 2); 8865 } else if (Op.isFI()) { 8866 Fi = Op.getImm(); 8867 } else if (Op.isReg()) { 8868 Op.addRegOperands(Inst, 1); 8869 } else { 8870 llvm_unreachable("Invalid operand type"); 8871 } 8872 } else { 8873 if (HasModifiers && 8874 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8875 Op.addRegWithFPInputModsOperands(Inst, 2); 8876 } else if (Op.isReg()) { 8877 Op.addRegOperands(Inst, 1); 8878 } else if (Op.isDPPCtrl()) { 8879 Op.addImmOperands(Inst, 1); 8880 } else if (Op.isImm()) { 8881 // Handle optional arguments 8882 OptionalIdx[Op.getImmTy()] = I; 8883 } else { 8884 llvm_unreachable("Invalid operand type"); 8885 } 8886 } 8887 } 8888 8889 if (IsDPP8) { 8890 using namespace llvm::AMDGPU::DPP; 8891 Inst.addOperand(MCOperand::createImm(Fi? 
OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  SMLoc StringLoc;
  res = parseStringWithPrefix(Prefix, Value, StringLoc);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
            .Case("BYTE_0", SdwaSel::BYTE_0)
            .Case("BYTE_1", SdwaSel::BYTE_1)
            .Case("BYTE_2", SdwaSel::BYTE_2)
            .Case("BYTE_3", SdwaSel::BYTE_3)
            .Case("WORD_0", SdwaSel::WORD_0)
            .Case("WORD_1", SdwaSel::WORD_1)
            .Case("DWORD", SdwaSel::DWORD)
            .Default(0xffffffff);

  if (Int == 0xffffffff) {
    Error(StringLoc, "invalid " + Twine(Prefix) + " value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  SMLoc StringLoc;
  res = parseStringWithPrefix("dst_unused", Value, StringLoc);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
            .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
            .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
            .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
            .Default(0xffffffff);

  if (Int == 0xffffffff) {
    Error(StringLoc, "invalid dst_unused value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}

void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}

void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}
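// Shared SDWA conversion. BasicInstType selects the VOP1/VOP2/VOPC operand
// layout; SkipDstVcc and SkipSrcVcc drop "vcc" operands that appear in the
// assembly text but are not explicit operands of the SDWA encoding.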
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32, ...) SDWA forms use the "vcc" token as dst.
      // Skip it if it is the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on the previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }
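
  // All explicit operands have been consumed; what remains are the optional
  // SDWA arguments. Append defaults (clamp/omod = 0, *_sel = DWORD,
  // dst_unused = UNUSED_PRESERVE) for any that the source omitted, following
  // the operand layout of the basic instruction type.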
  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments.
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
                                     AMDGPU::OpName::clamp) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyClampSI, 0);
      }
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
                                     AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyOModSI, 0);
      }
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
                                     AMDGPU::OpName::dst_sel) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      }
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
                                     AMDGPU::OpName::dst_unused) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTySdwaDstUnused,
                              DstUnused::UNUSED_PRESERVE);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case for v_mac_{f16, f32}: it has a src2 register operand that is
  // tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that
// the MatchClassKind enum is available.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to see a token and fails to validate
  // the operand. This method checks whether we were given an immediate operand
  // where the corresponding token was expected.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() always tries to match
    // an operand as a token when isToken returns true, and when the name of
    // the expression is not a valid token the match fails, so we need to
    // handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register, but it should also be usable
    // with 64-bit operands. The following code enables it for SReg_64
    // operands used as source and destination. The remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//
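// Parse the optional immediate operand of s_endpgm (e.g. "s_endpgm 3").
// When present it must fit in 16 bits; when absent it defaults to 0.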
OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }

//===----------------------------------------------------------------------===//
// LDSDIR
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitVDST() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitVDST);
}

bool AMDGPUOperand::isWaitVDST() const {
  return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
}

//===----------------------------------------------------------------------===//
// VINTERP
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitEXP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitEXP);
}

bool AMDGPUOperand::isWaitEXP() const {
  return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
}