1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDKernelCodeT.h" 10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 11 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 12 #include "SIDefines.h" 13 #include "SIInstrInfo.h" 14 #include "SIRegisterInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/SmallBitVector.h" 21 #include "llvm/ADT/StringSet.h" 22 #include "llvm/ADT/Twine.h" 23 #include "llvm/BinaryFormat/ELF.h" 24 #include "llvm/MC/MCAsmInfo.h" 25 #include "llvm/MC/MCContext.h" 26 #include "llvm/MC/MCExpr.h" 27 #include "llvm/MC/MCInst.h" 28 #include "llvm/MC/MCInstrDesc.h" 29 #include "llvm/MC/MCParser/MCAsmLexer.h" 30 #include "llvm/MC/MCParser/MCAsmParser.h" 31 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 32 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 33 #include "llvm/MC/MCSymbol.h" 34 #include "llvm/MC/TargetRegistry.h" 35 #include "llvm/Support/AMDGPUMetadata.h" 36 #include "llvm/Support/AMDHSAKernelDescriptor.h" 37 #include "llvm/Support/Casting.h" 38 #include "llvm/Support/MachineValueType.h" 39 #include "llvm/Support/MathExtras.h" 40 #include "llvm/Support/TargetParser.h" 41 42 using namespace llvm; 43 using namespace llvm::AMDGPU; 44 using namespace llvm::amdhsa; 45 46 namespace { 47 48 class AMDGPUAsmParser; 49 50 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 51 52 //===----------------------------------------------------------------------===// 53 // Operand 54 //===----------------------------------------------------------------------===// 55 56 class AMDGPUOperand : public MCParsedAsmOperand { 57 enum KindTy { 58 Token, 59 Immediate, 60 Register, 61 Expression 62 } Kind; 63 64 SMLoc StartLoc, EndLoc; 65 const AMDGPUAsmParser *AsmParser; 66 67 public: 68 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 69 : Kind(Kind_), AsmParser(AsmParser_) {} 70 71 using Ptr = std::unique_ptr<AMDGPUOperand>; 72 73 struct Modifiers { 74 bool Abs = false; 75 bool Neg = false; 76 bool Sext = false; 77 78 bool hasFPModifiers() const { return Abs || Neg; } 79 bool hasIntModifiers() const { return Sext; } 80 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 81 82 int64_t getFPModifiersOperand() const { 83 int64_t Operand = 0; 84 Operand |= Abs ? SISrcMods::ABS : 0u; 85 Operand |= Neg ? SISrcMods::NEG : 0u; 86 return Operand; 87 } 88 89 int64_t getIntModifiersOperand() const { 90 int64_t Operand = 0; 91 Operand |= Sext ? 
SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
    ImmTyWaitVDST,
    ImmTyWaitEXP,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
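    // For example, 'gds' on a DS instruction may first come back from the
    // generic expression parser as a reference to a symbol named 'gds';
    // getToken() then falls back to that symbol's name via
    // getExpressionAsToken(). (Illustrative note.)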
210 return isSymbolRefExpr(); 211 } 212 213 bool isSymbolRefExpr() const { 214 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 215 } 216 217 bool isImm() const override { 218 return Kind == Immediate; 219 } 220 221 void setImmKindNone() const { 222 assert(isImm()); 223 Imm.Kind = ImmKindTyNone; 224 } 225 226 void setImmKindLiteral() const { 227 assert(isImm()); 228 Imm.Kind = ImmKindTyLiteral; 229 } 230 231 void setImmKindConst() const { 232 assert(isImm()); 233 Imm.Kind = ImmKindTyConst; 234 } 235 236 bool IsImmKindLiteral() const { 237 return isImm() && Imm.Kind == ImmKindTyLiteral; 238 } 239 240 bool isImmKindConst() const { 241 return isImm() && Imm.Kind == ImmKindTyConst; 242 } 243 244 bool isInlinableImm(MVT type) const; 245 bool isLiteralImm(MVT type) const; 246 247 bool isRegKind() const { 248 return Kind == Register; 249 } 250 251 bool isReg() const override { 252 return isRegKind() && !hasModifiers(); 253 } 254 255 bool isRegOrInline(unsigned RCID, MVT type) const { 256 return isRegClass(RCID) || isInlinableImm(type); 257 } 258 259 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 260 return isRegOrInline(RCID, type) || isLiteralImm(type); 261 } 262 263 bool isRegOrImmWithInt16InputMods() const { 264 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 265 } 266 267 bool isRegOrImmWithInt32InputMods() const { 268 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 269 } 270 271 bool isRegOrInlineImmWithInt16InputMods() const { 272 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16); 273 } 274 275 bool isRegOrInlineImmWithInt32InputMods() const { 276 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32); 277 } 278 279 bool isRegOrImmWithInt64InputMods() const { 280 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 281 } 282 283 bool isRegOrImmWithFP16InputMods() const { 284 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 285 } 286 287 bool isRegOrImmWithFP32InputMods() const { 288 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 289 } 290 291 bool isRegOrImmWithFP64InputMods() const { 292 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 293 } 294 295 bool isRegOrInlineImmWithFP16InputMods() const { 296 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16); 297 } 298 299 bool isRegOrInlineImmWithFP32InputMods() const { 300 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32); 301 } 302 303 304 bool isVReg() const { 305 return isRegClass(AMDGPU::VGPR_32RegClassID) || 306 isRegClass(AMDGPU::VReg_64RegClassID) || 307 isRegClass(AMDGPU::VReg_96RegClassID) || 308 isRegClass(AMDGPU::VReg_128RegClassID) || 309 isRegClass(AMDGPU::VReg_160RegClassID) || 310 isRegClass(AMDGPU::VReg_192RegClassID) || 311 isRegClass(AMDGPU::VReg_256RegClassID) || 312 isRegClass(AMDGPU::VReg_512RegClassID) || 313 isRegClass(AMDGPU::VReg_1024RegClassID); 314 } 315 316 bool isVReg32() const { 317 return isRegClass(AMDGPU::VGPR_32RegClassID); 318 } 319 320 bool isVReg32OrOff() const { 321 return isOff() || isVReg32(); 322 } 323 324 bool isNull() const { 325 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 326 } 327 328 bool isVRegWithInputMods() const; 329 330 bool isSDWAOperand(MVT type) const; 331 bool isSDWAFP16Operand() const; 332 bool isSDWAFP32Operand() const; 333 bool isSDWAInt16Operand() const; 334 bool isSDWAInt32Operand() const; 335 336 bool isImmTy(ImmTy ImmT) const { 337 return isImm() && Imm.Type == ImmT; 338 } 339 340 bool isImmModifier() const { 341 return isImm() && 
Imm.Type != ImmTyNone; 342 } 343 344 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 345 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 346 bool isDMask() const { return isImmTy(ImmTyDMask); } 347 bool isDim() const { return isImmTy(ImmTyDim); } 348 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 349 bool isDA() const { return isImmTy(ImmTyDA); } 350 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 351 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 352 bool isLWE() const { return isImmTy(ImmTyLWE); } 353 bool isOff() const { return isImmTy(ImmTyOff); } 354 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 355 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 356 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 357 bool isOffen() const { return isImmTy(ImmTyOffen); } 358 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 359 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 360 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 361 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 362 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 363 364 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 365 bool isGDS() const { return isImmTy(ImmTyGDS); } 366 bool isLDS() const { return isImmTy(ImmTyLDS); } 367 bool isCPol() const { return isImmTy(ImmTyCPol); } 368 bool isSWZ() const { return isImmTy(ImmTySWZ); } 369 bool isTFE() const { return isImmTy(ImmTyTFE); } 370 bool isD16() const { return isImmTy(ImmTyD16); } 371 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 372 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 373 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 374 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 375 bool isFI() const { return isImmTy(ImmTyDppFi); } 376 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 377 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 378 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 379 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 380 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 381 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 382 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 383 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 384 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 385 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 386 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 387 bool isHigh() const { return isImmTy(ImmTyHigh); } 388 389 bool isMod() const { 390 return isClampSI() || isOModSI(); 391 } 392 393 bool isRegOrImm() const { 394 return isReg() || isImm(); 395 } 396 397 bool isRegClass(unsigned RCID) const; 398 399 bool isInlineValue() const; 400 401 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 402 return isRegOrInline(RCID, type) && !hasModifiers(); 403 } 404 405 bool isSCSrcB16() const { 406 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 407 } 408 409 bool isSCSrcV2B16() const { 410 return isSCSrcB16(); 411 } 412 413 bool isSCSrcB32() const { 414 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 415 } 416 417 bool isSCSrcB64() const { 418 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 419 } 420 421 bool isBoolReg() const; 422 423 bool isSCSrcF16() const { 424 return 
isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 425 } 426 427 bool isSCSrcV2F16() const { 428 return isSCSrcF16(); 429 } 430 431 bool isSCSrcF32() const { 432 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 433 } 434 435 bool isSCSrcF64() const { 436 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 437 } 438 439 bool isSSrcB32() const { 440 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 441 } 442 443 bool isSSrcB16() const { 444 return isSCSrcB16() || isLiteralImm(MVT::i16); 445 } 446 447 bool isSSrcV2B16() const { 448 llvm_unreachable("cannot happen"); 449 return isSSrcB16(); 450 } 451 452 bool isSSrcB64() const { 453 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 454 // See isVSrc64(). 455 return isSCSrcB64() || isLiteralImm(MVT::i64); 456 } 457 458 bool isSSrcF32() const { 459 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 460 } 461 462 bool isSSrcF64() const { 463 return isSCSrcB64() || isLiteralImm(MVT::f64); 464 } 465 466 bool isSSrcF16() const { 467 return isSCSrcB16() || isLiteralImm(MVT::f16); 468 } 469 470 bool isSSrcV2F16() const { 471 llvm_unreachable("cannot happen"); 472 return isSSrcF16(); 473 } 474 475 bool isSSrcV2FP32() const { 476 llvm_unreachable("cannot happen"); 477 return isSSrcF32(); 478 } 479 480 bool isSCSrcV2FP32() const { 481 llvm_unreachable("cannot happen"); 482 return isSCSrcF32(); 483 } 484 485 bool isSSrcV2INT32() const { 486 llvm_unreachable("cannot happen"); 487 return isSSrcB32(); 488 } 489 490 bool isSCSrcV2INT32() const { 491 llvm_unreachable("cannot happen"); 492 return isSCSrcB32(); 493 } 494 495 bool isSSrcOrLdsB32() const { 496 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 497 isLiteralImm(MVT::i32) || isExpr(); 498 } 499 500 bool isVCSrcB32() const { 501 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 502 } 503 504 bool isVCSrcB64() const { 505 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 506 } 507 508 bool isVCSrcB16() const { 509 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 510 } 511 512 bool isVCSrcV2B16() const { 513 return isVCSrcB16(); 514 } 515 516 bool isVCSrcF32() const { 517 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 518 } 519 520 bool isVCSrcF64() const { 521 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 522 } 523 524 bool isVCSrcF16() const { 525 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 526 } 527 528 bool isVCSrcV2F16() const { 529 return isVCSrcF16(); 530 } 531 532 bool isVSrcB32() const { 533 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 534 } 535 536 bool isVSrcB64() const { 537 return isVCSrcF64() || isLiteralImm(MVT::i64); 538 } 539 540 bool isVSrcB16() const { 541 return isVCSrcB16() || isLiteralImm(MVT::i16); 542 } 543 544 bool isVSrcV2B16() const { 545 return isVSrcB16() || isLiteralImm(MVT::v2i16); 546 } 547 548 bool isVCSrcV2FP32() const { 549 return isVCSrcF64(); 550 } 551 552 bool isVSrcV2FP32() const { 553 return isVSrcF64() || isLiteralImm(MVT::v2f32); 554 } 555 556 bool isVCSrcV2INT32() const { 557 return isVCSrcB64(); 558 } 559 560 bool isVSrcV2INT32() const { 561 return isVSrcB64() || isLiteralImm(MVT::v2i32); 562 } 563 564 bool isVSrcF32() const { 565 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 566 } 567 568 bool isVSrcF64() const { 569 return isVCSrcF64() || isLiteralImm(MVT::f64); 570 } 571 572 bool isVSrcF16() const { 573 return isVCSrcF16() || 
isLiteralImm(MVT::f16); 574 } 575 576 bool isVSrcV2F16() const { 577 return isVSrcF16() || isLiteralImm(MVT::v2f16); 578 } 579 580 bool isVISrcB32() const { 581 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 582 } 583 584 bool isVISrcB16() const { 585 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 586 } 587 588 bool isVISrcV2B16() const { 589 return isVISrcB16(); 590 } 591 592 bool isVISrcF32() const { 593 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 594 } 595 596 bool isVISrcF16() const { 597 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 598 } 599 600 bool isVISrcV2F16() const { 601 return isVISrcF16() || isVISrcB32(); 602 } 603 604 bool isVISrc_64B64() const { 605 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); 606 } 607 608 bool isVISrc_64F64() const { 609 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); 610 } 611 612 bool isVISrc_64V2FP32() const { 613 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); 614 } 615 616 bool isVISrc_64V2INT32() const { 617 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); 618 } 619 620 bool isVISrc_256B64() const { 621 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); 622 } 623 624 bool isVISrc_256F64() const { 625 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); 626 } 627 628 bool isVISrc_128B16() const { 629 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); 630 } 631 632 bool isVISrc_128V2B16() const { 633 return isVISrc_128B16(); 634 } 635 636 bool isVISrc_128B32() const { 637 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); 638 } 639 640 bool isVISrc_128F32() const { 641 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); 642 } 643 644 bool isVISrc_256V2FP32() const { 645 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); 646 } 647 648 bool isVISrc_256V2INT32() const { 649 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); 650 } 651 652 bool isVISrc_512B32() const { 653 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); 654 } 655 656 bool isVISrc_512B16() const { 657 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); 658 } 659 660 bool isVISrc_512V2B16() const { 661 return isVISrc_512B16(); 662 } 663 664 bool isVISrc_512F32() const { 665 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); 666 } 667 668 bool isVISrc_512F16() const { 669 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); 670 } 671 672 bool isVISrc_512V2F16() const { 673 return isVISrc_512F16() || isVISrc_512B32(); 674 } 675 676 bool isVISrc_1024B32() const { 677 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); 678 } 679 680 bool isVISrc_1024B16() const { 681 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); 682 } 683 684 bool isVISrc_1024V2B16() const { 685 return isVISrc_1024B16(); 686 } 687 688 bool isVISrc_1024F32() const { 689 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); 690 } 691 692 bool isVISrc_1024F16() const { 693 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); 694 } 695 696 bool isVISrc_1024V2F16() const { 697 return isVISrc_1024F16() || isVISrc_1024B32(); 698 } 699 700 bool isAISrcB32() const { 701 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 702 } 703 704 bool isAISrcB16() const { 705 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 706 } 707 708 bool isAISrcV2B16() 
const { 709 return isAISrcB16(); 710 } 711 712 bool isAISrcF32() const { 713 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 714 } 715 716 bool isAISrcF16() const { 717 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 718 } 719 720 bool isAISrcV2F16() const { 721 return isAISrcF16() || isAISrcB32(); 722 } 723 724 bool isAISrc_64B64() const { 725 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); 726 } 727 728 bool isAISrc_64F64() const { 729 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); 730 } 731 732 bool isAISrc_128B32() const { 733 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 734 } 735 736 bool isAISrc_128B16() const { 737 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 738 } 739 740 bool isAISrc_128V2B16() const { 741 return isAISrc_128B16(); 742 } 743 744 bool isAISrc_128F32() const { 745 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 746 } 747 748 bool isAISrc_128F16() const { 749 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 750 } 751 752 bool isAISrc_128V2F16() const { 753 return isAISrc_128F16() || isAISrc_128B32(); 754 } 755 756 bool isVISrc_128F16() const { 757 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); 758 } 759 760 bool isVISrc_128V2F16() const { 761 return isVISrc_128F16() || isVISrc_128B32(); 762 } 763 764 bool isAISrc_256B64() const { 765 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); 766 } 767 768 bool isAISrc_256F64() const { 769 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); 770 } 771 772 bool isAISrc_512B32() const { 773 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 774 } 775 776 bool isAISrc_512B16() const { 777 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 778 } 779 780 bool isAISrc_512V2B16() const { 781 return isAISrc_512B16(); 782 } 783 784 bool isAISrc_512F32() const { 785 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 786 } 787 788 bool isAISrc_512F16() const { 789 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 790 } 791 792 bool isAISrc_512V2F16() const { 793 return isAISrc_512F16() || isAISrc_512B32(); 794 } 795 796 bool isAISrc_1024B32() const { 797 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 798 } 799 800 bool isAISrc_1024B16() const { 801 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 802 } 803 804 bool isAISrc_1024V2B16() const { 805 return isAISrc_1024B16(); 806 } 807 808 bool isAISrc_1024F32() const { 809 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 810 } 811 812 bool isAISrc_1024F16() const { 813 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 814 } 815 816 bool isAISrc_1024V2F16() const { 817 return isAISrc_1024F16() || isAISrc_1024B32(); 818 } 819 820 bool isKImmFP32() const { 821 return isLiteralImm(MVT::f32); 822 } 823 824 bool isKImmFP16() const { 825 return isLiteralImm(MVT::f16); 826 } 827 828 bool isMem() const override { 829 return false; 830 } 831 832 bool isExpr() const { 833 return Kind == Expression; 834 } 835 836 bool isSoppBrTarget() const { 837 return isExpr() || isImm(); 838 } 839 840 bool isSWaitCnt() const; 841 bool isDepCtr() const; 842 bool isSDelayAlu() const; 843 bool isHwreg() const; 844 bool isSendMsg() const; 845 bool isSwizzle() const; 846 bool isSMRDOffset8() const; 847 bool isSMEMOffset() const; 848 bool isSMRDLiteralOffset() const; 849 bool isDPP8() const; 850 bool 
isDPPCtrl() const; 851 bool isBLGP() const; 852 bool isCBSZ() const; 853 bool isABID() const; 854 bool isGPRIdxMode() const; 855 bool isS16Imm() const; 856 bool isU16Imm() const; 857 bool isEndpgm() const; 858 bool isWaitVDST() const; 859 bool isWaitEXP() const; 860 861 StringRef getExpressionAsToken() const { 862 assert(isExpr()); 863 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 864 return S->getSymbol().getName(); 865 } 866 867 StringRef getToken() const { 868 assert(isToken()); 869 870 if (Kind == Expression) 871 return getExpressionAsToken(); 872 873 return StringRef(Tok.Data, Tok.Length); 874 } 875 876 int64_t getImm() const { 877 assert(isImm()); 878 return Imm.Val; 879 } 880 881 void setImm(int64_t Val) { 882 assert(isImm()); 883 Imm.Val = Val; 884 } 885 886 ImmTy getImmTy() const { 887 assert(isImm()); 888 return Imm.Type; 889 } 890 891 unsigned getReg() const override { 892 assert(isRegKind()); 893 return Reg.RegNo; 894 } 895 896 SMLoc getStartLoc() const override { 897 return StartLoc; 898 } 899 900 SMLoc getEndLoc() const override { 901 return EndLoc; 902 } 903 904 SMRange getLocRange() const { 905 return SMRange(StartLoc, EndLoc); 906 } 907 908 Modifiers getModifiers() const { 909 assert(isRegKind() || isImmTy(ImmTyNone)); 910 return isRegKind() ? Reg.Mods : Imm.Mods; 911 } 912 913 void setModifiers(Modifiers Mods) { 914 assert(isRegKind() || isImmTy(ImmTyNone)); 915 if (isRegKind()) 916 Reg.Mods = Mods; 917 else 918 Imm.Mods = Mods; 919 } 920 921 bool hasModifiers() const { 922 return getModifiers().hasModifiers(); 923 } 924 925 bool hasFPModifiers() const { 926 return getModifiers().hasFPModifiers(); 927 } 928 929 bool hasIntModifiers() const { 930 return getModifiers().hasIntModifiers(); 931 } 932 933 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 934 935 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 936 937 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 938 939 template <unsigned Bitwidth> 940 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 941 942 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 943 addKImmFPOperands<16>(Inst, N); 944 } 945 946 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 947 addKImmFPOperands<32>(Inst, N); 948 } 949 950 void addRegOperands(MCInst &Inst, unsigned N) const; 951 952 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 953 addRegOperands(Inst, N); 954 } 955 956 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 957 if (isRegKind()) 958 addRegOperands(Inst, N); 959 else if (isExpr()) 960 Inst.addOperand(MCOperand::createExpr(Expr)); 961 else 962 addImmOperands(Inst, N); 963 } 964 965 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 966 Modifiers Mods = getModifiers(); 967 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 968 if (isRegKind()) { 969 addRegOperands(Inst, N); 970 } else { 971 addImmOperands(Inst, N, false); 972 } 973 } 974 975 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 976 assert(!hasIntModifiers()); 977 addRegOrImmWithInputModsOperands(Inst, N); 978 } 979 980 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 981 assert(!hasFPModifiers()); 982 addRegOrImmWithInputModsOperands(Inst, N); 983 } 984 985 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 986 Modifiers Mods = getModifiers(); 987 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 988 
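    // Note: the source modifiers are emitted as their own immediate operand
    // directly before the register operand they apply to, matching the
    // src*_modifiers / src* operand pairs in the instruction definitions.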
assert(isRegKind()); 989 addRegOperands(Inst, N); 990 } 991 992 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 993 assert(!hasIntModifiers()); 994 addRegWithInputModsOperands(Inst, N); 995 } 996 997 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 998 assert(!hasFPModifiers()); 999 addRegWithInputModsOperands(Inst, N); 1000 } 1001 1002 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 1003 if (isImm()) 1004 addImmOperands(Inst, N); 1005 else { 1006 assert(isExpr()); 1007 Inst.addOperand(MCOperand::createExpr(Expr)); 1008 } 1009 } 1010 1011 static void printImmTy(raw_ostream& OS, ImmTy Type) { 1012 switch (Type) { 1013 case ImmTyNone: OS << "None"; break; 1014 case ImmTyGDS: OS << "GDS"; break; 1015 case ImmTyLDS: OS << "LDS"; break; 1016 case ImmTyOffen: OS << "Offen"; break; 1017 case ImmTyIdxen: OS << "Idxen"; break; 1018 case ImmTyAddr64: OS << "Addr64"; break; 1019 case ImmTyOffset: OS << "Offset"; break; 1020 case ImmTyInstOffset: OS << "InstOffset"; break; 1021 case ImmTyOffset0: OS << "Offset0"; break; 1022 case ImmTyOffset1: OS << "Offset1"; break; 1023 case ImmTyCPol: OS << "CPol"; break; 1024 case ImmTySWZ: OS << "SWZ"; break; 1025 case ImmTyTFE: OS << "TFE"; break; 1026 case ImmTyD16: OS << "D16"; break; 1027 case ImmTyFORMAT: OS << "FORMAT"; break; 1028 case ImmTyClampSI: OS << "ClampSI"; break; 1029 case ImmTyOModSI: OS << "OModSI"; break; 1030 case ImmTyDPP8: OS << "DPP8"; break; 1031 case ImmTyDppCtrl: OS << "DppCtrl"; break; 1032 case ImmTyDppRowMask: OS << "DppRowMask"; break; 1033 case ImmTyDppBankMask: OS << "DppBankMask"; break; 1034 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 1035 case ImmTyDppFi: OS << "FI"; break; 1036 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 1037 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 1038 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 1039 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 1040 case ImmTyDMask: OS << "DMask"; break; 1041 case ImmTyDim: OS << "Dim"; break; 1042 case ImmTyUNorm: OS << "UNorm"; break; 1043 case ImmTyDA: OS << "DA"; break; 1044 case ImmTyR128A16: OS << "R128A16"; break; 1045 case ImmTyA16: OS << "A16"; break; 1046 case ImmTyLWE: OS << "LWE"; break; 1047 case ImmTyOff: OS << "Off"; break; 1048 case ImmTyExpTgt: OS << "ExpTgt"; break; 1049 case ImmTyExpCompr: OS << "ExpCompr"; break; 1050 case ImmTyExpVM: OS << "ExpVM"; break; 1051 case ImmTyHwreg: OS << "Hwreg"; break; 1052 case ImmTySendMsg: OS << "SendMsg"; break; 1053 case ImmTyInterpSlot: OS << "InterpSlot"; break; 1054 case ImmTyInterpAttr: OS << "InterpAttr"; break; 1055 case ImmTyAttrChan: OS << "AttrChan"; break; 1056 case ImmTyOpSel: OS << "OpSel"; break; 1057 case ImmTyOpSelHi: OS << "OpSelHi"; break; 1058 case ImmTyNegLo: OS << "NegLo"; break; 1059 case ImmTyNegHi: OS << "NegHi"; break; 1060 case ImmTySwizzle: OS << "Swizzle"; break; 1061 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 1062 case ImmTyHigh: OS << "High"; break; 1063 case ImmTyBLGP: OS << "BLGP"; break; 1064 case ImmTyCBSZ: OS << "CBSZ"; break; 1065 case ImmTyABID: OS << "ABID"; break; 1066 case ImmTyEndpgm: OS << "Endpgm"; break; 1067 case ImmTyWaitVDST: OS << "WaitVDST"; break; 1068 case ImmTyWaitEXP: OS << "WaitEXP"; break; 1069 } 1070 } 1071 1072 void print(raw_ostream &OS) const override { 1073 switch (Kind) { 1074 case Register: 1075 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 1076 break; 1077 case Immediate: 1078 OS << '<' << getImm(); 1079 if (getImmTy() != ImmTyNone) { 1080 
OS << " type: "; printImmTy(OS, getImmTy()); 1081 } 1082 OS << " mods: " << Imm.Mods << '>'; 1083 break; 1084 case Token: 1085 OS << '\'' << getToken() << '\''; 1086 break; 1087 case Expression: 1088 OS << "<expr " << *Expr << '>'; 1089 break; 1090 } 1091 } 1092 1093 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 1094 int64_t Val, SMLoc Loc, 1095 ImmTy Type = ImmTyNone, 1096 bool IsFPImm = false) { 1097 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 1098 Op->Imm.Val = Val; 1099 Op->Imm.IsFPImm = IsFPImm; 1100 Op->Imm.Kind = ImmKindTyNone; 1101 Op->Imm.Type = Type; 1102 Op->Imm.Mods = Modifiers(); 1103 Op->StartLoc = Loc; 1104 Op->EndLoc = Loc; 1105 return Op; 1106 } 1107 1108 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 1109 StringRef Str, SMLoc Loc, 1110 bool HasExplicitEncodingSize = true) { 1111 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 1112 Res->Tok.Data = Str.data(); 1113 Res->Tok.Length = Str.size(); 1114 Res->StartLoc = Loc; 1115 Res->EndLoc = Loc; 1116 return Res; 1117 } 1118 1119 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 1120 unsigned RegNo, SMLoc S, 1121 SMLoc E) { 1122 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 1123 Op->Reg.RegNo = RegNo; 1124 Op->Reg.Mods = Modifiers(); 1125 Op->StartLoc = S; 1126 Op->EndLoc = E; 1127 return Op; 1128 } 1129 1130 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 1131 const class MCExpr *Expr, SMLoc S) { 1132 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 1133 Op->Expr = Expr; 1134 Op->StartLoc = S; 1135 Op->EndLoc = S; 1136 return Op; 1137 } 1138 }; 1139 1140 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 1141 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 1142 return OS; 1143 } 1144 1145 //===----------------------------------------------------------------------===// 1146 // AsmParser 1147 //===----------------------------------------------------------------------===// 1148 1149 // Holds info related to the current kernel, e.g. count of SGPRs used. 1150 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 1151 // .amdgpu_hsa_kernel or at EOF. 
1152 class KernelScopeInfo { 1153 int SgprIndexUnusedMin = -1; 1154 int VgprIndexUnusedMin = -1; 1155 int AgprIndexUnusedMin = -1; 1156 MCContext *Ctx = nullptr; 1157 MCSubtargetInfo const *MSTI = nullptr; 1158 1159 void usesSgprAt(int i) { 1160 if (i >= SgprIndexUnusedMin) { 1161 SgprIndexUnusedMin = ++i; 1162 if (Ctx) { 1163 MCSymbol* const Sym = 1164 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1165 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1166 } 1167 } 1168 } 1169 1170 void usesVgprAt(int i) { 1171 if (i >= VgprIndexUnusedMin) { 1172 VgprIndexUnusedMin = ++i; 1173 if (Ctx) { 1174 MCSymbol* const Sym = 1175 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1176 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1177 VgprIndexUnusedMin); 1178 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1179 } 1180 } 1181 } 1182 1183 void usesAgprAt(int i) { 1184 // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction 1185 if (!hasMAIInsts(*MSTI)) 1186 return; 1187 1188 if (i >= AgprIndexUnusedMin) { 1189 AgprIndexUnusedMin = ++i; 1190 if (Ctx) { 1191 MCSymbol* const Sym = 1192 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count")); 1193 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx)); 1194 1195 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a) 1196 MCSymbol* const vSym = 1197 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1198 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1199 VgprIndexUnusedMin); 1200 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1201 } 1202 } 1203 } 1204 1205 public: 1206 KernelScopeInfo() = default; 1207 1208 void initialize(MCContext &Context) { 1209 Ctx = &Context; 1210 MSTI = Ctx->getSubtargetInfo(); 1211 1212 usesSgprAt(SgprIndexUnusedMin = -1); 1213 usesVgprAt(VgprIndexUnusedMin = -1); 1214 if (hasMAIInsts(*MSTI)) { 1215 usesAgprAt(AgprIndexUnusedMin = -1); 1216 } 1217 } 1218 1219 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, 1220 unsigned RegWidth) { 1221 switch (RegKind) { 1222 case IS_SGPR: 1223 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1224 break; 1225 case IS_AGPR: 1226 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1227 break; 1228 case IS_VGPR: 1229 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1230 break; 1231 default: 1232 break; 1233 } 1234 } 1235 }; 1236 1237 class AMDGPUAsmParser : public MCTargetAsmParser { 1238 MCAsmParser &Parser; 1239 1240 // Number of extra operands parsed after the first optional operand. 1241 // This may be necessary to skip hardcoded mandatory operands. 1242 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1243 1244 unsigned ForcedEncodingSize = 0; 1245 bool ForcedDPP = false; 1246 bool ForcedSDWA = false; 1247 KernelScopeInfo KernelScope; 1248 unsigned CPolSeen; 1249 1250 /// @name Auto-generated Match Functions 1251 /// { 1252 1253 #define GET_ASSEMBLER_HEADER 1254 #include "AMDGPUGenAsmMatcher.inc" 1255 1256 /// } 1257 1258 private: 1259 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1260 bool OutOfRangeError(SMRange Range); 1261 /// Calculate VGPR/SGPR blocks required for given target, reserved 1262 /// registers, and user-specified NextFreeXGPR values. 1263 /// 1264 /// \param Features [in] Target features, used for bug corrections. 1265 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1266 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 
1267 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1268 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1269 /// descriptor field, if valid. 1270 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1271 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1272 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1273 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1274 /// \param VGPRBlocks [out] Result VGPR block count. 1275 /// \param SGPRBlocks [out] Result SGPR block count. 1276 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1277 bool FlatScrUsed, bool XNACKUsed, 1278 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1279 SMRange VGPRRange, unsigned NextFreeSGPR, 1280 SMRange SGPRRange, unsigned &VGPRBlocks, 1281 unsigned &SGPRBlocks); 1282 bool ParseDirectiveAMDGCNTarget(); 1283 bool ParseDirectiveAMDHSAKernel(); 1284 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1285 bool ParseDirectiveHSACodeObjectVersion(); 1286 bool ParseDirectiveHSACodeObjectISA(); 1287 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1288 bool ParseDirectiveAMDKernelCodeT(); 1289 // TODO: Possibly make subtargetHasRegister const. 1290 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo); 1291 bool ParseDirectiveAMDGPUHsaKernel(); 1292 1293 bool ParseDirectiveISAVersion(); 1294 bool ParseDirectiveHSAMetadata(); 1295 bool ParseDirectivePALMetadataBegin(); 1296 bool ParseDirectivePALMetadata(); 1297 bool ParseDirectiveAMDGPULDS(); 1298 1299 /// Common code to parse out a block of text (typically YAML) between start and 1300 /// end directives. 1301 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1302 const char *AssemblerDirectiveEnd, 1303 std::string &CollectString); 1304 1305 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1306 RegisterKind RegKind, unsigned Reg1, SMLoc Loc); 1307 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1308 unsigned &RegNum, unsigned &RegWidth, 1309 bool RestoreOnFailure = false); 1310 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1311 unsigned &RegNum, unsigned &RegWidth, 1312 SmallVectorImpl<AsmToken> &Tokens); 1313 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, 1314 unsigned &RegWidth, 1315 SmallVectorImpl<AsmToken> &Tokens); 1316 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, 1317 unsigned &RegWidth, 1318 SmallVectorImpl<AsmToken> &Tokens); 1319 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 1320 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); 1321 bool ParseRegRange(unsigned& Num, unsigned& Width); 1322 unsigned getRegularReg(RegisterKind RegKind, 1323 unsigned RegNum, 1324 unsigned RegWidth, 1325 SMLoc Loc); 1326 1327 bool isRegister(); 1328 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1329 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1330 void initializeGprCountSymbol(RegisterKind RegKind); 1331 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1332 unsigned RegWidth); 1333 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1334 bool IsAtomic, bool IsLds = false); 1335 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 1336 bool IsGdsHardcoded); 1337 1338 public: 1339 enum AMDGPUMatchResultTy { 1340 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 1341 }; 1342 enum 
OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific
      // target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  // TODO: isGFX90A is also true for GFX940. We need to clean this up.
1425 bool isGFX90A() const { 1426 return AMDGPU::isGFX90A(getSTI()); 1427 } 1428 1429 bool isGFX940() const { 1430 return AMDGPU::isGFX940(getSTI()); 1431 } 1432 1433 bool isGFX9Plus() const { 1434 return AMDGPU::isGFX9Plus(getSTI()); 1435 } 1436 1437 bool isGFX10() const { 1438 return AMDGPU::isGFX10(getSTI()); 1439 } 1440 1441 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1442 1443 bool isGFX11() const { 1444 return AMDGPU::isGFX11(getSTI()); 1445 } 1446 1447 bool isGFX11Plus() const { 1448 return AMDGPU::isGFX11Plus(getSTI()); 1449 } 1450 1451 bool isGFX10_BEncoding() const { 1452 return AMDGPU::isGFX10_BEncoding(getSTI()); 1453 } 1454 1455 bool hasInv2PiInlineImm() const { 1456 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1457 } 1458 1459 bool hasFlatOffsets() const { 1460 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1461 } 1462 1463 bool hasArchitectedFlatScratch() const { 1464 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; 1465 } 1466 1467 bool hasSGPR102_SGPR103() const { 1468 return !isVI() && !isGFX9(); 1469 } 1470 1471 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1472 1473 bool hasIntClamp() const { 1474 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1475 } 1476 1477 AMDGPUTargetStreamer &getTargetStreamer() { 1478 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1479 return static_cast<AMDGPUTargetStreamer &>(TS); 1480 } 1481 1482 const MCRegisterInfo *getMRI() const { 1483 // We need this const_cast because for some reason getContext() is not const 1484 // in MCAsmParser. 1485 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1486 } 1487 1488 const MCInstrInfo *getMII() const { 1489 return &MII; 1490 } 1491 1492 const FeatureBitset &getFeatureBits() const { 1493 return getSTI().getFeatureBits(); 1494 } 1495 1496 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1497 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1498 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1499 1500 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1501 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1502 bool isForcedDPP() const { return ForcedDPP; } 1503 bool isForcedSDWA() const { return ForcedSDWA; } 1504 ArrayRef<unsigned> getMatchedVariants() const; 1505 StringRef getMatchedVariantName() const; 1506 1507 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1508 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1509 bool RestoreOnFailure); 1510 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1511 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1512 SMLoc &EndLoc) override; 1513 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1514 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1515 unsigned Kind) override; 1516 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1517 OperandVector &Operands, MCStreamer &Out, 1518 uint64_t &ErrorInfo, 1519 bool MatchingInlineAsm) override; 1520 bool ParseDirective(AsmToken DirectiveID) override; 1521 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1522 OperandMode Mode = OperandMode_Default); 1523 StringRef parseMnemonicSuffix(StringRef Name); 1524 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1525 SMLoc NameLoc, OperandVector &Operands) override; 1526 //bool 
ProcessInstruction(MCInst &Inst); 1527 1528 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1529 1530 OperandMatchResultTy 1531 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1532 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1533 bool (*ConvertResult)(int64_t &) = nullptr); 1534 1535 OperandMatchResultTy 1536 parseOperandArrayWithPrefix(const char *Prefix, 1537 OperandVector &Operands, 1538 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1539 bool (*ConvertResult)(int64_t&) = nullptr); 1540 1541 OperandMatchResultTy 1542 parseNamedBit(StringRef Name, OperandVector &Operands, 1543 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1544 OperandMatchResultTy parseCPol(OperandVector &Operands); 1545 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1546 StringRef &Value, 1547 SMLoc &StringLoc); 1548 1549 bool isModifier(); 1550 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1551 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1552 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1553 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1554 bool parseSP3NegModifier(); 1555 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1556 OperandMatchResultTy parseReg(OperandVector &Operands); 1557 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1558 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1559 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1560 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1561 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1562 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1563 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1564 OperandMatchResultTy parseUfmt(int64_t &Format); 1565 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1566 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1567 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1568 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1569 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1570 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1571 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1572 1573 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1574 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1575 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1576 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1577 1578 bool parseCnt(int64_t &IntVal); 1579 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1580 1581 bool parseDepCtr(int64_t &IntVal, unsigned &Mask); 1582 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName); 1583 OperandMatchResultTy parseDepCtrOps(OperandVector &Operands); 1584 1585 bool parseDelay(int64_t &Delay); 1586 OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands); 1587 1588 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1589 1590 private: 1591 struct OperandInfoTy { 1592 SMLoc Loc; 1593 int64_t Id; 
1594 bool IsSymbolic = false; 1595 bool IsDefined = false; 1596 1597 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1598 }; 1599 1600 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1601 bool validateSendMsg(const OperandInfoTy &Msg, 1602 const OperandInfoTy &Op, 1603 const OperandInfoTy &Stream); 1604 1605 bool parseHwregBody(OperandInfoTy &HwReg, 1606 OperandInfoTy &Offset, 1607 OperandInfoTy &Width); 1608 bool validateHwreg(const OperandInfoTy &HwReg, 1609 const OperandInfoTy &Offset, 1610 const OperandInfoTy &Width); 1611 1612 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1613 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1614 SMLoc getBLGPLoc(const OperandVector &Operands) const; 1615 1616 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1617 const OperandVector &Operands) const; 1618 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1619 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1620 SMLoc getLitLoc(const OperandVector &Operands) const; 1621 SMLoc getConstLoc(const OperandVector &Operands) const; 1622 1623 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1624 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1625 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1626 bool validateSOPLiteral(const MCInst &Inst) const; 1627 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1628 bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands); 1629 bool validateIntClampSupported(const MCInst &Inst); 1630 bool validateMIMGAtomicDMask(const MCInst &Inst); 1631 bool validateMIMGGatherDMask(const MCInst &Inst); 1632 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1633 Optional<StringRef> validateMIMGDataSize(const MCInst &Inst); 1634 bool validateMIMGAddrSize(const MCInst &Inst); 1635 bool validateMIMGD16(const MCInst &Inst); 1636 bool validateMIMGDim(const MCInst &Inst); 1637 bool validateMIMGMSAA(const MCInst &Inst); 1638 bool validateOpSel(const MCInst &Inst); 1639 bool validateDPP(const MCInst &Inst, const OperandVector &Operands); 1640 bool validateVccOperand(unsigned Reg) const; 1641 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands); 1642 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1643 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands); 1644 bool validateAGPRLdSt(const MCInst &Inst) const; 1645 bool validateVGPRAlign(const MCInst &Inst) const; 1646 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands); 1647 bool validateGWS(const MCInst &Inst, const OperandVector &Operands); 1648 bool validateDivScale(const MCInst &Inst); 1649 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1650 const SMLoc &IDLoc); 1651 bool validateFlatLdsDMA(const MCInst &Inst, const OperandVector &Operands, 1652 const SMLoc &IDLoc); 1653 bool validateExeczVcczOperands(const OperandVector &Operands); 1654 Optional<StringRef> validateLdsDirect(const MCInst &Inst); 1655 unsigned getConstantBusLimit(unsigned Opcode) const; 1656 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1657 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1658 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1659 1660 bool isSupportedMnemo(StringRef Mnemo, 1661 const 
FeatureBitset &FBS); 1662 bool isSupportedMnemo(StringRef Mnemo, 1663 const FeatureBitset &FBS, 1664 ArrayRef<unsigned> Variants); 1665 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1666 1667 bool isId(const StringRef Id) const; 1668 bool isId(const AsmToken &Token, const StringRef Id) const; 1669 bool isToken(const AsmToken::TokenKind Kind) const; 1670 bool trySkipId(const StringRef Id); 1671 bool trySkipId(const StringRef Pref, const StringRef Id); 1672 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1673 bool trySkipToken(const AsmToken::TokenKind Kind); 1674 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1675 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1676 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1677 1678 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1679 AsmToken::TokenKind getTokenKind() const; 1680 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1681 bool parseExpr(OperandVector &Operands); 1682 StringRef getTokenStr() const; 1683 AsmToken peekToken(bool ShouldSkipSpace = true); 1684 AsmToken getToken() const; 1685 SMLoc getLoc() const; 1686 void lex(); 1687 1688 public: 1689 void onBeginOfFile() override; 1690 1691 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1692 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1693 1694 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1695 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1696 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1697 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1698 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1699 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1700 1701 bool parseSwizzleOperand(int64_t &Op, 1702 const unsigned MinVal, 1703 const unsigned MaxVal, 1704 const StringRef ErrMsg, 1705 SMLoc &Loc); 1706 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1707 const unsigned MinVal, 1708 const unsigned MaxVal, 1709 const StringRef ErrMsg); 1710 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1711 bool parseSwizzleOffset(int64_t &Imm); 1712 bool parseSwizzleMacro(int64_t &Imm); 1713 bool parseSwizzleQuadPerm(int64_t &Imm); 1714 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1715 bool parseSwizzleBroadcast(int64_t &Imm); 1716 bool parseSwizzleSwap(int64_t &Imm); 1717 bool parseSwizzleReverse(int64_t &Imm); 1718 1719 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1720 int64_t parseGPRIdxMacro(); 1721 1722 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 1723 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1724 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); } 1725 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1726 1727 AMDGPUOperand::Ptr defaultCPol() const; 1728 1729 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1730 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1731 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1732 AMDGPUOperand::Ptr defaultFlatOffset() const; 1733 1734 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1735 1736 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1737 OptionalImmIndexMap &OptionalIdx); 1738 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1739 void 
cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1740 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1741 void cvtVOPD(MCInst &Inst, const OperandVector &Operands); 1742 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 1743 OptionalImmIndexMap &OptionalIdx); 1744 1745 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1746 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands); 1747 1748 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1749 bool IsAtomic = false); 1750 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1751 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1752 1753 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands); 1754 1755 bool parseDimId(unsigned &Encoding); 1756 OperandMatchResultTy parseDim(OperandVector &Operands); 1757 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1758 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1759 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1760 int64_t parseDPPCtrlSel(StringRef Ctrl); 1761 int64_t parseDPPCtrlPerm(); 1762 AMDGPUOperand::Ptr defaultRowMask() const; 1763 AMDGPUOperand::Ptr defaultBankMask() const; 1764 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1765 AMDGPUOperand::Ptr defaultFI() const; 1766 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1767 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { 1768 cvtDPP(Inst, Operands, true); 1769 } 1770 void cvtVOPCNoDstDPP(MCInst &Inst, const OperandVector &Operands, 1771 bool IsDPP8 = false); 1772 void cvtVOPCNoDstDPP8(MCInst &Inst, const OperandVector &Operands) { 1773 cvtVOPCNoDstDPP(Inst, Operands, true); 1774 } 1775 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, 1776 bool IsDPP8 = false); 1777 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) { 1778 cvtVOP3DPP(Inst, Operands, true); 1779 } 1780 void cvtVOPC64NoDstDPP(MCInst &Inst, const OperandVector &Operands, 1781 bool IsDPP8 = false); 1782 void cvtVOPC64NoDstDPP8(MCInst &Inst, const OperandVector &Operands) { 1783 cvtVOPC64NoDstDPP(Inst, Operands, true); 1784 } 1785 1786 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1787 AMDGPUOperand::ImmTy Type); 1788 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1789 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1790 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1791 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1792 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1793 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1794 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1795 uint64_t BasicInstType, 1796 bool SkipDstVcc = false, 1797 bool SkipSrcVcc = false); 1798 1799 AMDGPUOperand::Ptr defaultBLGP() const; 1800 AMDGPUOperand::Ptr defaultCBSZ() const; 1801 AMDGPUOperand::Ptr defaultABID() const; 1802 1803 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1804 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1805 1806 AMDGPUOperand::Ptr defaultWaitVDST() const; 1807 AMDGPUOperand::Ptr defaultWaitEXP() const; 1808 OperandMatchResultTy parseVOPD(OperandVector &Operands); 1809 }; 1810 1811 struct OptionalOperand { 1812 const char *Name; 1813 AMDGPUOperand::ImmTy Type; 1814 bool IsBit; 1815 bool (*ConvertResult)(int64_t&); 1816 }; 1817 1818 } // end anonymous namespace 1819 1820 // May be called with integer type with equivalent 
bitwidth. 1821 static const fltSemantics *getFltSemantics(unsigned Size) { 1822 switch (Size) { 1823 case 4: 1824 return &APFloat::IEEEsingle(); 1825 case 8: 1826 return &APFloat::IEEEdouble(); 1827 case 2: 1828 return &APFloat::IEEEhalf(); 1829 default: 1830 llvm_unreachable("unsupported fp type"); 1831 } 1832 } 1833 1834 static const fltSemantics *getFltSemantics(MVT VT) { 1835 return getFltSemantics(VT.getSizeInBits() / 8); 1836 } 1837 1838 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1839 switch (OperandType) { 1840 case AMDGPU::OPERAND_REG_IMM_INT32: 1841 case AMDGPU::OPERAND_REG_IMM_FP32: 1842 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1843 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1844 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1845 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1846 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1847 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1848 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1849 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1850 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1851 case AMDGPU::OPERAND_KIMM32: 1852 return &APFloat::IEEEsingle(); 1853 case AMDGPU::OPERAND_REG_IMM_INT64: 1854 case AMDGPU::OPERAND_REG_IMM_FP64: 1855 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1856 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1857 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1858 return &APFloat::IEEEdouble(); 1859 case AMDGPU::OPERAND_REG_IMM_INT16: 1860 case AMDGPU::OPERAND_REG_IMM_FP16: 1861 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1862 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1863 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1864 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1865 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1866 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1867 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1868 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1869 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1870 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1871 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1872 case AMDGPU::OPERAND_KIMM16: 1873 return &APFloat::IEEEhalf(); 1874 default: 1875 llvm_unreachable("unsupported fp type"); 1876 } 1877 } 1878 1879 //===----------------------------------------------------------------------===// 1880 // Operand 1881 //===----------------------------------------------------------------------===// 1882 1883 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1884 bool Lost; 1885 1886 // Convert literal to single precision 1887 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1888 APFloat::rmNearestTiesToEven, 1889 &Lost); 1890 // We allow precision lost but not overflow or underflow 1891 if (Status != APFloat::opOK && 1892 Lost && 1893 ((Status & APFloat::opOverflow) != 0 || 1894 (Status & APFloat::opUnderflow) != 0)) { 1895 return false; 1896 } 1897 1898 return true; 1899 } 1900 1901 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1902 return isUIntN(Size, Val) || isIntN(Size, Val); 1903 } 1904 1905 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1906 if (VT.getScalarType() == MVT::i16) { 1907 // FP immediate values are broken. 1908 return isInlinableIntLiteral(Val); 1909 } 1910 1911 // f16/v2f16 operands work correctly for all values. 1912 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1913 } 1914 1915 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1916 1917 // This is a hack to enable named inline values like 1918 // shared_base with both 32-bit and 64-bit operands. 1919 // Note that these values are defined as 1920 // 32-bit operands only. 
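  // For example (illustrative, assuming a GFX9+ target where these named
  // values exist), 'src_shared_base' is accepted as both a 32-bit and a
  // 64-bit source:
  //   s_mov_b32 s0, src_shared_base
  //   s_mov_b64 s[0:1], src_shared_base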
1921 if (isInlineValue()) { 1922 return true; 1923 } 1924 1925 if (!isImmTy(ImmTyNone)) { 1926 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1927 return false; 1928 } 1929 // TODO: We should avoid using host float here. It would be better to 1930 // check the float bit values which is what a few other places do. 1931 // We've had bot failures before due to weird NaN support on mips hosts. 1932 1933 APInt Literal(64, Imm.Val); 1934 1935 if (Imm.IsFPImm) { // We got fp literal token 1936 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1937 return AMDGPU::isInlinableLiteral64(Imm.Val, 1938 AsmParser->hasInv2PiInlineImm()); 1939 } 1940 1941 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1942 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1943 return false; 1944 1945 if (type.getScalarSizeInBits() == 16) { 1946 return isInlineableLiteralOp16( 1947 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1948 type, AsmParser->hasInv2PiInlineImm()); 1949 } 1950 1951 // Check if single precision literal is inlinable 1952 return AMDGPU::isInlinableLiteral32( 1953 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1954 AsmParser->hasInv2PiInlineImm()); 1955 } 1956 1957 // We got int literal token. 1958 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1959 return AMDGPU::isInlinableLiteral64(Imm.Val, 1960 AsmParser->hasInv2PiInlineImm()); 1961 } 1962 1963 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1964 return false; 1965 } 1966 1967 if (type.getScalarSizeInBits() == 16) { 1968 return isInlineableLiteralOp16( 1969 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1970 type, AsmParser->hasInv2PiInlineImm()); 1971 } 1972 1973 return AMDGPU::isInlinableLiteral32( 1974 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1975 AsmParser->hasInv2PiInlineImm()); 1976 } 1977 1978 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1979 // Check that this immediate can be added as literal 1980 if (!isImmTy(ImmTyNone)) { 1981 return false; 1982 } 1983 1984 if (!Imm.IsFPImm) { 1985 // We got int literal token. 1986 1987 if (type == MVT::f64 && hasFPModifiers()) { 1988 // Cannot apply fp modifiers to int literals preserving the same semantics 1989 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1990 // disable these cases. 1991 return false; 1992 } 1993 1994 unsigned Size = type.getSizeInBits(); 1995 if (Size == 64) 1996 Size = 32; 1997 1998 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1999 // types. 2000 return isSafeTruncation(Imm.Val, Size); 2001 } 2002 2003 // We got fp literal token 2004 if (type == MVT::f64) { // Expected 64-bit fp operand 2005 // We would set low 64-bits of literal to zeroes but we accept this literals 2006 return true; 2007 } 2008 2009 if (type == MVT::i64) { // Expected 64-bit int operand 2010 // We don't allow fp literals in 64-bit integer instructions. It is 2011 // unclear how we should encode them. 2012 return false; 2013 } 2014 2015 // We allow fp literals with f16x2 operands assuming that the specified 2016 // literal goes into the lower half and the upper half is zero. We also 2017 // require that the literal may be losslessly converted to f16. 2018 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 2019 (type == MVT::v2i16)? MVT::i16 : 2020 (type == MVT::v2f32)? 
MVT::f32 : type; 2021 2022 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 2023 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 2024 } 2025 2026 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 2027 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 2028 } 2029 2030 bool AMDGPUOperand::isVRegWithInputMods() const { 2031 return isRegClass(AMDGPU::VGPR_32RegClassID) || 2032 // GFX90A allows DPP on 64-bit operands. 2033 (isRegClass(AMDGPU::VReg_64RegClassID) && 2034 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 2035 } 2036 2037 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 2038 if (AsmParser->isVI()) 2039 return isVReg32(); 2040 else if (AsmParser->isGFX9Plus()) 2041 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 2042 else 2043 return false; 2044 } 2045 2046 bool AMDGPUOperand::isSDWAFP16Operand() const { 2047 return isSDWAOperand(MVT::f16); 2048 } 2049 2050 bool AMDGPUOperand::isSDWAFP32Operand() const { 2051 return isSDWAOperand(MVT::f32); 2052 } 2053 2054 bool AMDGPUOperand::isSDWAInt16Operand() const { 2055 return isSDWAOperand(MVT::i16); 2056 } 2057 2058 bool AMDGPUOperand::isSDWAInt32Operand() const { 2059 return isSDWAOperand(MVT::i32); 2060 } 2061 2062 bool AMDGPUOperand::isBoolReg() const { 2063 auto FB = AsmParser->getFeatureBits(); 2064 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 2065 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 2066 } 2067 2068 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 2069 { 2070 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2071 assert(Size == 2 || Size == 4 || Size == 8); 2072 2073 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 2074 2075 if (Imm.Mods.Abs) { 2076 Val &= ~FpSignMask; 2077 } 2078 if (Imm.Mods.Neg) { 2079 Val ^= FpSignMask; 2080 } 2081 2082 return Val; 2083 } 2084 2085 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 2086 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 2087 Inst.getNumOperands())) { 2088 addLiteralImmOperand(Inst, Imm.Val, 2089 ApplyModifiers & 2090 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2091 } else { 2092 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 2093 Inst.addOperand(MCOperand::createImm(Imm.Val)); 2094 setImmKindNone(); 2095 } 2096 } 2097 2098 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 2099 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 2100 auto OpNum = Inst.getNumOperands(); 2101 // Check that this operand accepts literals 2102 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 2103 2104 if (ApplyModifiers) { 2105 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 2106 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 2107 Val = applyInputFPModifiers(Val, Size); 2108 } 2109 2110 APInt Literal(64, Val); 2111 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 2112 2113 if (Imm.IsFPImm) { // We got fp literal token 2114 switch (OpTy) { 2115 case AMDGPU::OPERAND_REG_IMM_INT64: 2116 case AMDGPU::OPERAND_REG_IMM_FP64: 2117 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2118 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2119 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2120 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2121 AsmParser->hasInv2PiInlineImm())) { 2122 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2123 setImmKindConst(); 2124 return; 2125 } 2126 2127 // Non-inlineable 2128 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2129 // For fp operands we check if low 32 bits are zeros 2130 if (Literal.getLoBits(32) != 0) { 2131 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2132 "Can't encode literal as exact 64-bit floating-point operand. " 2133 "Low 32-bits will be set to zero"); 2134 } 2135 2136 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2137 setImmKindLiteral(); 2138 return; 2139 } 2140 2141 // We don't allow fp literals in 64-bit integer instructions. It is 2142 // unclear how we should encode them. This case should be checked earlier 2143 // in predicate methods (isLiteralImm()) 2144 llvm_unreachable("fp literal in 64-bit integer instruction."); 2145 2146 case AMDGPU::OPERAND_REG_IMM_INT32: 2147 case AMDGPU::OPERAND_REG_IMM_FP32: 2148 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2149 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2150 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2151 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2152 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2153 case AMDGPU::OPERAND_REG_IMM_INT16: 2154 case AMDGPU::OPERAND_REG_IMM_FP16: 2155 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2156 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2157 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2158 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2159 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2160 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2161 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2162 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2163 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2164 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2165 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2166 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2167 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2168 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2169 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2170 case AMDGPU::OPERAND_KIMM32: 2171 case AMDGPU::OPERAND_KIMM16: { 2172 bool lost; 2173 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2174 // Convert literal to single precision 2175 FPLiteral.convert(*getOpFltSemantics(OpTy), 2176 APFloat::rmNearestTiesToEven, &lost); 2177 // We allow precision lost but not overflow or underflow. This should be 2178 // checked earlier in isLiteralImm() 2179 2180 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2181 Inst.addOperand(MCOperand::createImm(ImmVal)); 2182 setImmKindLiteral(); 2183 return; 2184 } 2185 default: 2186 llvm_unreachable("invalid operand size"); 2187 } 2188 2189 return; 2190 } 2191 2192 // We got int literal token. 2193 // Only sign extend inline immediates. 
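  // For example, with a 32-bit operand an integer token such as -1 lies in
  // the inline range [-16..64] and becomes an inline constant, while
  // 0x12345678 does not and is emitted as a 32-bit literal (truncated with
  // Val & 0xffffffff below).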
2194 switch (OpTy) { 2195 case AMDGPU::OPERAND_REG_IMM_INT32: 2196 case AMDGPU::OPERAND_REG_IMM_FP32: 2197 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2198 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2199 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2200 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2201 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2202 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2203 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2204 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2205 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2206 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2207 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2208 if (isSafeTruncation(Val, 32) && 2209 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2210 AsmParser->hasInv2PiInlineImm())) { 2211 Inst.addOperand(MCOperand::createImm(Val)); 2212 setImmKindConst(); 2213 return; 2214 } 2215 2216 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2217 setImmKindLiteral(); 2218 return; 2219 2220 case AMDGPU::OPERAND_REG_IMM_INT64: 2221 case AMDGPU::OPERAND_REG_IMM_FP64: 2222 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2223 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2224 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2225 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2226 Inst.addOperand(MCOperand::createImm(Val)); 2227 setImmKindConst(); 2228 return; 2229 } 2230 2231 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2232 setImmKindLiteral(); 2233 return; 2234 2235 case AMDGPU::OPERAND_REG_IMM_INT16: 2236 case AMDGPU::OPERAND_REG_IMM_FP16: 2237 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2238 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2239 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2240 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2241 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2242 if (isSafeTruncation(Val, 16) && 2243 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2244 AsmParser->hasInv2PiInlineImm())) { 2245 Inst.addOperand(MCOperand::createImm(Val)); 2246 setImmKindConst(); 2247 return; 2248 } 2249 2250 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2251 setImmKindLiteral(); 2252 return; 2253 2254 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2255 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2256 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2257 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2258 assert(isSafeTruncation(Val, 16)); 2259 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2260 AsmParser->hasInv2PiInlineImm())); 2261 2262 Inst.addOperand(MCOperand::createImm(Val)); 2263 return; 2264 } 2265 case AMDGPU::OPERAND_KIMM32: 2266 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2267 setImmKindNone(); 2268 return; 2269 case AMDGPU::OPERAND_KIMM16: 2270 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2271 setImmKindNone(); 2272 return; 2273 default: 2274 llvm_unreachable("invalid operand size"); 2275 } 2276 } 2277 2278 template <unsigned Bitwidth> 2279 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2280 APInt Literal(64, Imm.Val); 2281 setImmKindNone(); 2282 2283 if (!Imm.IsFPImm) { 2284 // We got int literal token. 
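  // The integer token is passed through as the raw K-immediate bits; e.g. a
  // 32-bit K operand written as 0x41200000 keeps exactly that bit pattern
  // (illustrative value, presumably the usual way such literals are spelled
  // for madmk/fmamk-style opcodes).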
2285 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2286 return; 2287 } 2288 2289 bool Lost; 2290 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2291 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2292 APFloat::rmNearestTiesToEven, &Lost); 2293 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2294 } 2295 2296 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2297 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2298 } 2299 2300 static bool isInlineValue(unsigned Reg) { 2301 switch (Reg) { 2302 case AMDGPU::SRC_SHARED_BASE: 2303 case AMDGPU::SRC_SHARED_LIMIT: 2304 case AMDGPU::SRC_PRIVATE_BASE: 2305 case AMDGPU::SRC_PRIVATE_LIMIT: 2306 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2307 return true; 2308 case AMDGPU::SRC_VCCZ: 2309 case AMDGPU::SRC_EXECZ: 2310 case AMDGPU::SRC_SCC: 2311 return true; 2312 case AMDGPU::SGPR_NULL: 2313 return true; 2314 default: 2315 return false; 2316 } 2317 } 2318 2319 bool AMDGPUOperand::isInlineValue() const { 2320 return isRegKind() && ::isInlineValue(getReg()); 2321 } 2322 2323 //===----------------------------------------------------------------------===// 2324 // AsmParser 2325 //===----------------------------------------------------------------------===// 2326 2327 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2328 if (Is == IS_VGPR) { 2329 switch (RegWidth) { 2330 default: return -1; 2331 case 32: 2332 return AMDGPU::VGPR_32RegClassID; 2333 case 64: 2334 return AMDGPU::VReg_64RegClassID; 2335 case 96: 2336 return AMDGPU::VReg_96RegClassID; 2337 case 128: 2338 return AMDGPU::VReg_128RegClassID; 2339 case 160: 2340 return AMDGPU::VReg_160RegClassID; 2341 case 192: 2342 return AMDGPU::VReg_192RegClassID; 2343 case 224: 2344 return AMDGPU::VReg_224RegClassID; 2345 case 256: 2346 return AMDGPU::VReg_256RegClassID; 2347 case 512: 2348 return AMDGPU::VReg_512RegClassID; 2349 case 1024: 2350 return AMDGPU::VReg_1024RegClassID; 2351 } 2352 } else if (Is == IS_TTMP) { 2353 switch (RegWidth) { 2354 default: return -1; 2355 case 32: 2356 return AMDGPU::TTMP_32RegClassID; 2357 case 64: 2358 return AMDGPU::TTMP_64RegClassID; 2359 case 128: 2360 return AMDGPU::TTMP_128RegClassID; 2361 case 256: 2362 return AMDGPU::TTMP_256RegClassID; 2363 case 512: 2364 return AMDGPU::TTMP_512RegClassID; 2365 } 2366 } else if (Is == IS_SGPR) { 2367 switch (RegWidth) { 2368 default: return -1; 2369 case 32: 2370 return AMDGPU::SGPR_32RegClassID; 2371 case 64: 2372 return AMDGPU::SGPR_64RegClassID; 2373 case 96: 2374 return AMDGPU::SGPR_96RegClassID; 2375 case 128: 2376 return AMDGPU::SGPR_128RegClassID; 2377 case 160: 2378 return AMDGPU::SGPR_160RegClassID; 2379 case 192: 2380 return AMDGPU::SGPR_192RegClassID; 2381 case 224: 2382 return AMDGPU::SGPR_224RegClassID; 2383 case 256: 2384 return AMDGPU::SGPR_256RegClassID; 2385 case 512: 2386 return AMDGPU::SGPR_512RegClassID; 2387 } 2388 } else if (Is == IS_AGPR) { 2389 switch (RegWidth) { 2390 default: return -1; 2391 case 32: 2392 return AMDGPU::AGPR_32RegClassID; 2393 case 64: 2394 return AMDGPU::AReg_64RegClassID; 2395 case 96: 2396 return AMDGPU::AReg_96RegClassID; 2397 case 128: 2398 return AMDGPU::AReg_128RegClassID; 2399 case 160: 2400 return AMDGPU::AReg_160RegClassID; 2401 case 192: 2402 return AMDGPU::AReg_192RegClassID; 2403 case 224: 2404 return AMDGPU::AReg_224RegClassID; 2405 case 256: 2406 return AMDGPU::AReg_256RegClassID; 2407 case 512: 2408 return AMDGPU::AReg_512RegClassID; 
2409 case 1024: 2410 return AMDGPU::AReg_1024RegClassID; 2411 } 2412 } 2413 return -1; 2414 } 2415 2416 static unsigned getSpecialRegForName(StringRef RegName) { 2417 return StringSwitch<unsigned>(RegName) 2418 .Case("exec", AMDGPU::EXEC) 2419 .Case("vcc", AMDGPU::VCC) 2420 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2421 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2422 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2423 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2424 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2425 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2426 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2427 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2428 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2429 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2430 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2431 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2432 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2433 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2434 .Case("m0", AMDGPU::M0) 2435 .Case("vccz", AMDGPU::SRC_VCCZ) 2436 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2437 .Case("execz", AMDGPU::SRC_EXECZ) 2438 .Case("src_execz", AMDGPU::SRC_EXECZ) 2439 .Case("scc", AMDGPU::SRC_SCC) 2440 .Case("src_scc", AMDGPU::SRC_SCC) 2441 .Case("tba", AMDGPU::TBA) 2442 .Case("tma", AMDGPU::TMA) 2443 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2444 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2445 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2446 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2447 .Case("vcc_lo", AMDGPU::VCC_LO) 2448 .Case("vcc_hi", AMDGPU::VCC_HI) 2449 .Case("exec_lo", AMDGPU::EXEC_LO) 2450 .Case("exec_hi", AMDGPU::EXEC_HI) 2451 .Case("tma_lo", AMDGPU::TMA_LO) 2452 .Case("tma_hi", AMDGPU::TMA_HI) 2453 .Case("tba_lo", AMDGPU::TBA_LO) 2454 .Case("tba_hi", AMDGPU::TBA_HI) 2455 .Case("pc", AMDGPU::PC_REG) 2456 .Case("null", AMDGPU::SGPR_NULL) 2457 .Default(AMDGPU::NoRegister); 2458 } 2459 2460 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2461 SMLoc &EndLoc, bool RestoreOnFailure) { 2462 auto R = parseRegister(); 2463 if (!R) return true; 2464 assert(R->isReg()); 2465 RegNo = R->getReg(); 2466 StartLoc = R->getStartLoc(); 2467 EndLoc = R->getEndLoc(); 2468 return false; 2469 } 2470 2471 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2472 SMLoc &EndLoc) { 2473 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2474 } 2475 2476 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2477 SMLoc &StartLoc, 2478 SMLoc &EndLoc) { 2479 bool Result = 2480 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2481 bool PendingErrors = getParser().hasPendingError(); 2482 getParser().clearPendingErrors(); 2483 if (PendingErrors) 2484 return MatchOperand_ParseFail; 2485 if (Result) 2486 return MatchOperand_NoMatch; 2487 return MatchOperand_Success; 2488 } 2489 2490 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2491 RegisterKind RegKind, unsigned Reg1, 2492 SMLoc Loc) { 2493 switch (RegKind) { 2494 case IS_SPECIAL: 2495 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2496 Reg = AMDGPU::EXEC; 2497 RegWidth = 64; 2498 return true; 2499 } 2500 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2501 Reg = AMDGPU::FLAT_SCR; 2502 RegWidth = 64; 2503 return true; 2504 } 2505 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2506 Reg = AMDGPU::XNACK_MASK; 2507 RegWidth = 64; 
2508 return true; 2509 } 2510 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2511 Reg = AMDGPU::VCC; 2512 RegWidth = 64; 2513 return true; 2514 } 2515 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2516 Reg = AMDGPU::TBA; 2517 RegWidth = 64; 2518 return true; 2519 } 2520 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2521 Reg = AMDGPU::TMA; 2522 RegWidth = 64; 2523 return true; 2524 } 2525 Error(Loc, "register does not fit in the list"); 2526 return false; 2527 case IS_VGPR: 2528 case IS_SGPR: 2529 case IS_AGPR: 2530 case IS_TTMP: 2531 if (Reg1 != Reg + RegWidth / 32) { 2532 Error(Loc, "registers in a list must have consecutive indices"); 2533 return false; 2534 } 2535 RegWidth += 32; 2536 return true; 2537 default: 2538 llvm_unreachable("unexpected register kind"); 2539 } 2540 } 2541 2542 struct RegInfo { 2543 StringLiteral Name; 2544 RegisterKind Kind; 2545 }; 2546 2547 static constexpr RegInfo RegularRegisters[] = { 2548 {{"v"}, IS_VGPR}, 2549 {{"s"}, IS_SGPR}, 2550 {{"ttmp"}, IS_TTMP}, 2551 {{"acc"}, IS_AGPR}, 2552 {{"a"}, IS_AGPR}, 2553 }; 2554 2555 static bool isRegularReg(RegisterKind Kind) { 2556 return Kind == IS_VGPR || 2557 Kind == IS_SGPR || 2558 Kind == IS_TTMP || 2559 Kind == IS_AGPR; 2560 } 2561 2562 static const RegInfo* getRegularRegInfo(StringRef Str) { 2563 for (const RegInfo &Reg : RegularRegisters) 2564 if (Str.startswith(Reg.Name)) 2565 return &Reg; 2566 return nullptr; 2567 } 2568 2569 static bool getRegNum(StringRef Str, unsigned& Num) { 2570 return !Str.getAsInteger(10, Num); 2571 } 2572 2573 bool 2574 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2575 const AsmToken &NextToken) const { 2576 2577 // A list of consecutive registers: [s0,s1,s2,s3] 2578 if (Token.is(AsmToken::LBrac)) 2579 return true; 2580 2581 if (!Token.is(AsmToken::Identifier)) 2582 return false; 2583 2584 // A single register like s0 or a range of registers like s[0:1] 2585 2586 StringRef Str = Token.getString(); 2587 const RegInfo *Reg = getRegularRegInfo(Str); 2588 if (Reg) { 2589 StringRef RegName = Reg->Name; 2590 StringRef RegSuffix = Str.substr(RegName.size()); 2591 if (!RegSuffix.empty()) { 2592 unsigned Num; 2593 // A single register with an index: rXX 2594 if (getRegNum(RegSuffix, Num)) 2595 return true; 2596 } else { 2597 // A range of registers: r[XX:YY]. 2598 if (NextToken.is(AsmToken::LBrac)) 2599 return true; 2600 } 2601 } 2602 2603 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2604 } 2605 2606 bool 2607 AMDGPUAsmParser::isRegister() 2608 { 2609 return isRegister(getToken(), peekToken()); 2610 } 2611 2612 unsigned 2613 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2614 unsigned RegNum, 2615 unsigned RegWidth, 2616 SMLoc Loc) { 2617 2618 assert(isRegularReg(RegKind)); 2619 2620 unsigned AlignSize = 1; 2621 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2622 // SGPR and TTMP registers must be aligned. 2623 // Max required alignment is 4 dwords. 
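    // For example, a 64-bit pair must start at an even index: s[0:1] is
    // accepted, s[1:2] is rejected with "invalid register alignment" below,
    // and quads or wider tuples such as s[4:7] must be 4-dword aligned.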
2624 AlignSize = std::min(RegWidth / 32, 4u); 2625 } 2626 2627 if (RegNum % AlignSize != 0) { 2628 Error(Loc, "invalid register alignment"); 2629 return AMDGPU::NoRegister; 2630 } 2631 2632 unsigned RegIdx = RegNum / AlignSize; 2633 int RCID = getRegClass(RegKind, RegWidth); 2634 if (RCID == -1) { 2635 Error(Loc, "invalid or unsupported register size"); 2636 return AMDGPU::NoRegister; 2637 } 2638 2639 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2640 const MCRegisterClass RC = TRI->getRegClass(RCID); 2641 if (RegIdx >= RC.getNumRegs()) { 2642 Error(Loc, "register index is out of range"); 2643 return AMDGPU::NoRegister; 2644 } 2645 2646 return RC.getRegister(RegIdx); 2647 } 2648 2649 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) { 2650 int64_t RegLo, RegHi; 2651 if (!skipToken(AsmToken::LBrac, "missing register index")) 2652 return false; 2653 2654 SMLoc FirstIdxLoc = getLoc(); 2655 SMLoc SecondIdxLoc; 2656 2657 if (!parseExpr(RegLo)) 2658 return false; 2659 2660 if (trySkipToken(AsmToken::Colon)) { 2661 SecondIdxLoc = getLoc(); 2662 if (!parseExpr(RegHi)) 2663 return false; 2664 } else { 2665 RegHi = RegLo; 2666 } 2667 2668 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2669 return false; 2670 2671 if (!isUInt<32>(RegLo)) { 2672 Error(FirstIdxLoc, "invalid register index"); 2673 return false; 2674 } 2675 2676 if (!isUInt<32>(RegHi)) { 2677 Error(SecondIdxLoc, "invalid register index"); 2678 return false; 2679 } 2680 2681 if (RegLo > RegHi) { 2682 Error(FirstIdxLoc, "first register index should not exceed second index"); 2683 return false; 2684 } 2685 2686 Num = static_cast<unsigned>(RegLo); 2687 RegWidth = 32 * ((RegHi - RegLo) + 1); 2688 return true; 2689 } 2690 2691 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2692 unsigned &RegNum, unsigned &RegWidth, 2693 SmallVectorImpl<AsmToken> &Tokens) { 2694 assert(isToken(AsmToken::Identifier)); 2695 unsigned Reg = getSpecialRegForName(getTokenStr()); 2696 if (Reg) { 2697 RegNum = 0; 2698 RegWidth = 32; 2699 RegKind = IS_SPECIAL; 2700 Tokens.push_back(getToken()); 2701 lex(); // skip register name 2702 } 2703 return Reg; 2704 } 2705 2706 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2707 unsigned &RegNum, unsigned &RegWidth, 2708 SmallVectorImpl<AsmToken> &Tokens) { 2709 assert(isToken(AsmToken::Identifier)); 2710 StringRef RegName = getTokenStr(); 2711 auto Loc = getLoc(); 2712 2713 const RegInfo *RI = getRegularRegInfo(RegName); 2714 if (!RI) { 2715 Error(Loc, "invalid register name"); 2716 return AMDGPU::NoRegister; 2717 } 2718 2719 Tokens.push_back(getToken()); 2720 lex(); // skip register name 2721 2722 RegKind = RI->Kind; 2723 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2724 if (!RegSuffix.empty()) { 2725 // Single 32-bit register: vXX. 2726 if (!getRegNum(RegSuffix, RegNum)) { 2727 Error(Loc, "invalid register index"); 2728 return AMDGPU::NoRegister; 2729 } 2730 RegWidth = 32; 2731 } else { 2732 // Range of registers: v[XX:YY]. ":YY" is optional. 
2733 if (!ParseRegRange(RegNum, RegWidth)) 2734 return AMDGPU::NoRegister; 2735 } 2736 2737 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2738 } 2739 2740 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2741 unsigned &RegWidth, 2742 SmallVectorImpl<AsmToken> &Tokens) { 2743 unsigned Reg = AMDGPU::NoRegister; 2744 auto ListLoc = getLoc(); 2745 2746 if (!skipToken(AsmToken::LBrac, 2747 "expected a register or a list of registers")) { 2748 return AMDGPU::NoRegister; 2749 } 2750 2751 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2752 2753 auto Loc = getLoc(); 2754 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2755 return AMDGPU::NoRegister; 2756 if (RegWidth != 32) { 2757 Error(Loc, "expected a single 32-bit register"); 2758 return AMDGPU::NoRegister; 2759 } 2760 2761 for (; trySkipToken(AsmToken::Comma); ) { 2762 RegisterKind NextRegKind; 2763 unsigned NextReg, NextRegNum, NextRegWidth; 2764 Loc = getLoc(); 2765 2766 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2767 NextRegNum, NextRegWidth, 2768 Tokens)) { 2769 return AMDGPU::NoRegister; 2770 } 2771 if (NextRegWidth != 32) { 2772 Error(Loc, "expected a single 32-bit register"); 2773 return AMDGPU::NoRegister; 2774 } 2775 if (NextRegKind != RegKind) { 2776 Error(Loc, "registers in a list must be of the same kind"); 2777 return AMDGPU::NoRegister; 2778 } 2779 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2780 return AMDGPU::NoRegister; 2781 } 2782 2783 if (!skipToken(AsmToken::RBrac, 2784 "expected a comma or a closing square bracket")) { 2785 return AMDGPU::NoRegister; 2786 } 2787 2788 if (isRegularReg(RegKind)) 2789 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2790 2791 return Reg; 2792 } 2793 2794 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2795 unsigned &RegNum, unsigned &RegWidth, 2796 SmallVectorImpl<AsmToken> &Tokens) { 2797 auto Loc = getLoc(); 2798 Reg = AMDGPU::NoRegister; 2799 2800 if (isToken(AsmToken::Identifier)) { 2801 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2802 if (Reg == AMDGPU::NoRegister) 2803 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2804 } else { 2805 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2806 } 2807 2808 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2809 if (Reg == AMDGPU::NoRegister) { 2810 assert(Parser.hasPendingError()); 2811 return false; 2812 } 2813 2814 if (!subtargetHasRegister(*TRI, Reg)) { 2815 if (Reg == AMDGPU::SGPR_NULL) { 2816 Error(Loc, "'null' operand is not supported on this GPU"); 2817 } else { 2818 Error(Loc, "register not available on this GPU"); 2819 } 2820 return false; 2821 } 2822 2823 return true; 2824 } 2825 2826 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2827 unsigned &RegNum, unsigned &RegWidth, 2828 bool RestoreOnFailure /*=false*/) { 2829 Reg = AMDGPU::NoRegister; 2830 2831 SmallVector<AsmToken, 1> Tokens; 2832 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2833 if (RestoreOnFailure) { 2834 while (!Tokens.empty()) { 2835 getLexer().UnLex(Tokens.pop_back_val()); 2836 } 2837 } 2838 return true; 2839 } 2840 return false; 2841 } 2842 2843 Optional<StringRef> 2844 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2845 switch (RegKind) { 2846 case IS_VGPR: 2847 return StringRef(".amdgcn.next_free_vgpr"); 2848 case IS_SGPR: 2849 return StringRef(".amdgcn.next_free_sgpr"); 2850 default: 2851 return None; 2852 } 2853 } 2854 2855 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2856 auto SymbolName = getGprCountSymbolName(RegKind); 2857 assert(SymbolName && "initializing invalid register kind"); 2858 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2859 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2860 } 2861 2862 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2863 unsigned DwordRegIndex, 2864 unsigned RegWidth) { 2865 // Symbols are only defined for GCN targets 2866 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2867 return true; 2868 2869 auto SymbolName = getGprCountSymbolName(RegKind); 2870 if (!SymbolName) 2871 return true; 2872 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2873 2874 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1; 2875 int64_t OldCount; 2876 2877 if (!Sym->isVariable()) 2878 return !Error(getLoc(), 2879 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2880 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2881 return !Error( 2882 getLoc(), 2883 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2884 2885 if (OldCount <= NewMax) 2886 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2887 2888 return true; 2889 } 2890 2891 std::unique_ptr<AMDGPUOperand> 2892 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2893 const auto &Tok = getToken(); 2894 SMLoc StartLoc = Tok.getLoc(); 2895 SMLoc EndLoc = Tok.getEndLoc(); 2896 RegisterKind RegKind; 2897 unsigned Reg, RegNum, RegWidth; 2898 2899 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2900 return nullptr; 2901 } 2902 if (isHsaAbiVersion3AndAbove(&getSTI())) { 2903 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2904 return nullptr; 2905 } else 2906 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2907 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2908 } 2909 2910 OperandMatchResultTy 2911 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2912 // TODO: add syntactic sugar for 1/(2*PI) 2913 2914 if (isRegister()) 2915 return MatchOperand_NoMatch; 2916 assert(!isModifier()); 2917 2918 const auto& Tok = getToken(); 2919 const auto& NextTok = peekToken(); 2920 bool IsReal = Tok.is(AsmToken::Real); 2921 SMLoc S = getLoc(); 2922 bool Negate = false; 2923 2924 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2925 lex(); 2926 IsReal = true; 2927 Negate = true; 2928 } 2929 2930 if (IsReal) { 2931 // Floating-point expressions are not supported. 2932 // Can only allow floating-point literals with an 2933 // optional sign. 
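  // So, for example, "1.5" and "-2.0" are accepted as floating-point
  // literals here, but something like "2.0*x" is not treated as a
  // floating-point expression; only integer expressions are evaluated in
  // the branch below.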
2934 2935 StringRef Num = getTokenStr(); 2936 lex(); 2937 2938 APFloat RealVal(APFloat::IEEEdouble()); 2939 auto roundMode = APFloat::rmNearestTiesToEven; 2940 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2941 return MatchOperand_ParseFail; 2942 } 2943 if (Negate) 2944 RealVal.changeSign(); 2945 2946 Operands.push_back( 2947 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2948 AMDGPUOperand::ImmTyNone, true)); 2949 2950 return MatchOperand_Success; 2951 2952 } else { 2953 int64_t IntVal; 2954 const MCExpr *Expr; 2955 SMLoc S = getLoc(); 2956 2957 if (HasSP3AbsModifier) { 2958 // This is a workaround for handling expressions 2959 // as arguments of SP3 'abs' modifier, for example: 2960 // |1.0| 2961 // |-1| 2962 // |1+x| 2963 // This syntax is not compatible with syntax of standard 2964 // MC expressions (due to the trailing '|'). 2965 SMLoc EndLoc; 2966 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2967 return MatchOperand_ParseFail; 2968 } else { 2969 if (Parser.parseExpression(Expr)) 2970 return MatchOperand_ParseFail; 2971 } 2972 2973 if (Expr->evaluateAsAbsolute(IntVal)) { 2974 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2975 } else { 2976 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2977 } 2978 2979 return MatchOperand_Success; 2980 } 2981 2982 return MatchOperand_NoMatch; 2983 } 2984 2985 OperandMatchResultTy 2986 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2987 if (!isRegister()) 2988 return MatchOperand_NoMatch; 2989 2990 if (auto R = parseRegister()) { 2991 assert(R->isReg()); 2992 Operands.push_back(std::move(R)); 2993 return MatchOperand_Success; 2994 } 2995 return MatchOperand_ParseFail; 2996 } 2997 2998 OperandMatchResultTy 2999 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 3000 auto res = parseReg(Operands); 3001 if (res != MatchOperand_NoMatch) { 3002 return res; 3003 } else if (isModifier()) { 3004 return MatchOperand_NoMatch; 3005 } else { 3006 return parseImm(Operands, HasSP3AbsMod); 3007 } 3008 } 3009 3010 bool 3011 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3012 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 3013 const auto &str = Token.getString(); 3014 return str == "abs" || str == "neg" || str == "sext"; 3015 } 3016 return false; 3017 } 3018 3019 bool 3020 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 3021 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 3022 } 3023 3024 bool 3025 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3026 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 3027 } 3028 3029 bool 3030 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3031 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 3032 } 3033 3034 // Check if this is an operand modifier or an opcode modifier 3035 // which may look like an expression but it is not. We should 3036 // avoid parsing these modifiers as expressions. Currently 3037 // recognized sequences are: 3038 // |...| 3039 // abs(...) 3040 // neg(...) 3041 // sext(...) 3042 // -reg 3043 // -|...| 3044 // -abs(...) 3045 // name:... 3046 // Note that simple opcode modifiers like 'gds' may be parsed as 3047 // expressions; this is a special case. See getExpressionAsToken. 
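// For example (illustrative): "abs(v0)" and "|v1|" are operand modifiers,
// "offset:16" follows the name:value opcode-modifier form, while a bare
// identifier such as "gds" may still reach the expression parser as noted
// above.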
3048 // 3049 bool 3050 AMDGPUAsmParser::isModifier() { 3051 3052 AsmToken Tok = getToken(); 3053 AsmToken NextToken[2]; 3054 peekTokens(NextToken); 3055 3056 return isOperandModifier(Tok, NextToken[0]) || 3057 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 3058 isOpcodeModifierWithVal(Tok, NextToken[0]); 3059 } 3060 3061 // Check if the current token is an SP3 'neg' modifier. 3062 // Currently this modifier is allowed in the following context: 3063 // 3064 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 3065 // 2. Before an 'abs' modifier: -abs(...) 3066 // 3. Before an SP3 'abs' modifier: -|...| 3067 // 3068 // In all other cases "-" is handled as a part 3069 // of an expression that follows the sign. 3070 // 3071 // Note: When "-" is followed by an integer literal, 3072 // this is interpreted as integer negation rather 3073 // than a floating-point NEG modifier applied to N. 3074 // Beside being contr-intuitive, such use of floating-point 3075 // NEG modifier would have resulted in different meaning 3076 // of integer literals used with VOP1/2/C and VOP3, 3077 // for example: 3078 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 3079 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 3080 // Negative fp literals with preceding "-" are 3081 // handled likewise for uniformity 3082 // 3083 bool 3084 AMDGPUAsmParser::parseSP3NegModifier() { 3085 3086 AsmToken NextToken[2]; 3087 peekTokens(NextToken); 3088 3089 if (isToken(AsmToken::Minus) && 3090 (isRegister(NextToken[0], NextToken[1]) || 3091 NextToken[0].is(AsmToken::Pipe) || 3092 isId(NextToken[0], "abs"))) { 3093 lex(); 3094 return true; 3095 } 3096 3097 return false; 3098 } 3099 3100 OperandMatchResultTy 3101 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 3102 bool AllowImm) { 3103 bool Neg, SP3Neg; 3104 bool Abs, SP3Abs; 3105 SMLoc Loc; 3106 3107 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 3108 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 3109 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 3110 return MatchOperand_ParseFail; 3111 } 3112 3113 SP3Neg = parseSP3NegModifier(); 3114 3115 Loc = getLoc(); 3116 Neg = trySkipId("neg"); 3117 if (Neg && SP3Neg) { 3118 Error(Loc, "expected register or immediate"); 3119 return MatchOperand_ParseFail; 3120 } 3121 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 3122 return MatchOperand_ParseFail; 3123 3124 Abs = trySkipId("abs"); 3125 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 3126 return MatchOperand_ParseFail; 3127 3128 Loc = getLoc(); 3129 SP3Abs = trySkipToken(AsmToken::Pipe); 3130 if (Abs && SP3Abs) { 3131 Error(Loc, "expected register or immediate"); 3132 return MatchOperand_ParseFail; 3133 } 3134 3135 OperandMatchResultTy Res; 3136 if (AllowImm) { 3137 Res = parseRegOrImm(Operands, SP3Abs); 3138 } else { 3139 Res = parseReg(Operands); 3140 } 3141 if (Res != MatchOperand_Success) { 3142 return (SP3Neg || Neg || SP3Abs || Abs)? 
MatchOperand_ParseFail : Res; 3143 } 3144 3145 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 3146 return MatchOperand_ParseFail; 3147 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3148 return MatchOperand_ParseFail; 3149 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3150 return MatchOperand_ParseFail; 3151 3152 AMDGPUOperand::Modifiers Mods; 3153 Mods.Abs = Abs || SP3Abs; 3154 Mods.Neg = Neg || SP3Neg; 3155 3156 if (Mods.hasFPModifiers()) { 3157 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3158 if (Op.isExpr()) { 3159 Error(Op.getStartLoc(), "expected an absolute expression"); 3160 return MatchOperand_ParseFail; 3161 } 3162 Op.setModifiers(Mods); 3163 } 3164 return MatchOperand_Success; 3165 } 3166 3167 OperandMatchResultTy 3168 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3169 bool AllowImm) { 3170 bool Sext = trySkipId("sext"); 3171 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3172 return MatchOperand_ParseFail; 3173 3174 OperandMatchResultTy Res; 3175 if (AllowImm) { 3176 Res = parseRegOrImm(Operands); 3177 } else { 3178 Res = parseReg(Operands); 3179 } 3180 if (Res != MatchOperand_Success) { 3181 return Sext? MatchOperand_ParseFail : Res; 3182 } 3183 3184 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3185 return MatchOperand_ParseFail; 3186 3187 AMDGPUOperand::Modifiers Mods; 3188 Mods.Sext = Sext; 3189 3190 if (Mods.hasIntModifiers()) { 3191 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3192 if (Op.isExpr()) { 3193 Error(Op.getStartLoc(), "expected an absolute expression"); 3194 return MatchOperand_ParseFail; 3195 } 3196 Op.setModifiers(Mods); 3197 } 3198 3199 return MatchOperand_Success; 3200 } 3201 3202 OperandMatchResultTy 3203 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3204 return parseRegOrImmWithFPInputMods(Operands, false); 3205 } 3206 3207 OperandMatchResultTy 3208 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3209 return parseRegOrImmWithIntInputMods(Operands, false); 3210 } 3211 3212 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3213 auto Loc = getLoc(); 3214 if (trySkipId("off")) { 3215 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3216 AMDGPUOperand::ImmTyOff, false)); 3217 return MatchOperand_Success; 3218 } 3219 3220 if (!isRegister()) 3221 return MatchOperand_NoMatch; 3222 3223 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3224 if (Reg) { 3225 Operands.push_back(std::move(Reg)); 3226 return MatchOperand_Success; 3227 } 3228 3229 return MatchOperand_ParseFail; 3230 3231 } 3232 3233 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3234 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3235 3236 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3237 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3238 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3239 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3240 return Match_InvalidOperand; 3241 3242 if ((TSFlags & SIInstrFlags::VOP3) && 3243 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3244 getForcedEncodingSize() != 64) 3245 return Match_PreferE32; 3246 3247 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3248 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3249 // v_mac_f32/16 allow only dst_sel == DWORD; 3250 auto OpNum = 3251 
        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
    const auto &Op = Inst.getOperand(OpNum);
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;
    }
  }

  return Match_Success;
}

static ArrayRef<unsigned> getAllVariants() {
  static const unsigned Variants[] = {
    AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
    AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
    AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
  };

  return makeArrayRef(Variants);
}

// Which asm variants we should check.
ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
  if (isForcedDPP() && isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
    return makeArrayRef(Variants);
  }
  if (getForcedEncodingSize() == 32) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
    return makeArrayRef(Variants);
  }

  if (isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
    return makeArrayRef(Variants);
  }

  if (isForcedSDWA()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
                                        AMDGPUAsmVariants::SDWA9};
    return makeArrayRef(Variants);
  }

  if (isForcedDPP()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
    return makeArrayRef(Variants);
  }

  return getAllVariants();
}

StringRef AMDGPUAsmParser::getMatchedVariantName() const {
  if (isForcedDPP() && isForcedVOP3())
    return "e64_dpp";

  if (getForcedEncodingSize() == 32)
    return "e32";

  if (isForcedVOP3())
    return "e64";

  if (isForcedSDWA())
    return "sdwa";

  if (isForcedDPP())
    return "dpp";

  return "";
}

unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  const unsigned Num = Desc.getNumImplicitUses();
  for (unsigned i = 0; i < Num; ++i) {
    unsigned Reg = Desc.ImplicitUses[i];
    switch (Reg) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::VCC:
    case AMDGPU::VCC_LO:
    case AMDGPU::VCC_HI:
    case AMDGPU::M0:
      return Reg;
    default:
      break;
    }
  }
  return AMDGPU::NoRegister;
}

// NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
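// As a rough illustration of the distinction drawn below: for a 32-bit
// source, an immediate like 1 or -4.0 encodes as an inline constant and
// occupies no literal slot, whereas 0x12345678 must be materialized as a
// 32-bit literal and is counted against the constant bus / literal limits
// by the callers.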
3343 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3344 unsigned OpIdx) const { 3345 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3346 3347 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3348 return false; 3349 } 3350 3351 const MCOperand &MO = Inst.getOperand(OpIdx); 3352 3353 int64_t Val = MO.getImm(); 3354 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3355 3356 switch (OpSize) { // expected operand size 3357 case 8: 3358 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3359 case 4: 3360 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3361 case 2: { 3362 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3363 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3364 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3365 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3366 return AMDGPU::isInlinableIntLiteral(Val); 3367 3368 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3369 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3370 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3371 return AMDGPU::isInlinableIntLiteralV216(Val); 3372 3373 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3374 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3375 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3376 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3377 3378 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3379 } 3380 default: 3381 llvm_unreachable("invalid operand size"); 3382 } 3383 } 3384 3385 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3386 if (!isGFX10Plus()) 3387 return 1; 3388 3389 switch (Opcode) { 3390 // 64-bit shift instructions can use only one scalar value input 3391 case AMDGPU::V_LSHLREV_B64_e64: 3392 case AMDGPU::V_LSHLREV_B64_gfx10: 3393 case AMDGPU::V_LSHLREV_B64_e64_gfx11: 3394 case AMDGPU::V_LSHRREV_B64_e64: 3395 case AMDGPU::V_LSHRREV_B64_gfx10: 3396 case AMDGPU::V_LSHRREV_B64_e64_gfx11: 3397 case AMDGPU::V_ASHRREV_I64_e64: 3398 case AMDGPU::V_ASHRREV_I64_gfx10: 3399 case AMDGPU::V_ASHRREV_I64_e64_gfx11: 3400 case AMDGPU::V_LSHL_B64_e64: 3401 case AMDGPU::V_LSHR_B64_e64: 3402 case AMDGPU::V_ASHR_I64_e64: 3403 return 1; 3404 default: 3405 return 2; 3406 } 3407 } 3408 3409 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3410 const MCOperand &MO = Inst.getOperand(OpIdx); 3411 if (MO.isImm()) { 3412 return !isInlineConstant(Inst, OpIdx); 3413 } else if (MO.isReg()) { 3414 auto Reg = MO.getReg(); 3415 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3416 auto PReg = mc2PseudoReg(Reg); 3417 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3418 } else { 3419 return true; 3420 } 3421 } 3422 3423 bool 3424 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3425 const OperandVector &Operands) { 3426 const unsigned Opcode = Inst.getOpcode(); 3427 const MCInstrDesc &Desc = MII.get(Opcode); 3428 unsigned LastSGPR = AMDGPU::NoRegister; 3429 unsigned ConstantBusUseCount = 0; 3430 unsigned NumLiterals = 0; 3431 unsigned LiteralSize; 3432 3433 if (Desc.TSFlags & 3434 (SIInstrFlags::VOPC | 3435 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3436 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3437 SIInstrFlags::SDWA)) { 3438 // Check special imm operands (used by madmk, etc) 3439 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3440 ++NumLiterals; 3441 LiteralSize = 4; 3442 } 3443 3444 SmallDenseSet<unsigned> SGPRsUsed; 3445 unsigned SGPRUsed = 
        findImplicitSGPRReadInVOP(Inst);
    if (SGPRUsed != AMDGPU::NoRegister) {
      SGPRsUsed.insert(SGPRUsed);
      ++ConstantBusUseCount;
    }

    const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
    const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    for (int OpIdx : OpIndices) {
      if (OpIdx == -1) break;

      const MCOperand &MO = Inst.getOperand(OpIdx);
      if (usesConstantBus(Inst, OpIdx)) {
        if (MO.isReg()) {
          LastSGPR = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersection like these
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but they are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction
          if (SGPRsUsed.insert(LastSGPR).second) {
            ++ConstantBusUseCount;
          }
        } else { // Expression or a literal

          if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
            continue; // special operand like VINTERP attr_chan

          // An instruction may use only one literal.
          // This has been validated in the previous step.
          // See validateVOPLiteral.
          // This literal may be used as more than one operand.
          // If all these operands are of the same size,
          // this literal counts as one scalar value.
          // Otherwise it counts as 2 scalar values.
          // See "GFX10 Shader Programming", section 3.6.2.3.

          unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
          if (Size < 4) Size = 4;

          if (NumLiterals == 0) {
            NumLiterals = 1;
            LiteralSize = Size;
          } else if (LiteralSize != Size) {
            NumLiterals = 2;
          }
        }
      }
    }
  }
  ConstantBusUseCount += NumLiterals;

  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
    return true;

  SMLoc LitLoc = getLitLoc(Operands);
  SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
  SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ?
RegLoc : LitLoc; 3508 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3509 return false; 3510 } 3511 3512 bool 3513 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3514 const OperandVector &Operands) { 3515 const unsigned Opcode = Inst.getOpcode(); 3516 const MCInstrDesc &Desc = MII.get(Opcode); 3517 3518 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3519 if (DstIdx == -1 || 3520 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3521 return true; 3522 } 3523 3524 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3525 3526 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3527 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3528 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3529 3530 assert(DstIdx != -1); 3531 const MCOperand &Dst = Inst.getOperand(DstIdx); 3532 assert(Dst.isReg()); 3533 3534 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3535 3536 for (int SrcIdx : SrcIndices) { 3537 if (SrcIdx == -1) break; 3538 const MCOperand &Src = Inst.getOperand(SrcIdx); 3539 if (Src.isReg()) { 3540 if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) { 3541 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3542 Error(getRegLoc(SrcReg, Operands), 3543 "destination must be different than all sources"); 3544 return false; 3545 } 3546 } 3547 } 3548 3549 return true; 3550 } 3551 3552 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3553 3554 const unsigned Opc = Inst.getOpcode(); 3555 const MCInstrDesc &Desc = MII.get(Opc); 3556 3557 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3558 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3559 assert(ClampIdx != -1); 3560 return Inst.getOperand(ClampIdx).getImm() == 0; 3561 } 3562 3563 return true; 3564 } 3565 3566 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3567 3568 const unsigned Opc = Inst.getOpcode(); 3569 const MCInstrDesc &Desc = MII.get(Opc); 3570 3571 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3572 return None; 3573 3574 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3575 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3576 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3577 3578 assert(VDataIdx != -1); 3579 3580 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3581 return None; 3582 3583 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3584 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3585 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3586 if (DMask == 0) 3587 DMask = 1; 3588 3589 bool isPackedD16 = false; 3590 unsigned DataSize = 3591 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 3592 if (hasPackedD16()) { 3593 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3594 isPackedD16 = D16Idx >= 0; 3595 if (isPackedD16 && Inst.getOperand(D16Idx).getImm()) 3596 DataSize = (DataSize + 1) / 2; 3597 } 3598 3599 if ((VDataSize / 4) == DataSize + TFESize) 3600 return None; 3601 3602 return StringRef(isPackedD16 3603 ? 
"image data size does not match dmask, d16 and tfe" 3604 : "image data size does not match dmask and tfe"); 3605 } 3606 3607 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3608 const unsigned Opc = Inst.getOpcode(); 3609 const MCInstrDesc &Desc = MII.get(Opc); 3610 3611 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3612 return true; 3613 3614 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3615 3616 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3617 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3618 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3619 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3620 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3621 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3622 3623 assert(VAddr0Idx != -1); 3624 assert(SrsrcIdx != -1); 3625 assert(SrsrcIdx > VAddr0Idx); 3626 3627 if (DimIdx == -1) 3628 return true; // intersect_ray 3629 3630 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3631 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3632 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3633 unsigned ActualAddrSize = 3634 IsNSA ? SrsrcIdx - VAddr0Idx 3635 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3636 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3637 3638 unsigned ExpectedAddrSize = 3639 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3640 3641 if (!IsNSA) { 3642 if (ExpectedAddrSize > 8) 3643 ExpectedAddrSize = 16; 3644 3645 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3646 // This provides backward compatibility for assembly created 3647 // before 160b/192b/224b types were directly supported. 3648 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3649 return true; 3650 } 3651 3652 return ActualAddrSize == ExpectedAddrSize; 3653 } 3654 3655 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3656 3657 const unsigned Opc = Inst.getOpcode(); 3658 const MCInstrDesc &Desc = MII.get(Opc); 3659 3660 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3661 return true; 3662 if (!Desc.mayLoad() || !Desc.mayStore()) 3663 return true; // Not atomic 3664 3665 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3666 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3667 3668 // This is an incomplete check because image_atomic_cmpswap 3669 // may only use 0x3 and 0xf while other atomic operations 3670 // may use 0x1 and 0x3. However these limitations are 3671 // verified when we check that dmask matches dst size. 3672 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3673 } 3674 3675 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3676 3677 const unsigned Opc = Inst.getOpcode(); 3678 const MCInstrDesc &Desc = MII.get(Opc); 3679 3680 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3681 return true; 3682 3683 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3684 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3685 3686 // GATHER4 instructions use dmask in a different fashion compared to 3687 // other MIMG instructions. The only useful DMASK values are 3688 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3689 // (red,red,red,red) etc.) The ISA document doesn't mention 3690 // this. 
3691 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3692 } 3693 3694 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3695 const unsigned Opc = Inst.getOpcode(); 3696 const MCInstrDesc &Desc = MII.get(Opc); 3697 3698 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3699 return true; 3700 3701 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3702 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3703 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3704 3705 if (!BaseOpcode->MSAA) 3706 return true; 3707 3708 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3709 assert(DimIdx != -1); 3710 3711 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3712 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3713 3714 return DimInfo->MSAA; 3715 } 3716 3717 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3718 { 3719 switch (Opcode) { 3720 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3721 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3722 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3723 return true; 3724 default: 3725 return false; 3726 } 3727 } 3728 3729 // movrels* opcodes should only allow VGPRS as src0. 3730 // This is specified in .td description for vop1/vop3, 3731 // but sdwa is handled differently. See isSDWAOperand. 3732 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3733 const OperandVector &Operands) { 3734 3735 const unsigned Opc = Inst.getOpcode(); 3736 const MCInstrDesc &Desc = MII.get(Opc); 3737 3738 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3739 return true; 3740 3741 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3742 assert(Src0Idx != -1); 3743 3744 SMLoc ErrLoc; 3745 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3746 if (Src0.isReg()) { 3747 auto Reg = mc2PseudoReg(Src0.getReg()); 3748 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3749 if (!isSGPR(Reg, TRI)) 3750 return true; 3751 ErrLoc = getRegLoc(Reg, Operands); 3752 } else { 3753 ErrLoc = getConstLoc(Operands); 3754 } 3755 3756 Error(ErrLoc, "source operand must be a VGPR"); 3757 return false; 3758 } 3759 3760 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3761 const OperandVector &Operands) { 3762 3763 const unsigned Opc = Inst.getOpcode(); 3764 3765 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3766 return true; 3767 3768 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3769 assert(Src0Idx != -1); 3770 3771 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3772 if (!Src0.isReg()) 3773 return true; 3774 3775 auto Reg = mc2PseudoReg(Src0.getReg()); 3776 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3777 if (!isGFX90A() && isSGPR(Reg, TRI)) { 3778 Error(getRegLoc(Reg, Operands), 3779 "source operand must be either a VGPR or an inline constant"); 3780 return false; 3781 } 3782 3783 return true; 3784 } 3785 3786 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 3787 const OperandVector &Operands) { 3788 const unsigned Opc = Inst.getOpcode(); 3789 const MCInstrDesc &Desc = MII.get(Opc); 3790 3791 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 3792 return true; 3793 3794 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 3795 if (Src2Idx == -1) 3796 return true; 3797 3798 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 3799 if (!Src2.isReg()) 3800 return true; 3801 3802 MCRegister Src2Reg = Src2.getReg(); 3803 MCRegister DstReg = Inst.getOperand(0).getReg(); 3804 if (Src2Reg == DstReg) 3805 return 
true; 3806 3807 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3808 if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128) 3809 return true; 3810 3811 if (TRI->regsOverlap(Src2Reg, DstReg)) { 3812 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands), 3813 "source 2 operand must not partially overlap with dst"); 3814 return false; 3815 } 3816 3817 return true; 3818 } 3819 3820 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3821 switch (Inst.getOpcode()) { 3822 default: 3823 return true; 3824 case V_DIV_SCALE_F32_gfx6_gfx7: 3825 case V_DIV_SCALE_F32_vi: 3826 case V_DIV_SCALE_F32_gfx10: 3827 case V_DIV_SCALE_F64_gfx6_gfx7: 3828 case V_DIV_SCALE_F64_vi: 3829 case V_DIV_SCALE_F64_gfx10: 3830 break; 3831 } 3832 3833 // TODO: Check that src0 = src1 or src2. 3834 3835 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3836 AMDGPU::OpName::src1_modifiers, 3837 AMDGPU::OpName::src2_modifiers}) { 3838 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3839 .getImm() & 3840 SISrcMods::ABS) { 3841 return false; 3842 } 3843 } 3844 3845 return true; 3846 } 3847 3848 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3849 3850 const unsigned Opc = Inst.getOpcode(); 3851 const MCInstrDesc &Desc = MII.get(Opc); 3852 3853 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3854 return true; 3855 3856 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3857 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3858 if (isCI() || isSI()) 3859 return false; 3860 } 3861 3862 return true; 3863 } 3864 3865 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3866 const unsigned Opc = Inst.getOpcode(); 3867 const MCInstrDesc &Desc = MII.get(Opc); 3868 3869 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3870 return true; 3871 3872 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3873 if (DimIdx < 0) 3874 return true; 3875 3876 long Imm = Inst.getOperand(DimIdx).getImm(); 3877 if (Imm < 0 || Imm >= 8) 3878 return false; 3879 3880 return true; 3881 } 3882 3883 static bool IsRevOpcode(const unsigned Opcode) 3884 { 3885 switch (Opcode) { 3886 case AMDGPU::V_SUBREV_F32_e32: 3887 case AMDGPU::V_SUBREV_F32_e64: 3888 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3889 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3890 case AMDGPU::V_SUBREV_F32_e32_vi: 3891 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3892 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3893 case AMDGPU::V_SUBREV_F32_e64_vi: 3894 3895 case AMDGPU::V_SUBREV_CO_U32_e32: 3896 case AMDGPU::V_SUBREV_CO_U32_e64: 3897 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3898 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3899 3900 case AMDGPU::V_SUBBREV_U32_e32: 3901 case AMDGPU::V_SUBBREV_U32_e64: 3902 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3903 case AMDGPU::V_SUBBREV_U32_e32_vi: 3904 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3905 case AMDGPU::V_SUBBREV_U32_e64_vi: 3906 3907 case AMDGPU::V_SUBREV_U32_e32: 3908 case AMDGPU::V_SUBREV_U32_e64: 3909 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3910 case AMDGPU::V_SUBREV_U32_e32_vi: 3911 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3912 case AMDGPU::V_SUBREV_U32_e64_vi: 3913 3914 case AMDGPU::V_SUBREV_F16_e32: 3915 case AMDGPU::V_SUBREV_F16_e64: 3916 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3917 case AMDGPU::V_SUBREV_F16_e32_vi: 3918 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3919 case AMDGPU::V_SUBREV_F16_e64_vi: 3920 3921 case AMDGPU::V_SUBREV_U16_e32: 3922 case AMDGPU::V_SUBREV_U16_e64: 3923 case AMDGPU::V_SUBREV_U16_e32_vi: 3924 case AMDGPU::V_SUBREV_U16_e64_vi: 3925 3926 case
AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3927 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3928 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3929 3930 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3931 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3932 3933 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3934 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3935 3936 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3937 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3938 3939 case AMDGPU::V_LSHRREV_B32_e32: 3940 case AMDGPU::V_LSHRREV_B32_e64: 3941 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3942 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3943 case AMDGPU::V_LSHRREV_B32_e32_vi: 3944 case AMDGPU::V_LSHRREV_B32_e64_vi: 3945 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3946 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3947 3948 case AMDGPU::V_ASHRREV_I32_e32: 3949 case AMDGPU::V_ASHRREV_I32_e64: 3950 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3951 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3952 case AMDGPU::V_ASHRREV_I32_e32_vi: 3953 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3954 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3955 case AMDGPU::V_ASHRREV_I32_e64_vi: 3956 3957 case AMDGPU::V_LSHLREV_B32_e32: 3958 case AMDGPU::V_LSHLREV_B32_e64: 3959 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3960 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3961 case AMDGPU::V_LSHLREV_B32_e32_vi: 3962 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3963 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3964 case AMDGPU::V_LSHLREV_B32_e64_vi: 3965 3966 case AMDGPU::V_LSHLREV_B16_e32: 3967 case AMDGPU::V_LSHLREV_B16_e64: 3968 case AMDGPU::V_LSHLREV_B16_e32_vi: 3969 case AMDGPU::V_LSHLREV_B16_e64_vi: 3970 case AMDGPU::V_LSHLREV_B16_gfx10: 3971 3972 case AMDGPU::V_LSHRREV_B16_e32: 3973 case AMDGPU::V_LSHRREV_B16_e64: 3974 case AMDGPU::V_LSHRREV_B16_e32_vi: 3975 case AMDGPU::V_LSHRREV_B16_e64_vi: 3976 case AMDGPU::V_LSHRREV_B16_gfx10: 3977 3978 case AMDGPU::V_ASHRREV_I16_e32: 3979 case AMDGPU::V_ASHRREV_I16_e64: 3980 case AMDGPU::V_ASHRREV_I16_e32_vi: 3981 case AMDGPU::V_ASHRREV_I16_e64_vi: 3982 case AMDGPU::V_ASHRREV_I16_gfx10: 3983 3984 case AMDGPU::V_LSHLREV_B64_e64: 3985 case AMDGPU::V_LSHLREV_B64_gfx10: 3986 case AMDGPU::V_LSHLREV_B64_vi: 3987 3988 case AMDGPU::V_LSHRREV_B64_e64: 3989 case AMDGPU::V_LSHRREV_B64_gfx10: 3990 case AMDGPU::V_LSHRREV_B64_vi: 3991 3992 case AMDGPU::V_ASHRREV_I64_e64: 3993 case AMDGPU::V_ASHRREV_I64_gfx10: 3994 case AMDGPU::V_ASHRREV_I64_vi: 3995 3996 case AMDGPU::V_PK_LSHLREV_B16: 3997 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3998 case AMDGPU::V_PK_LSHLREV_B16_vi: 3999 4000 case AMDGPU::V_PK_LSHRREV_B16: 4001 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 4002 case AMDGPU::V_PK_LSHRREV_B16_vi: 4003 case AMDGPU::V_PK_ASHRREV_I16: 4004 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 4005 case AMDGPU::V_PK_ASHRREV_I16_vi: 4006 return true; 4007 default: 4008 return false; 4009 } 4010 } 4011 4012 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 4013 4014 using namespace SIInstrFlags; 4015 const unsigned Opcode = Inst.getOpcode(); 4016 const MCInstrDesc &Desc = MII.get(Opcode); 4017 4018 // lds_direct register is defined so that it can be used 4019 // with 9-bit operands only. Ignore encodings which do not accept these. 
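// A hypothetical example: on a target that still supports lds_direct,
// "v_mov_b32 v0, lds_direct" (lds_direct as src0 of a VOP1 encoding) is
// accepted by the checks below, while SDWA forms or uses of lds_direct as
// src1/src2 are diagnosed.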
4020 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 4021 if ((Desc.TSFlags & Enc) == 0) 4022 return None; 4023 4024 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 4025 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 4026 if (SrcIdx == -1) 4027 break; 4028 const auto &Src = Inst.getOperand(SrcIdx); 4029 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 4030 4031 if (isGFX90A() || isGFX11Plus()) 4032 return StringRef("lds_direct is not supported on this GPU"); 4033 4034 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 4035 return StringRef("lds_direct cannot be used with this instruction"); 4036 4037 if (SrcName != OpName::src0) 4038 return StringRef("lds_direct may be used as src0 only"); 4039 } 4040 } 4041 4042 return None; 4043 } 4044 4045 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 4046 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4047 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4048 if (Op.isFlatOffset()) 4049 return Op.getStartLoc(); 4050 } 4051 return getLoc(); 4052 } 4053 4054 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 4055 const OperandVector &Operands) { 4056 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4057 if ((TSFlags & SIInstrFlags::FLAT) == 0) 4058 return true; 4059 4060 auto Opcode = Inst.getOpcode(); 4061 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4062 assert(OpNum != -1); 4063 4064 const auto &Op = Inst.getOperand(OpNum); 4065 if (!hasFlatOffsets() && Op.getImm() != 0) { 4066 Error(getFlatOffsetLoc(Operands), 4067 "flat offset modifier is not supported on this GPU"); 4068 return false; 4069 } 4070 4071 // For FLAT segment the offset must be positive; 4072 // MSB is ignored and forced to zero. 4073 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 4074 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 4075 if (!isIntN(OffsetSize, Op.getImm())) { 4076 Error(getFlatOffsetLoc(Operands), 4077 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 4078 return false; 4079 } 4080 } else { 4081 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 4082 if (!isUIntN(OffsetSize, Op.getImm())) { 4083 Error(getFlatOffsetLoc(Operands), 4084 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 4085 return false; 4086 } 4087 } 4088 4089 return true; 4090 } 4091 4092 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 4093 // Start with second operand because SMEM Offset cannot be dst or src0. 
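// E.g. in a (hypothetical) "s_load_dword s5, s[2:3], 0x10" the offset is
// never the first parsed operand, so the scan can safely start at index 2.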
4094 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 4095 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4096 if (Op.isSMEMOffset()) 4097 return Op.getStartLoc(); 4098 } 4099 return getLoc(); 4100 } 4101 4102 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 4103 const OperandVector &Operands) { 4104 if (isCI() || isSI()) 4105 return true; 4106 4107 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4108 if ((TSFlags & SIInstrFlags::SMRD) == 0) 4109 return true; 4110 4111 auto Opcode = Inst.getOpcode(); 4112 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4113 if (OpNum == -1) 4114 return true; 4115 4116 const auto &Op = Inst.getOperand(OpNum); 4117 if (!Op.isImm()) 4118 return true; 4119 4120 uint64_t Offset = Op.getImm(); 4121 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 4122 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 4123 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 4124 return true; 4125 4126 Error(getSMEMOffsetLoc(Operands), 4127 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 4128 "expected a 21-bit signed offset"); 4129 4130 return false; 4131 } 4132 4133 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 4134 unsigned Opcode = Inst.getOpcode(); 4135 const MCInstrDesc &Desc = MII.get(Opcode); 4136 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 4137 return true; 4138 4139 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4140 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4141 4142 const int OpIndices[] = { Src0Idx, Src1Idx }; 4143 4144 unsigned NumExprs = 0; 4145 unsigned NumLiterals = 0; 4146 uint32_t LiteralValue; 4147 4148 for (int OpIdx : OpIndices) { 4149 if (OpIdx == -1) break; 4150 4151 const MCOperand &MO = Inst.getOperand(OpIdx); 4152 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 4153 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 4154 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4155 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4156 if (NumLiterals == 0 || LiteralValue != Value) { 4157 LiteralValue = Value; 4158 ++NumLiterals; 4159 } 4160 } else if (MO.isExpr()) { 4161 ++NumExprs; 4162 } 4163 } 4164 } 4165 4166 return NumLiterals + NumExprs <= 1; 4167 } 4168 4169 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4170 const unsigned Opc = Inst.getOpcode(); 4171 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 4172 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 4173 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4174 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4175 4176 if (OpSel & ~3) 4177 return false; 4178 } 4179 4180 if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) { 4181 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4182 if (OpSelIdx != -1) { 4183 if (Inst.getOperand(OpSelIdx).getImm() != 0) 4184 return false; 4185 } 4186 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 4187 if (OpSelHiIdx != -1) { 4188 if (Inst.getOperand(OpSelHiIdx).getImm() != -1) 4189 return false; 4190 } 4191 } 4192 4193 return true; 4194 } 4195 4196 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4197 const OperandVector &Operands) { 4198 const unsigned Opc = Inst.getOpcode(); 4199 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4200 if (DppCtrlIdx < 0) 4201 return true; 4202 unsigned DppCtrl = 
Inst.getOperand(DppCtrlIdx).getImm(); 4203 4204 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 4205 // DPP64 is supported for row_newbcast only. 4206 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4207 if (Src0Idx >= 0 && 4208 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 4209 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4210 Error(S, "64 bit dpp only supports row_newbcast"); 4211 return false; 4212 } 4213 } 4214 4215 return true; 4216 } 4217 4218 // Check if VCC register matches wavefront size 4219 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4220 auto FB = getFeatureBits(); 4221 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4222 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4223 } 4224 4225 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4226 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4227 const OperandVector &Operands) { 4228 unsigned Opcode = Inst.getOpcode(); 4229 const MCInstrDesc &Desc = MII.get(Opcode); 4230 const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); 4231 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4232 ImmIdx == -1) 4233 return true; 4234 4235 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4236 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4237 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 4238 4239 const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx}; 4240 4241 unsigned NumExprs = 0; 4242 unsigned NumLiterals = 0; 4243 uint32_t LiteralValue; 4244 4245 for (int OpIdx : OpIndices) { 4246 if (OpIdx == -1) 4247 continue; 4248 4249 const MCOperand &MO = Inst.getOperand(OpIdx); 4250 if (!MO.isImm() && !MO.isExpr()) 4251 continue; 4252 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 4253 continue; 4254 4255 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 4256 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 4257 Error(getConstLoc(Operands), 4258 "inline constants are not allowed for this operand"); 4259 return false; 4260 } 4261 4262 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4263 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4264 if (NumLiterals == 0 || LiteralValue != Value) { 4265 LiteralValue = Value; 4266 ++NumLiterals; 4267 } 4268 } else if (MO.isExpr()) { 4269 ++NumExprs; 4270 } 4271 } 4272 NumLiterals += NumExprs; 4273 4274 if (!NumLiterals) 4275 return true; 4276 4277 if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4278 Error(getLitLoc(Operands), "literal operands are not supported"); 4279 return false; 4280 } 4281 4282 if (NumLiterals > 1) { 4283 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4284 return false; 4285 } 4286 4287 return true; 4288 } 4289 4290 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4291 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4292 const MCRegisterInfo *MRI) { 4293 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4294 if (OpIdx < 0) 4295 return -1; 4296 4297 const MCOperand &Op = Inst.getOperand(OpIdx); 4298 if (!Op.isReg()) 4299 return -1; 4300 4301 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4302 auto Reg = Sub ? Sub : Op.getReg(); 4303 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4304 return AGPR32.contains(Reg) ? 
1 : 0; 4305 } 4306 4307 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4308 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4309 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4310 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4311 SIInstrFlags::DS)) == 0) 4312 return true; 4313 4314 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4315 : AMDGPU::OpName::vdata; 4316 4317 const MCRegisterInfo *MRI = getMRI(); 4318 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4319 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4320 4321 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4322 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4323 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4324 return false; 4325 } 4326 4327 auto FB = getFeatureBits(); 4328 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4329 if (DataAreg < 0 || DstAreg < 0) 4330 return true; 4331 return DstAreg == DataAreg; 4332 } 4333 4334 return DstAreg < 1 && DataAreg < 1; 4335 } 4336 4337 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4338 auto FB = getFeatureBits(); 4339 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4340 return true; 4341 4342 const MCRegisterInfo *MRI = getMRI(); 4343 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4344 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4345 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4346 const MCOperand &Op = Inst.getOperand(I); 4347 if (!Op.isReg()) 4348 continue; 4349 4350 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4351 if (!Sub) 4352 continue; 4353 4354 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4355 return false; 4356 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4357 return false; 4358 } 4359 4360 return true; 4361 } 4362 4363 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const { 4364 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4365 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4366 if (Op.isBLGP()) 4367 return Op.getStartLoc(); 4368 } 4369 return SMLoc(); 4370 } 4371 4372 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst, 4373 const OperandVector &Operands) { 4374 unsigned Opc = Inst.getOpcode(); 4375 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp); 4376 if (BlgpIdx == -1) 4377 return true; 4378 SMLoc BLGPLoc = getBLGPLoc(Operands); 4379 if (!BLGPLoc.isValid()) 4380 return true; 4381 bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:"); 4382 auto FB = getFeatureBits(); 4383 bool UsesNeg = false; 4384 if (FB[AMDGPU::FeatureGFX940Insts]) { 4385 switch (Opc) { 4386 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: 4387 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: 4388 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: 4389 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: 4390 UsesNeg = true; 4391 } 4392 } 4393 4394 if (IsNeg == UsesNeg) 4395 return true; 4396 4397 Error(BLGPLoc, 4398 UsesNeg ? "invalid modifier: blgp is not supported" 4399 : "invalid modifier: neg is not supported"); 4400 4401 return false; 4402 } 4403 4404 // gfx90a has an undocumented limitation: 4405 // DS_GWS opcodes must use even aligned registers. 
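// For example (a sketch): on gfx90a a "ds_gws_init v1 gds" would be rejected
// below because v1 has an odd register index, whereas v0 or v2 is accepted.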
4406 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4407 const OperandVector &Operands) { 4408 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4409 return true; 4410 4411 int Opc = Inst.getOpcode(); 4412 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4413 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4414 return true; 4415 4416 const MCRegisterInfo *MRI = getMRI(); 4417 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4418 int Data0Pos = 4419 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4420 assert(Data0Pos != -1); 4421 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4422 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0); 4423 if (RegIdx & 1) { 4424 SMLoc RegLoc = getRegLoc(Reg, Operands); 4425 Error(RegLoc, "vgpr must be even aligned"); 4426 return false; 4427 } 4428 4429 return true; 4430 } 4431 4432 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4433 const OperandVector &Operands, 4434 const SMLoc &IDLoc) { 4435 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4436 AMDGPU::OpName::cpol); 4437 if (CPolPos == -1) 4438 return true; 4439 4440 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4441 4442 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4443 if (TSFlags & SIInstrFlags::SMRD) { 4444 if (CPol && (isSI() || isCI())) { 4445 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4446 Error(S, "cache policy is not supported for SMRD instructions"); 4447 return false; 4448 } 4449 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) { 4450 Error(IDLoc, "invalid cache policy for SMEM instruction"); 4451 return false; 4452 } 4453 } 4454 4455 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { 4456 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4457 StringRef CStr(S.getPointer()); 4458 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4459 Error(S, "scc is not supported on this GPU"); 4460 return false; 4461 } 4462 4463 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4464 return true; 4465 4466 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4467 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4468 Error(IDLoc, isGFX940() ? "instruction must use sc0" 4469 : "instruction must use glc"); 4470 return false; 4471 } 4472 } else { 4473 if (CPol & CPol::GLC) { 4474 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4475 StringRef CStr(S.getPointer()); 4476 S = SMLoc::getFromPointer( 4477 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]); 4478 Error(S, isGFX940() ? "instruction must not use sc0" 4479 : "instruction must not use glc"); 4480 return false; 4481 } 4482 } 4483 4484 return true; 4485 } 4486 4487 bool AMDGPUAsmParser::validateFlatLdsDMA(const MCInst &Inst, 4488 const OperandVector &Operands, 4489 const SMLoc &IDLoc) { 4490 if (isGFX940()) 4491 return true; 4492 4493 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4494 if ((TSFlags & (SIInstrFlags::VALU | SIInstrFlags::FLAT)) != 4495 (SIInstrFlags::VALU | SIInstrFlags::FLAT)) 4496 return true; 4497 // This is FLAT LDS DMA. 4498 4499 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyLDS, Operands); 4500 StringRef CStr(S.getPointer()); 4501 if (!CStr.startswith("lds")) { 4502 // This is an incorrectly selected LDS DMA version of a FLAT load opcode. 4503 // The LDS version should have the 'lds' modifier, but it follows optional 4504 // operands, so its absence is ignored by the matcher.
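// (For instance, a FLAT load written without a trailing 'lds' token must not
// be matched to the LDS DMA opcode; reaching this point means it was, so the
// instruction is rejected here.)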
4505 Error(IDLoc, "invalid operands for instruction"); 4506 return false; 4507 } 4508 4509 return true; 4510 } 4511 4512 bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) { 4513 if (!isGFX11Plus()) 4514 return true; 4515 for (auto &Operand : Operands) { 4516 if (!Operand->isReg()) 4517 continue; 4518 unsigned Reg = Operand->getReg(); 4519 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) { 4520 Error(getRegLoc(Reg, Operands), 4521 "execz and vccz are not supported on this GPU"); 4522 return false; 4523 } 4524 } 4525 return true; 4526 } 4527 4528 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4529 const SMLoc &IDLoc, 4530 const OperandVector &Operands) { 4531 if (auto ErrMsg = validateLdsDirect(Inst)) { 4532 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4533 return false; 4534 } 4535 if (!validateSOPLiteral(Inst)) { 4536 Error(getLitLoc(Operands), 4537 "only one literal operand is allowed"); 4538 return false; 4539 } 4540 if (!validateVOPLiteral(Inst, Operands)) { 4541 return false; 4542 } 4543 if (!validateConstantBusLimitations(Inst, Operands)) { 4544 return false; 4545 } 4546 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4547 return false; 4548 } 4549 if (!validateIntClampSupported(Inst)) { 4550 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4551 "integer clamping is not supported on this GPU"); 4552 return false; 4553 } 4554 if (!validateOpSel(Inst)) { 4555 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4556 "invalid op_sel operand"); 4557 return false; 4558 } 4559 if (!validateDPP(Inst, Operands)) { 4560 return false; 4561 } 4562 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 4563 if (!validateMIMGD16(Inst)) { 4564 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4565 "d16 modifier is not supported on this GPU"); 4566 return false; 4567 } 4568 if (!validateMIMGDim(Inst)) { 4569 Error(IDLoc, "dim modifier is required on this GPU"); 4570 return false; 4571 } 4572 if (!validateMIMGMSAA(Inst)) { 4573 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4574 "invalid dim; must be MSAA type"); 4575 return false; 4576 } 4577 if (auto ErrMsg = validateMIMGDataSize(Inst)) { 4578 Error(IDLoc, *ErrMsg); 4579 return false; 4580 } 4581 if (!validateMIMGAddrSize(Inst)) { 4582 Error(IDLoc, 4583 "image address size does not match dim and a16"); 4584 return false; 4585 } 4586 if (!validateMIMGAtomicDMask(Inst)) { 4587 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4588 "invalid atomic image dmask"); 4589 return false; 4590 } 4591 if (!validateMIMGGatherDMask(Inst)) { 4592 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4593 "invalid image_gather dmask: only one bit must be set"); 4594 return false; 4595 } 4596 if (!validateMovrels(Inst, Operands)) { 4597 return false; 4598 } 4599 if (!validateFlatOffset(Inst, Operands)) { 4600 return false; 4601 } 4602 if (!validateSMEMOffset(Inst, Operands)) { 4603 return false; 4604 } 4605 if (!validateMAIAccWrite(Inst, Operands)) { 4606 return false; 4607 } 4608 if (!validateMFMA(Inst, Operands)) { 4609 return false; 4610 } 4611 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4612 return false; 4613 } 4614 4615 if (!validateAGPRLdSt(Inst)) { 4616 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4617 ? 
"invalid register class: data and dst should be all VGPR or AGPR" 4618 : "invalid register class: agpr loads and stores not supported on this GPU" 4619 ); 4620 return false; 4621 } 4622 if (!validateVGPRAlign(Inst)) { 4623 Error(IDLoc, 4624 "invalid register class: vgpr tuples must be 64 bit aligned"); 4625 return false; 4626 } 4627 if (!validateGWS(Inst, Operands)) { 4628 return false; 4629 } 4630 4631 if (!validateBLGP(Inst, Operands)) { 4632 return false; 4633 } 4634 4635 if (!validateDivScale(Inst)) { 4636 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4637 return false; 4638 } 4639 if (!validateExeczVcczOperands(Operands)) { 4640 return false; 4641 } 4642 4643 if (!validateFlatLdsDMA(Inst, Operands, IDLoc)) { 4644 return false; 4645 } 4646 4647 return true; 4648 } 4649 4650 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4651 const FeatureBitset &FBS, 4652 unsigned VariantID = 0); 4653 4654 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4655 const FeatureBitset &AvailableFeatures, 4656 unsigned VariantID); 4657 4658 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4659 const FeatureBitset &FBS) { 4660 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4661 } 4662 4663 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4664 const FeatureBitset &FBS, 4665 ArrayRef<unsigned> Variants) { 4666 for (auto Variant : Variants) { 4667 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4668 return true; 4669 } 4670 4671 return false; 4672 } 4673 4674 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4675 const SMLoc &IDLoc) { 4676 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4677 4678 // Check if requested instruction variant is supported. 4679 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4680 return false; 4681 4682 // This instruction is not supported. 4683 // Clear any other pending errors because they are no longer relevant. 4684 getParser().clearPendingErrors(); 4685 4686 // Requested instruction variant is not supported. 4687 // Check if any other variants are supported. 4688 StringRef VariantName = getMatchedVariantName(); 4689 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4690 return Error(IDLoc, 4691 Twine(VariantName, 4692 " variant of this instruction is not supported")); 4693 } 4694 4695 // Finally check if this instruction is supported on any other GPU. 4696 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4697 return Error(IDLoc, "instruction not supported on this GPU"); 4698 } 4699 4700 // Instruction not supported on any GPU. Probably a typo. 4701 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4702 return Error(IDLoc, "invalid instruction" + Suggestion); 4703 } 4704 4705 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4706 OperandVector &Operands, 4707 MCStreamer &Out, 4708 uint64_t &ErrorInfo, 4709 bool MatchingInlineAsm) { 4710 MCInst Inst; 4711 unsigned Result = Match_Success; 4712 for (auto Variant : getMatchedVariants()) { 4713 uint64_t EI; 4714 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4715 Variant); 4716 // We order match statuses from least to most specific. 
We use most specific 4717 // status as resulting 4718 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4719 if ((R == Match_Success) || 4720 (R == Match_PreferE32) || 4721 (R == Match_MissingFeature && Result != Match_PreferE32) || 4722 (R == Match_InvalidOperand && Result != Match_MissingFeature 4723 && Result != Match_PreferE32) || 4724 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4725 && Result != Match_MissingFeature 4726 && Result != Match_PreferE32)) { 4727 Result = R; 4728 ErrorInfo = EI; 4729 } 4730 if (R == Match_Success) 4731 break; 4732 } 4733 4734 if (Result == Match_Success) { 4735 if (!validateInstruction(Inst, IDLoc, Operands)) { 4736 return true; 4737 } 4738 Inst.setLoc(IDLoc); 4739 Out.emitInstruction(Inst, getSTI()); 4740 return false; 4741 } 4742 4743 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4744 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4745 return true; 4746 } 4747 4748 switch (Result) { 4749 default: break; 4750 case Match_MissingFeature: 4751 // It has been verified that the specified instruction 4752 // mnemonic is valid. A match was found but it requires 4753 // features which are not supported on this GPU. 4754 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4755 4756 case Match_InvalidOperand: { 4757 SMLoc ErrorLoc = IDLoc; 4758 if (ErrorInfo != ~0ULL) { 4759 if (ErrorInfo >= Operands.size()) { 4760 return Error(IDLoc, "too few operands for instruction"); 4761 } 4762 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4763 if (ErrorLoc == SMLoc()) 4764 ErrorLoc = IDLoc; 4765 } 4766 return Error(ErrorLoc, "invalid operand for instruction"); 4767 } 4768 4769 case Match_PreferE32: 4770 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4771 "should be encoded as e32"); 4772 case Match_MnemonicFail: 4773 llvm_unreachable("Invalid instructions should have been handled already"); 4774 } 4775 llvm_unreachable("Implement any new match types added!"); 4776 } 4777 4778 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4779 int64_t Tmp = -1; 4780 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4781 return true; 4782 } 4783 if (getParser().parseAbsoluteExpression(Tmp)) { 4784 return true; 4785 } 4786 Ret = static_cast<uint32_t>(Tmp); 4787 return false; 4788 } 4789 4790 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4791 uint32_t &Minor) { 4792 if (ParseAsAbsoluteExpression(Major)) 4793 return TokError("invalid major version"); 4794 4795 if (!trySkipToken(AsmToken::Comma)) 4796 return TokError("minor version number required, comma expected"); 4797 4798 if (ParseAsAbsoluteExpression(Minor)) 4799 return TokError("invalid minor version"); 4800 4801 return false; 4802 } 4803 4804 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4805 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4806 return TokError("directive only supported for amdgcn architecture"); 4807 4808 std::string TargetIDDirective; 4809 SMLoc TargetStart = getTok().getLoc(); 4810 if (getParser().parseEscapedString(TargetIDDirective)) 4811 return true; 4812 4813 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4814 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4815 return getParser().Error(TargetRange.Start, 4816 (Twine(".amdgcn_target directive's target id ") + 4817 Twine(TargetIDDirective) + 4818 Twine(" does not match the specified target id ") + 4819 
Twine(getTargetStreamer().getTargetID()->toString())).str()); 4820 4821 return false; 4822 } 4823 4824 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4825 return Error(Range.Start, "value out of range", Range); 4826 } 4827 4828 bool AMDGPUAsmParser::calculateGPRBlocks( 4829 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4830 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4831 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4832 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4833 // TODO(scott.linder): These calculations are duplicated from 4834 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 4835 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4836 4837 unsigned NumVGPRs = NextFreeVGPR; 4838 unsigned NumSGPRs = NextFreeSGPR; 4839 4840 if (Version.Major >= 10) 4841 NumSGPRs = 0; 4842 else { 4843 unsigned MaxAddressableNumSGPRs = 4844 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4845 4846 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4847 NumSGPRs > MaxAddressableNumSGPRs) 4848 return OutOfRangeError(SGPRRange); 4849 4850 NumSGPRs += 4851 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4852 4853 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4854 NumSGPRs > MaxAddressableNumSGPRs) 4855 return OutOfRangeError(SGPRRange); 4856 4857 if (Features.test(FeatureSGPRInitBug)) 4858 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4859 } 4860 4861 VGPRBlocks = 4862 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4863 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4864 4865 return false; 4866 } 4867 4868 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4869 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4870 return TokError("directive only supported for amdgcn architecture"); 4871 4872 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4873 return TokError("directive only supported for amdhsa OS"); 4874 4875 StringRef KernelName; 4876 if (getParser().parseIdentifier(KernelName)) 4877 return true; 4878 4879 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4880 4881 StringSet<> Seen; 4882 4883 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4884 4885 SMRange VGPRRange; 4886 uint64_t NextFreeVGPR = 0; 4887 uint64_t AccumOffset = 0; 4888 uint64_t SharedVGPRCount = 0; 4889 SMRange SGPRRange; 4890 uint64_t NextFreeSGPR = 0; 4891 4892 // Count the number of user SGPRs implied from the enabled feature bits. 4893 unsigned ImpliedUserSGPRCount = 0; 4894 4895 // Track if the asm explicitly contains the directive for the user SGPR 4896 // count. 
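// (When ".amdhsa_user_sgpr_count" is given explicitly, it must not be smaller
// than the count implied by the individual .amdhsa_user_sgpr_* directives;
// that consistency check is performed after the directive loop below.)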
4897 Optional<unsigned> ExplicitUserSGPRCount; 4898 bool ReserveVCC = true; 4899 bool ReserveFlatScr = true; 4900 Optional<bool> EnableWavefrontSize32; 4901 4902 while (true) { 4903 while (trySkipToken(AsmToken::EndOfStatement)); 4904 4905 StringRef ID; 4906 SMRange IDRange = getTok().getLocRange(); 4907 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4908 return true; 4909 4910 if (ID == ".end_amdhsa_kernel") 4911 break; 4912 4913 if (!Seen.insert(ID).second) 4914 return TokError(".amdhsa_ directives cannot be repeated"); 4915 4916 SMLoc ValStart = getLoc(); 4917 int64_t IVal; 4918 if (getParser().parseAbsoluteExpression(IVal)) 4919 return true; 4920 SMLoc ValEnd = getLoc(); 4921 SMRange ValRange = SMRange(ValStart, ValEnd); 4922 4923 if (IVal < 0) 4924 return OutOfRangeError(ValRange); 4925 4926 uint64_t Val = IVal; 4927 4928 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4929 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4930 return OutOfRangeError(RANGE); \ 4931 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4932 4933 if (ID == ".amdhsa_group_segment_fixed_size") { 4934 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4935 return OutOfRangeError(ValRange); 4936 KD.group_segment_fixed_size = Val; 4937 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4938 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4939 return OutOfRangeError(ValRange); 4940 KD.private_segment_fixed_size = Val; 4941 } else if (ID == ".amdhsa_kernarg_size") { 4942 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4943 return OutOfRangeError(ValRange); 4944 KD.kernarg_size = Val; 4945 } else if (ID == ".amdhsa_user_sgpr_count") { 4946 ExplicitUserSGPRCount = Val; 4947 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4948 if (hasArchitectedFlatScratch()) 4949 return Error(IDRange.Start, 4950 "directive is not supported with architected flat scratch", 4951 IDRange); 4952 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4953 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4954 Val, ValRange); 4955 if (Val) 4956 ImpliedUserSGPRCount += 4; 4957 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4958 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4959 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4960 ValRange); 4961 if (Val) 4962 ImpliedUserSGPRCount += 2; 4963 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4964 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4965 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4966 ValRange); 4967 if (Val) 4968 ImpliedUserSGPRCount += 2; 4969 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4970 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4971 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4972 Val, ValRange); 4973 if (Val) 4974 ImpliedUserSGPRCount += 2; 4975 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4976 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4977 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4978 ValRange); 4979 if (Val) 4980 ImpliedUserSGPRCount += 2; 4981 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4982 if (hasArchitectedFlatScratch()) 4983 return Error(IDRange.Start, 4984 "directive is not supported with architected flat scratch", 4985 IDRange); 4986 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4987 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4988 ValRange); 4989 if (Val) 4990 ImpliedUserSGPRCount += 2; 4991 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4992 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4993 
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4994 Val, ValRange); 4995 if (Val) 4996 ImpliedUserSGPRCount += 1; 4997 } else if (ID == ".amdhsa_wavefront_size32") { 4998 if (IVersion.Major < 10) 4999 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5000 EnableWavefrontSize32 = Val; 5001 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5002 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 5003 Val, ValRange); 5004 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 5005 if (hasArchitectedFlatScratch()) 5006 return Error(IDRange.Start, 5007 "directive is not supported with architected flat scratch", 5008 IDRange); 5009 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5010 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 5011 } else if (ID == ".amdhsa_enable_private_segment") { 5012 if (!hasArchitectedFlatScratch()) 5013 return Error( 5014 IDRange.Start, 5015 "directive is not supported without architected flat scratch", 5016 IDRange); 5017 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5018 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 5019 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 5020 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5021 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 5022 ValRange); 5023 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 5024 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5025 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 5026 ValRange); 5027 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 5028 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5029 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 5030 ValRange); 5031 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 5032 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5033 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 5034 ValRange); 5035 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 5036 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5037 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 5038 ValRange); 5039 } else if (ID == ".amdhsa_next_free_vgpr") { 5040 VGPRRange = ValRange; 5041 NextFreeVGPR = Val; 5042 } else if (ID == ".amdhsa_next_free_sgpr") { 5043 SGPRRange = ValRange; 5044 NextFreeSGPR = Val; 5045 } else if (ID == ".amdhsa_accum_offset") { 5046 if (!isGFX90A()) 5047 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5048 AccumOffset = Val; 5049 } else if (ID == ".amdhsa_reserve_vcc") { 5050 if (!isUInt<1>(Val)) 5051 return OutOfRangeError(ValRange); 5052 ReserveVCC = Val; 5053 } else if (ID == ".amdhsa_reserve_flat_scratch") { 5054 if (IVersion.Major < 7) 5055 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 5056 if (hasArchitectedFlatScratch()) 5057 return Error(IDRange.Start, 5058 "directive is not supported with architected flat scratch", 5059 IDRange); 5060 if (!isUInt<1>(Val)) 5061 return OutOfRangeError(ValRange); 5062 ReserveFlatScr = Val; 5063 } else if (ID == ".amdhsa_reserve_xnack_mask") { 5064 if (IVersion.Major < 8) 5065 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 5066 if (!isUInt<1>(Val)) 5067 return OutOfRangeError(ValRange); 5068 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 5069 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 5070 IDRange); 5071 } else if (ID == ".amdhsa_float_round_mode_32") { 5072 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5073 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 5074 } else if (ID == ".amdhsa_float_round_mode_16_64") { 5075 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5076 
COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 5077 } else if (ID == ".amdhsa_float_denorm_mode_32") { 5078 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5079 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 5080 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 5081 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5082 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 5083 ValRange); 5084 } else if (ID == ".amdhsa_dx10_clamp") { 5085 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5086 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 5087 } else if (ID == ".amdhsa_ieee_mode") { 5088 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 5089 Val, ValRange); 5090 } else if (ID == ".amdhsa_fp16_overflow") { 5091 if (IVersion.Major < 9) 5092 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 5093 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 5094 ValRange); 5095 } else if (ID == ".amdhsa_tg_split") { 5096 if (!isGFX90A()) 5097 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5098 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 5099 ValRange); 5100 } else if (ID == ".amdhsa_workgroup_processor_mode") { 5101 if (IVersion.Major < 10) 5102 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5103 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 5104 ValRange); 5105 } else if (ID == ".amdhsa_memory_ordered") { 5106 if (IVersion.Major < 10) 5107 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5108 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 5109 ValRange); 5110 } else if (ID == ".amdhsa_forward_progress") { 5111 if (IVersion.Major < 10) 5112 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5113 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 5114 ValRange); 5115 } else if (ID == ".amdhsa_shared_vgpr_count") { 5116 if (IVersion.Major < 10) 5117 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5118 SharedVGPRCount = Val; 5119 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, 5120 COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT, Val, 5121 ValRange); 5122 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 5123 PARSE_BITS_ENTRY( 5124 KD.compute_pgm_rsrc2, 5125 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 5126 ValRange); 5127 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 5128 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5129 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 5130 Val, ValRange); 5131 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 5132 PARSE_BITS_ENTRY( 5133 KD.compute_pgm_rsrc2, 5134 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 5135 ValRange); 5136 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 5137 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5138 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 5139 Val, ValRange); 5140 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 5141 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5142 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 5143 Val, ValRange); 5144 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 5145 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5146 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 5147 Val, ValRange); 5148 } else if (ID == ".amdhsa_exception_int_div_zero") { 5149 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5150 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 5151 Val, 
ValRange); 5152 } else { 5153 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 5154 } 5155 5156 #undef PARSE_BITS_ENTRY 5157 } 5158 5159 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 5160 return TokError(".amdhsa_next_free_vgpr directive is required"); 5161 5162 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 5163 return TokError(".amdhsa_next_free_sgpr directive is required"); 5164 5165 unsigned VGPRBlocks; 5166 unsigned SGPRBlocks; 5167 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 5168 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 5169 EnableWavefrontSize32, NextFreeVGPR, 5170 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 5171 SGPRBlocks)) 5172 return true; 5173 5174 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 5175 VGPRBlocks)) 5176 return OutOfRangeError(VGPRRange); 5177 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5178 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 5179 5180 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 5181 SGPRBlocks)) 5182 return OutOfRangeError(SGPRRange); 5183 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5184 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 5185 SGPRBlocks); 5186 5187 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) 5188 return TokError(".amdhsa_user_sgpr_count smaller than implied by " 5189 "enabled user SGPRs"); 5190 5191 unsigned UserSGPRCount = 5192 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount; 5193 5194 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 5195 return TokError("too many user SGPRs enabled"); 5196 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 5197 UserSGPRCount); 5198 5199 if (isGFX90A()) { 5200 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 5201 return TokError(".amdhsa_accum_offset directive is required"); 5202 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 5203 return TokError("accum_offset should be in range [4..256] in " 5204 "increments of 4"); 5205 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 5206 return TokError("accum_offset exceeds total VGPR allocation"); 5207 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 5208 (AccumOffset / 4 - 1)); 5209 } 5210 5211 if (IVersion.Major == 10) { 5212 // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY 5213 if (SharedVGPRCount && EnableWavefrontSize32) { 5214 return TokError("shared_vgpr_count directive not valid on " 5215 "wavefront size 32"); 5216 } 5217 if (SharedVGPRCount * 2 + VGPRBlocks > 63) { 5218 return TokError("shared_vgpr_count*2 + " 5219 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot " 5220 "exceed 63"); 5221 } 5222 } 5223 5224 getTargetStreamer().EmitAmdhsaKernelDescriptor( 5225 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 5226 ReserveFlatScr); 5227 return false; 5228 } 5229 5230 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 5231 uint32_t Major; 5232 uint32_t Minor; 5233 5234 if (ParseDirectiveMajorMinor(Major, Minor)) 5235 return true; 5236 5237 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 5238 return false; 5239 } 5240 5241 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 5242 uint32_t Major; 5243 uint32_t Minor; 5244 uint32_t Stepping; 5245 StringRef VendorName; 5246 StringRef ArchName; 5247 5248 // If this directive has no arguments, then use the ISA version for the 5249 // targeted GPU.
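// For example (hypothetical input): ".hsa_code_object_isa" with no arguments
// uses the ISA version of the selected CPU, while a fully specified form
// looks like ".hsa_code_object_isa 8,0,3,"AMD","AMDGPU"".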
5250 if (isToken(AsmToken::EndOfStatement)) { 5251 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5252 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 5253 ISA.Stepping, 5254 "AMD", "AMDGPU"); 5255 return false; 5256 } 5257 5258 if (ParseDirectiveMajorMinor(Major, Minor)) 5259 return true; 5260 5261 if (!trySkipToken(AsmToken::Comma)) 5262 return TokError("stepping version number required, comma expected"); 5263 5264 if (ParseAsAbsoluteExpression(Stepping)) 5265 return TokError("invalid stepping version"); 5266 5267 if (!trySkipToken(AsmToken::Comma)) 5268 return TokError("vendor name required, comma expected"); 5269 5270 if (!parseString(VendorName, "invalid vendor name")) 5271 return true; 5272 5273 if (!trySkipToken(AsmToken::Comma)) 5274 return TokError("arch name required, comma expected"); 5275 5276 if (!parseString(ArchName, "invalid arch name")) 5277 return true; 5278 5279 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 5280 VendorName, ArchName); 5281 return false; 5282 } 5283 5284 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 5285 amd_kernel_code_t &Header) { 5286 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 5287 // assembly for backwards compatibility. 5288 if (ID == "max_scratch_backing_memory_byte_size") { 5289 Parser.eatToEndOfStatement(); 5290 return false; 5291 } 5292 5293 SmallString<40> ErrStr; 5294 raw_svector_ostream Err(ErrStr); 5295 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 5296 return TokError(Err.str()); 5297 } 5298 Lex(); 5299 5300 if (ID == "enable_wavefront_size32") { 5301 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 5302 if (!isGFX10Plus()) 5303 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 5304 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5305 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 5306 } else { 5307 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5308 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 5309 } 5310 } 5311 5312 if (ID == "wavefront_size") { 5313 if (Header.wavefront_size == 5) { 5314 if (!isGFX10Plus()) 5315 return TokError("wavefront_size=5 is only allowed on GFX10+"); 5316 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5317 return TokError("wavefront_size=5 requires +WavefrontSize32"); 5318 } else if (Header.wavefront_size == 6) { 5319 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5320 return TokError("wavefront_size=6 requires +WavefrontSize64"); 5321 } 5322 } 5323 5324 if (ID == "enable_wgp_mode") { 5325 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 5326 !isGFX10Plus()) 5327 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 5328 } 5329 5330 if (ID == "enable_mem_ordered") { 5331 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 5332 !isGFX10Plus()) 5333 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 5334 } 5335 5336 if (ID == "enable_fwd_progress") { 5337 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 5338 !isGFX10Plus()) 5339 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 5340 } 5341 5342 return false; 5343 } 5344 5345 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5346 amd_kernel_code_t Header; 5347 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 5348 5349 while (true) { 5350 // Lex EndOfStatement. 
This is in a while loop, because lexing a comment 5351 // will set the current token to EndOfStatement. 5352 while(trySkipToken(AsmToken::EndOfStatement)); 5353 5354 StringRef ID; 5355 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5356 return true; 5357 5358 if (ID == ".end_amd_kernel_code_t") 5359 break; 5360 5361 if (ParseAMDKernelCodeTValue(ID, Header)) 5362 return true; 5363 } 5364 5365 getTargetStreamer().EmitAMDKernelCodeT(Header); 5366 5367 return false; 5368 } 5369 5370 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5371 StringRef KernelName; 5372 if (!parseId(KernelName, "expected symbol name")) 5373 return true; 5374 5375 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5376 ELF::STT_AMDGPU_HSA_KERNEL); 5377 5378 KernelScope.initialize(getContext()); 5379 return false; 5380 } 5381 5382 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5383 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5384 return Error(getLoc(), 5385 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5386 "architectures"); 5387 } 5388 5389 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5390 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5391 return Error(getParser().getTok().getLoc(), "target id must match options"); 5392 5393 getTargetStreamer().EmitISAVersion(); 5394 Lex(); 5395 5396 return false; 5397 } 5398 5399 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5400 const char *AssemblerDirectiveBegin; 5401 const char *AssemblerDirectiveEnd; 5402 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5403 isHsaAbiVersion3AndAbove(&getSTI()) 5404 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 5405 HSAMD::V3::AssemblerDirectiveEnd) 5406 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 5407 HSAMD::AssemblerDirectiveEnd); 5408 5409 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5410 return Error(getLoc(), 5411 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5412 "not available on non-amdhsa OSes")).str()); 5413 } 5414 5415 std::string HSAMetadataString; 5416 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5417 HSAMetadataString)) 5418 return true; 5419 5420 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5421 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5422 return Error(getLoc(), "invalid HSA metadata"); 5423 } else { 5424 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5425 return Error(getLoc(), "invalid HSA metadata"); 5426 } 5427 5428 return false; 5429 } 5430 5431 /// Common code to parse out a block of text (typically YAML) between start and 5432 /// end directives. 
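/// For example, HSA metadata is collected verbatim between its begin and end
/// directives (.amd_amdgpu_hsa_metadata / .end_amd_amdgpu_hsa_metadata for the
/// v2 ABI; directive names given here only to illustrate how this helper is
/// used) and then handed to the target streamer.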
5433 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5434 const char *AssemblerDirectiveEnd, 5435 std::string &CollectString) { 5436 5437 raw_string_ostream CollectStream(CollectString); 5438 5439 getLexer().setSkipSpace(false); 5440 5441 bool FoundEnd = false; 5442 while (!isToken(AsmToken::Eof)) { 5443 while (isToken(AsmToken::Space)) { 5444 CollectStream << getTokenStr(); 5445 Lex(); 5446 } 5447 5448 if (trySkipId(AssemblerDirectiveEnd)) { 5449 FoundEnd = true; 5450 break; 5451 } 5452 5453 CollectStream << Parser.parseStringToEndOfStatement() 5454 << getContext().getAsmInfo()->getSeparatorString(); 5455 5456 Parser.eatToEndOfStatement(); 5457 } 5458 5459 getLexer().setSkipSpace(true); 5460 5461 if (isToken(AsmToken::Eof) && !FoundEnd) { 5462 return TokError(Twine("expected directive ") + 5463 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5464 } 5465 5466 CollectStream.flush(); 5467 return false; 5468 } 5469 5470 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5471 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5472 std::string String; 5473 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5474 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5475 return true; 5476 5477 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5478 if (!PALMetadata->setFromString(String)) 5479 return Error(getLoc(), "invalid PAL metadata"); 5480 return false; 5481 } 5482 5483 /// Parse the assembler directive for old linear-format PAL metadata. 5484 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5485 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5486 return Error(getLoc(), 5487 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5488 "not available on non-amdpal OSes")).str()); 5489 } 5490 5491 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5492 PALMetadata->setLegacy(); 5493 for (;;) { 5494 uint32_t Key, Value; 5495 if (ParseAsAbsoluteExpression(Key)) { 5496 return TokError(Twine("invalid value in ") + 5497 Twine(PALMD::AssemblerDirective)); 5498 } 5499 if (!trySkipToken(AsmToken::Comma)) { 5500 return TokError(Twine("expected an even number of values in ") + 5501 Twine(PALMD::AssemblerDirective)); 5502 } 5503 if (ParseAsAbsoluteExpression(Value)) { 5504 return TokError(Twine("invalid value in ") + 5505 Twine(PALMD::AssemblerDirective)); 5506 } 5507 PALMetadata->setRegister(Key, Value); 5508 if (!trySkipToken(AsmToken::Comma)) 5509 break; 5510 } 5511 return false; 5512 } 5513 5514 /// ParseDirectiveAMDGPULDS 5515 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5516 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5517 if (getParser().checkForValidSection()) 5518 return true; 5519 5520 StringRef Name; 5521 SMLoc NameLoc = getLoc(); 5522 if (getParser().parseIdentifier(Name)) 5523 return TokError("expected identifier in directive"); 5524 5525 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5526 if (parseToken(AsmToken::Comma, "expected ','")) 5527 return true; 5528 5529 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5530 5531 int64_t Size; 5532 SMLoc SizeLoc = getLoc(); 5533 if (getParser().parseAbsoluteExpression(Size)) 5534 return true; 5535 if (Size < 0) 5536 return Error(SizeLoc, "size must be non-negative"); 5537 if (Size > LocalMemorySize) 5538 return Error(SizeLoc, "size is too large"); 5539 5540 int64_t Alignment = 4; 5541 if (trySkipToken(AsmToken::Comma)) { 5542 SMLoc AlignLoc = getLoc(); 5543 if 
(getParser().parseAbsoluteExpression(Alignment)) 5544 return true; 5545 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5546 return Error(AlignLoc, "alignment must be a power of two"); 5547 5548 // Alignment larger than the size of LDS is possible in theory, as long 5549 // as the linker manages to place the symbol at address 0, but we do want 5550 // to make sure the alignment fits nicely into a 32-bit integer. 5551 if (Alignment >= 1u << 31) 5552 return Error(AlignLoc, "alignment is too large"); 5553 } 5554 5555 if (parseEOL()) 5556 return true; 5557 5558 Symbol->redefineIfPossible(); 5559 if (!Symbol->isUndefined()) 5560 return Error(NameLoc, "invalid symbol redefinition"); 5561 5562 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5563 return false; 5564 } 5565 5566 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5567 StringRef IDVal = DirectiveID.getString(); 5568 5569 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5570 if (IDVal == ".amdhsa_kernel") 5571 return ParseDirectiveAMDHSAKernel(); 5572 5573 // TODO: Restructure/combine with PAL metadata directive. 5574 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5575 return ParseDirectiveHSAMetadata(); 5576 } else { 5577 if (IDVal == ".hsa_code_object_version") 5578 return ParseDirectiveHSACodeObjectVersion(); 5579 5580 if (IDVal == ".hsa_code_object_isa") 5581 return ParseDirectiveHSACodeObjectISA(); 5582 5583 if (IDVal == ".amd_kernel_code_t") 5584 return ParseDirectiveAMDKernelCodeT(); 5585 5586 if (IDVal == ".amdgpu_hsa_kernel") 5587 return ParseDirectiveAMDGPUHsaKernel(); 5588 5589 if (IDVal == ".amd_amdgpu_isa") 5590 return ParseDirectiveISAVersion(); 5591 5592 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5593 return ParseDirectiveHSAMetadata(); 5594 } 5595 5596 if (IDVal == ".amdgcn_target") 5597 return ParseDirectiveAMDGCNTarget(); 5598 5599 if (IDVal == ".amdgpu_lds") 5600 return ParseDirectiveAMDGPULDS(); 5601 5602 if (IDVal == PALMD::AssemblerDirectiveBegin) 5603 return ParseDirectivePALMetadataBegin(); 5604 5605 if (IDVal == PALMD::AssemblerDirective) 5606 return ParseDirectivePALMetadata(); 5607 5608 return true; 5609 } 5610 5611 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5612 unsigned RegNo) { 5613 5614 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo)) 5615 return isGFX9Plus(); 5616 5617 // GFX10+ has 2 more SGPRs 104 and 105. 5618 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo)) 5619 return hasSGPR104_SGPR105(); 5620 5621 switch (RegNo) { 5622 case AMDGPU::SRC_SHARED_BASE: 5623 case AMDGPU::SRC_SHARED_LIMIT: 5624 case AMDGPU::SRC_PRIVATE_BASE: 5625 case AMDGPU::SRC_PRIVATE_LIMIT: 5626 return isGFX9Plus(); 5627 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5628 return isGFX9Plus() && !isGFX11Plus(); 5629 case AMDGPU::TBA: 5630 case AMDGPU::TBA_LO: 5631 case AMDGPU::TBA_HI: 5632 case AMDGPU::TMA: 5633 case AMDGPU::TMA_LO: 5634 case AMDGPU::TMA_HI: 5635 return !isGFX9Plus(); 5636 case AMDGPU::XNACK_MASK: 5637 case AMDGPU::XNACK_MASK_LO: 5638 case AMDGPU::XNACK_MASK_HI: 5639 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5640 case AMDGPU::SGPR_NULL: 5641 return isGFX10Plus(); 5642 default: 5643 break; 5644 } 5645 5646 if (isCI()) 5647 return true; 5648 5649 if (isSI() || isGFX10Plus()) { 5650 // No flat_scr on SI. 5651 // On GFX10Plus flat scratch is not a valid register operand and can only be 5652 // accessed with s_setreg/s_getreg.
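    // (Illustrative: s_getreg_b32 s0, hwreg(HW_REG_FLAT_SCR_LO) instead of
    // reading flat_scratch_lo directly; the hwreg name used here is an
    // assumption, not taken from this file.)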
5653 switch (RegNo) { 5654 case AMDGPU::FLAT_SCR: 5655 case AMDGPU::FLAT_SCR_LO: 5656 case AMDGPU::FLAT_SCR_HI: 5657 return false; 5658 default: 5659 return true; 5660 } 5661 } 5662 5663 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5664 // SI/CI have. 5665 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo)) 5666 return hasSGPR102_SGPR103(); 5667 5668 return true; 5669 } 5670 5671 OperandMatchResultTy 5672 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 5673 OperandMode Mode) { 5674 OperandMatchResultTy ResTy = parseVOPD(Operands); 5675 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5676 isToken(AsmToken::EndOfStatement)) 5677 return ResTy; 5678 5679 // Try to parse with a custom parser 5680 ResTy = MatchOperandParserImpl(Operands, Mnemonic); 5681 5682 // If we successfully parsed the operand or if there was an error parsing, 5683 // we are done. 5684 // 5685 // If we are parsing after we reach EndOfStatement then this means we 5686 // are appending default values to the Operands list. This is only done 5687 // by a custom parser, so we shouldn't continue on to the generic parsing. 5688 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5689 isToken(AsmToken::EndOfStatement)) 5690 return ResTy; 5691 5692 SMLoc RBraceLoc; 5693 SMLoc LBraceLoc = getLoc(); 5694 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 5695 unsigned Prefix = Operands.size(); 5696 5697 for (;;) { 5698 auto Loc = getLoc(); 5699 ResTy = parseReg(Operands); 5700 if (ResTy == MatchOperand_NoMatch) 5701 Error(Loc, "expected a register"); 5702 if (ResTy != MatchOperand_Success) 5703 return MatchOperand_ParseFail; 5704 5705 RBraceLoc = getLoc(); 5706 if (trySkipToken(AsmToken::RBrac)) 5707 break; 5708 5709 if (!skipToken(AsmToken::Comma, 5710 "expected a comma or a closing square bracket")) { 5711 return MatchOperand_ParseFail; 5712 } 5713 } 5714 5715 if (Operands.size() - Prefix > 1) { 5716 Operands.insert(Operands.begin() + Prefix, 5717 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 5718 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 5719 } 5720 5721 return MatchOperand_Success; 5722 } 5723 5724 return parseRegOrImm(Operands); 5725 } 5726 5727 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 5728 // Clear any forced encodings from the previous instruction.
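  // A suffix such as _e32, _e64, _e64_dpp, _dpp or _sdwa (e.g. the
  // illustrative mnemonic v_add_f32_e64) is stripped from the name below and
  // remembered as a forced encoding for instruction matching.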
5729 setForcedEncodingSize(0); 5730 setForcedDPP(false); 5731 setForcedSDWA(false); 5732 5733 if (Name.endswith("_e64_dpp")) { 5734 setForcedDPP(true); 5735 setForcedEncodingSize(64); 5736 return Name.substr(0, Name.size() - 8); 5737 } else if (Name.endswith("_e64")) { 5738 setForcedEncodingSize(64); 5739 return Name.substr(0, Name.size() - 4); 5740 } else if (Name.endswith("_e32")) { 5741 setForcedEncodingSize(32); 5742 return Name.substr(0, Name.size() - 4); 5743 } else if (Name.endswith("_dpp")) { 5744 setForcedDPP(true); 5745 return Name.substr(0, Name.size() - 4); 5746 } else if (Name.endswith("_sdwa")) { 5747 setForcedSDWA(true); 5748 return Name.substr(0, Name.size() - 5); 5749 } 5750 return Name; 5751 } 5752 5753 static void applyMnemonicAliases(StringRef &Mnemonic, 5754 const FeatureBitset &Features, 5755 unsigned VariantID); 5756 5757 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5758 StringRef Name, 5759 SMLoc NameLoc, OperandVector &Operands) { 5760 // Add the instruction mnemonic 5761 Name = parseMnemonicSuffix(Name); 5762 5763 // If the target architecture uses MnemonicAlias, call it here to parse 5764 // operands correctly. 5765 applyMnemonicAliases(Name, getAvailableFeatures(), 0); 5766 5767 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5768 5769 bool IsMIMG = Name.startswith("image_"); 5770 5771 while (!trySkipToken(AsmToken::EndOfStatement)) { 5772 OperandMode Mode = OperandMode_Default; 5773 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5774 Mode = OperandMode_NSA; 5775 CPolSeen = 0; 5776 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5777 5778 if (Res != MatchOperand_Success) { 5779 checkUnsupportedInstruction(Name, NameLoc); 5780 if (!Parser.hasPendingError()) { 5781 // FIXME: use real operand location rather than the current location. 5782 StringRef Msg = 5783 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5784 "not a valid operand."; 5785 Error(getLoc(), Msg); 5786 } 5787 while (!trySkipToken(AsmToken::EndOfStatement)) { 5788 lex(); 5789 } 5790 return true; 5791 } 5792 5793 // Eat the comma or space if there is one. 5794 trySkipToken(AsmToken::Comma); 5795 } 5796 5797 return false; 5798 } 5799 5800 //===----------------------------------------------------------------------===// 5801 // Utility functions 5802 //===----------------------------------------------------------------------===// 5803 5804 OperandMatchResultTy 5805 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5806 5807 if (!trySkipId(Prefix, AsmToken::Colon)) 5808 return MatchOperand_NoMatch; 5809 5810 return parseExpr(IntVal) ? 
MatchOperand_Success : MatchOperand_ParseFail; 5811 } 5812 5813 OperandMatchResultTy 5814 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5815 AMDGPUOperand::ImmTy ImmTy, 5816 bool (*ConvertResult)(int64_t&)) { 5817 SMLoc S = getLoc(); 5818 int64_t Value = 0; 5819 5820 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5821 if (Res != MatchOperand_Success) 5822 return Res; 5823 5824 if (ConvertResult && !ConvertResult(Value)) { 5825 Error(S, "invalid " + StringRef(Prefix) + " value."); 5826 } 5827 5828 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5829 return MatchOperand_Success; 5830 } 5831 5832 OperandMatchResultTy 5833 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5834 OperandVector &Operands, 5835 AMDGPUOperand::ImmTy ImmTy, 5836 bool (*ConvertResult)(int64_t&)) { 5837 SMLoc S = getLoc(); 5838 if (!trySkipId(Prefix, AsmToken::Colon)) 5839 return MatchOperand_NoMatch; 5840 5841 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5842 return MatchOperand_ParseFail; 5843 5844 unsigned Val = 0; 5845 const unsigned MaxSize = 4; 5846 5847 // FIXME: How to verify the number of elements matches the number of src 5848 // operands? 5849 for (int I = 0; ; ++I) { 5850 int64_t Op; 5851 SMLoc Loc = getLoc(); 5852 if (!parseExpr(Op)) 5853 return MatchOperand_ParseFail; 5854 5855 if (Op != 0 && Op != 1) { 5856 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5857 return MatchOperand_ParseFail; 5858 } 5859 5860 Val |= (Op << I); 5861 5862 if (trySkipToken(AsmToken::RBrac)) 5863 break; 5864 5865 if (I + 1 == MaxSize) { 5866 Error(getLoc(), "expected a closing square bracket"); 5867 return MatchOperand_ParseFail; 5868 } 5869 5870 if (!skipToken(AsmToken::Comma, "expected a comma")) 5871 return MatchOperand_ParseFail; 5872 } 5873 5874 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5875 return MatchOperand_Success; 5876 } 5877 5878 OperandMatchResultTy 5879 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5880 AMDGPUOperand::ImmTy ImmTy) { 5881 int64_t Bit; 5882 SMLoc S = getLoc(); 5883 5884 if (trySkipId(Name)) { 5885 Bit = 1; 5886 } else if (trySkipId("no", Name)) { 5887 Bit = 0; 5888 } else { 5889 return MatchOperand_NoMatch; 5890 } 5891 5892 if (Name == "r128" && !hasMIMG_R128()) { 5893 Error(S, "r128 modifier is not supported on this GPU"); 5894 return MatchOperand_ParseFail; 5895 } 5896 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5897 Error(S, "a16 modifier is not supported on this GPU"); 5898 return MatchOperand_ParseFail; 5899 } 5900 5901 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5902 ImmTy = AMDGPUOperand::ImmTyR128A16; 5903 5904 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5905 return MatchOperand_Success; 5906 } 5907 5908 OperandMatchResultTy 5909 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5910 unsigned CPolOn = 0; 5911 unsigned CPolOff = 0; 5912 SMLoc S = getLoc(); 5913 5914 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 5915 if (isGFX940() && !Mnemo.startswith("s_")) { 5916 if (trySkipId("sc0")) 5917 CPolOn = AMDGPU::CPol::SC0; 5918 else if (trySkipId("nosc0")) 5919 CPolOff = AMDGPU::CPol::SC0; 5920 else if (trySkipId("nt")) 5921 CPolOn = AMDGPU::CPol::NT; 5922 else if (trySkipId("nont")) 5923 CPolOff = AMDGPU::CPol::NT; 5924 else if (trySkipId("sc1")) 5925 CPolOn = AMDGPU::CPol::SC1; 5926 else if (trySkipId("nosc1")) 5927 CPolOff = AMDGPU::CPol::SC1; 5928 else 5929 return 
MatchOperand_NoMatch; 5930 } 5931 else if (trySkipId("glc")) 5932 CPolOn = AMDGPU::CPol::GLC; 5933 else if (trySkipId("noglc")) 5934 CPolOff = AMDGPU::CPol::GLC; 5935 else if (trySkipId("slc")) 5936 CPolOn = AMDGPU::CPol::SLC; 5937 else if (trySkipId("noslc")) 5938 CPolOff = AMDGPU::CPol::SLC; 5939 else if (trySkipId("dlc")) 5940 CPolOn = AMDGPU::CPol::DLC; 5941 else if (trySkipId("nodlc")) 5942 CPolOff = AMDGPU::CPol::DLC; 5943 else if (trySkipId("scc")) 5944 CPolOn = AMDGPU::CPol::SCC; 5945 else if (trySkipId("noscc")) 5946 CPolOff = AMDGPU::CPol::SCC; 5947 else 5948 return MatchOperand_NoMatch; 5949 5950 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5951 Error(S, "dlc modifier is not supported on this GPU"); 5952 return MatchOperand_ParseFail; 5953 } 5954 5955 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5956 Error(S, "scc modifier is not supported on this GPU"); 5957 return MatchOperand_ParseFail; 5958 } 5959 5960 if (CPolSeen & (CPolOn | CPolOff)) { 5961 Error(S, "duplicate cache policy modifier"); 5962 return MatchOperand_ParseFail; 5963 } 5964 5965 CPolSeen |= (CPolOn | CPolOff); 5966 5967 for (unsigned I = 1; I != Operands.size(); ++I) { 5968 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5969 if (Op.isCPol()) { 5970 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5971 return MatchOperand_Success; 5972 } 5973 } 5974 5975 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5976 AMDGPUOperand::ImmTyCPol)); 5977 5978 return MatchOperand_Success; 5979 } 5980 5981 static void addOptionalImmOperand( 5982 MCInst& Inst, const OperandVector& Operands, 5983 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5984 AMDGPUOperand::ImmTy ImmT, 5985 int64_t Default = 0) { 5986 auto i = OptionalIdx.find(ImmT); 5987 if (i != OptionalIdx.end()) { 5988 unsigned Idx = i->second; 5989 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5990 } else { 5991 Inst.addOperand(MCOperand::createImm(Default)); 5992 } 5993 } 5994 5995 OperandMatchResultTy 5996 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5997 StringRef &Value, 5998 SMLoc &StringLoc) { 5999 if (!trySkipId(Prefix, AsmToken::Colon)) 6000 return MatchOperand_NoMatch; 6001 6002 StringLoc = getLoc(); 6003 return parseId(Value, "expected an identifier") ? MatchOperand_Success 6004 : MatchOperand_ParseFail; 6005 } 6006 6007 //===----------------------------------------------------------------------===// 6008 // MTBUF format 6009 //===----------------------------------------------------------------------===// 6010 6011 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 6012 int64_t MaxVal, 6013 int64_t &Fmt) { 6014 int64_t Val; 6015 SMLoc Loc = getLoc(); 6016 6017 auto Res = parseIntWithPrefix(Pref, Val); 6018 if (Res == MatchOperand_ParseFail) 6019 return false; 6020 if (Res == MatchOperand_NoMatch) 6021 return true; 6022 6023 if (Val < 0 || Val > MaxVal) { 6024 Error(Loc, Twine("out of range ", StringRef(Pref))); 6025 return false; 6026 } 6027 6028 Fmt = Val; 6029 return true; 6030 } 6031 6032 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 6033 // values to live in a joint format operand in the MCInst encoding. 6034 OperandMatchResultTy 6035 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 6036 using namespace llvm::AMDGPU::MTBUFFormat; 6037 6038 int64_t Dfmt = DFMT_UNDEF; 6039 int64_t Nfmt = NFMT_UNDEF; 6040 6041 // dfmt and nfmt can appear in either order, and each is optional. 
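  // e.g. "dfmt:4, nfmt:7" or "nfmt:7, dfmt:4" (illustrative values).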
6042 for (int I = 0; I < 2; ++I) { 6043 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 6044 return MatchOperand_ParseFail; 6045 6046 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 6047 return MatchOperand_ParseFail; 6048 } 6049 // Skip optional comma between dfmt/nfmt 6050 // but guard against 2 commas following each other. 6051 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 6052 !peekToken().is(AsmToken::Comma)) { 6053 trySkipToken(AsmToken::Comma); 6054 } 6055 } 6056 6057 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 6058 return MatchOperand_NoMatch; 6059 6060 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6061 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt; 6062 6063 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6064 return MatchOperand_Success; 6065 } 6066 6067 OperandMatchResultTy 6068 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 6069 using namespace llvm::AMDGPU::MTBUFFormat; 6070 6071 int64_t Fmt = UFMT_UNDEF; 6072 6073 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 6074 return MatchOperand_ParseFail; 6075 6076 if (Fmt == UFMT_UNDEF) 6077 return MatchOperand_NoMatch; 6078 6079 Format = Fmt; 6080 return MatchOperand_Success; 6081 } 6082 6083 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 6084 int64_t &Nfmt, 6085 StringRef FormatStr, 6086 SMLoc Loc) { 6087 using namespace llvm::AMDGPU::MTBUFFormat; 6088 int64_t Format; 6089 6090 Format = getDfmt(FormatStr); 6091 if (Format != DFMT_UNDEF) { 6092 Dfmt = Format; 6093 return true; 6094 } 6095 6096 Format = getNfmt(FormatStr, getSTI()); 6097 if (Format != NFMT_UNDEF) { 6098 Nfmt = Format; 6099 return true; 6100 } 6101 6102 Error(Loc, "unsupported format"); 6103 return false; 6104 } 6105 6106 OperandMatchResultTy 6107 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 6108 SMLoc FormatLoc, 6109 int64_t &Format) { 6110 using namespace llvm::AMDGPU::MTBUFFormat; 6111 6112 int64_t Dfmt = DFMT_UNDEF; 6113 int64_t Nfmt = NFMT_UNDEF; 6114 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 6115 return MatchOperand_ParseFail; 6116 6117 if (trySkipToken(AsmToken::Comma)) { 6118 StringRef Str; 6119 SMLoc Loc = getLoc(); 6120 if (!parseId(Str, "expected a format string") || 6121 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 6122 return MatchOperand_ParseFail; 6123 } 6124 if (Dfmt == DFMT_UNDEF) { 6125 Error(Loc, "duplicate numeric format"); 6126 return MatchOperand_ParseFail; 6127 } else if (Nfmt == NFMT_UNDEF) { 6128 Error(Loc, "duplicate data format"); 6129 return MatchOperand_ParseFail; 6130 } 6131 } 6132 6133 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6134 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 6135 6136 if (isGFX10Plus()) { 6137 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI()); 6138 if (Ufmt == UFMT_UNDEF) { 6139 Error(FormatLoc, "unsupported format"); 6140 return MatchOperand_ParseFail; 6141 } 6142 Format = Ufmt; 6143 } else { 6144 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6145 } 6146 6147 return MatchOperand_Success; 6148 } 6149 6150 OperandMatchResultTy 6151 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 6152 SMLoc Loc, 6153 int64_t &Format) { 6154 using namespace llvm::AMDGPU::MTBUFFormat; 6155 6156 auto Id = getUnifiedFormat(FormatStr, getSTI()); 6157 if (Id == UFMT_UNDEF) 6158 return MatchOperand_NoMatch; 6159 6160 if (!isGFX10Plus()) { 6161 Error(Loc, "unified format is not supported on this GPU"); 6162 return MatchOperand_ParseFail; 6163 } 6164 6165 Format = Id; 6166 return MatchOperand_Success; 6167 } 6168 6169 OperandMatchResultTy 6170 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 6171 using namespace llvm::AMDGPU::MTBUFFormat; 6172 SMLoc Loc = getLoc(); 6173 6174 if (!parseExpr(Format)) 6175 return MatchOperand_ParseFail; 6176 if (!isValidFormatEncoding(Format, getSTI())) { 6177 Error(Loc, "out of range format"); 6178 return MatchOperand_ParseFail; 6179 } 6180 6181 return MatchOperand_Success; 6182 } 6183 6184 OperandMatchResultTy 6185 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 6186 using namespace llvm::AMDGPU::MTBUFFormat; 6187 6188 if (!trySkipId("format", AsmToken::Colon)) 6189 return MatchOperand_NoMatch; 6190 6191 if (trySkipToken(AsmToken::LBrac)) { 6192 StringRef FormatStr; 6193 SMLoc Loc = getLoc(); 6194 if (!parseId(FormatStr, "expected a format string")) 6195 return MatchOperand_ParseFail; 6196 6197 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 6198 if (Res == MatchOperand_NoMatch) 6199 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 6200 if (Res != MatchOperand_Success) 6201 return Res; 6202 6203 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 6204 return MatchOperand_ParseFail; 6205 6206 return MatchOperand_Success; 6207 } 6208 6209 return parseNumericFormat(Format); 6210 } 6211 6212 OperandMatchResultTy 6213 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 6214 using namespace llvm::AMDGPU::MTBUFFormat; 6215 6216 int64_t Format = getDefaultFormatEncoding(getSTI()); 6217 OperandMatchResultTy Res; 6218 SMLoc Loc = getLoc(); 6219 6220 // Parse legacy format syntax. 6221 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 6222 if (Res == MatchOperand_ParseFail) 6223 return Res; 6224 6225 bool FormatFound = (Res == MatchOperand_Success); 6226 6227 Operands.push_back( 6228 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 6229 6230 if (FormatFound) 6231 trySkipToken(AsmToken::Comma); 6232 6233 if (isToken(AsmToken::EndOfStatement)) { 6234 // We are expecting an soffset operand, 6235 // but let matcher handle the error. 6236 return MatchOperand_Success; 6237 } 6238 6239 // Parse soffset. 
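  // (an SGPR or an immediate, e.g. s4 or 0; examples illustrative).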
6240 Res = parseRegOrImm(Operands); 6241 if (Res != MatchOperand_Success) 6242 return Res; 6243 6244 trySkipToken(AsmToken::Comma); 6245 6246 if (!FormatFound) { 6247 Res = parseSymbolicOrNumericFormat(Format); 6248 if (Res == MatchOperand_ParseFail) 6249 return Res; 6250 if (Res == MatchOperand_Success) { 6251 auto Size = Operands.size(); 6252 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 6253 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 6254 Op.setImm(Format); 6255 } 6256 return MatchOperand_Success; 6257 } 6258 6259 if (isId("format") && peekToken().is(AsmToken::Colon)) { 6260 Error(getLoc(), "duplicate format"); 6261 return MatchOperand_ParseFail; 6262 } 6263 return MatchOperand_Success; 6264 } 6265 6266 //===----------------------------------------------------------------------===// 6267 // ds 6268 //===----------------------------------------------------------------------===// 6269 6270 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 6271 const OperandVector &Operands) { 6272 OptionalImmIndexMap OptionalIdx; 6273 6274 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6275 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6276 6277 // Add the register arguments 6278 if (Op.isReg()) { 6279 Op.addRegOperands(Inst, 1); 6280 continue; 6281 } 6282 6283 // Handle optional arguments 6284 OptionalIdx[Op.getImmTy()] = i; 6285 } 6286 6287 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 6288 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 6289 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6290 6291 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6292 } 6293 6294 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 6295 bool IsGdsHardcoded) { 6296 OptionalImmIndexMap OptionalIdx; 6297 AMDGPUOperand::ImmTy OffsetType = AMDGPUOperand::ImmTyOffset; 6298 6299 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6300 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6301 6302 // Add the register arguments 6303 if (Op.isReg()) { 6304 Op.addRegOperands(Inst, 1); 6305 continue; 6306 } 6307 6308 if (Op.isToken() && Op.getToken() == "gds") { 6309 IsGdsHardcoded = true; 6310 continue; 6311 } 6312 6313 // Handle optional arguments 6314 OptionalIdx[Op.getImmTy()] = i; 6315 6316 if (Op.getImmTy() == AMDGPUOperand::ImmTySwizzle) 6317 OffsetType = AMDGPUOperand::ImmTySwizzle; 6318 } 6319 6320 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 6321 6322 if (!IsGdsHardcoded) { 6323 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6324 } 6325 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6326 } 6327 6328 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 6329 OptionalImmIndexMap OptionalIdx; 6330 6331 unsigned OperandIdx[4]; 6332 unsigned EnMask = 0; 6333 int SrcIdx = 0; 6334 6335 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6336 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6337 6338 // Add the register arguments 6339 if (Op.isReg()) { 6340 assert(SrcIdx < 4); 6341 OperandIdx[SrcIdx] = Inst.size(); 6342 Op.addRegOperands(Inst, 1); 6343 ++SrcIdx; 6344 continue; 6345 } 6346 6347 if (Op.isOff()) { 6348 assert(SrcIdx < 4); 6349 OperandIdx[SrcIdx] = Inst.size(); 6350 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 6351 ++SrcIdx; 6352 continue; 6353 } 6354 6355 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) 
{ 6356 Op.addImmOperands(Inst, 1); 6357 continue; 6358 } 6359 6360 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en")) 6361 continue; 6362 6363 // Handle optional arguments 6364 OptionalIdx[Op.getImmTy()] = i; 6365 } 6366 6367 assert(SrcIdx == 4); 6368 6369 bool Compr = false; 6370 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 6371 Compr = true; 6372 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 6373 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 6374 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 6375 } 6376 6377 for (auto i = 0; i < SrcIdx; ++i) { 6378 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 6379 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i); 6380 } 6381 } 6382 6383 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6384 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6385 6386 Inst.addOperand(MCOperand::createImm(EnMask)); 6387 } 6388 6389 //===----------------------------------------------------------------------===// 6390 // s_waitcnt 6391 //===----------------------------------------------------------------------===// 6392 6393 static bool 6394 encodeCnt( 6395 const AMDGPU::IsaVersion ISA, 6396 int64_t &IntVal, 6397 int64_t CntVal, 6398 bool Saturate, 6399 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6400 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6401 { 6402 bool Failed = false; 6403 6404 IntVal = encode(ISA, IntVal, CntVal); 6405 if (CntVal != decode(ISA, IntVal)) { 6406 if (Saturate) { 6407 IntVal = encode(ISA, IntVal, -1); 6408 } else { 6409 Failed = true; 6410 } 6411 } 6412 return Failed; 6413 } 6414 6415 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6416 6417 SMLoc CntLoc = getLoc(); 6418 StringRef CntName = getTokenStr(); 6419 6420 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6421 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6422 return false; 6423 6424 int64_t CntVal; 6425 SMLoc ValLoc = getLoc(); 6426 if (!parseExpr(CntVal)) 6427 return false; 6428 6429 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6430 6431 bool Failed = true; 6432 bool Sat = CntName.endswith("_sat"); 6433 6434 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6435 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6436 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6437 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6438 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6439 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6440 } else { 6441 Error(CntLoc, "invalid counter name " + CntName); 6442 return false; 6443 } 6444 6445 if (Failed) { 6446 Error(ValLoc, "too large value for " + CntName); 6447 return false; 6448 } 6449 6450 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6451 return false; 6452 6453 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6454 if (isToken(AsmToken::EndOfStatement)) { 6455 Error(getLoc(), "expected a counter name"); 6456 return false; 6457 } 6458 } 6459 6460 return true; 6461 } 6462 6463 OperandMatchResultTy 6464 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 6465 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6466 int64_t Waitcnt = getWaitcntBitMask(ISA); 6467 SMLoc S = getLoc(); 6468 6469 if (isToken(AsmToken::Identifier) 
&& peekToken().is(AsmToken::LParen)) { 6470 while (!isToken(AsmToken::EndOfStatement)) { 6471 if (!parseCnt(Waitcnt)) 6472 return MatchOperand_ParseFail; 6473 } 6474 } else { 6475 if (!parseExpr(Waitcnt)) 6476 return MatchOperand_ParseFail; 6477 } 6478 6479 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6480 return MatchOperand_Success; 6481 } 6482 6483 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) { 6484 SMLoc FieldLoc = getLoc(); 6485 StringRef FieldName = getTokenStr(); 6486 if (!skipToken(AsmToken::Identifier, "expected a field name") || 6487 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6488 return false; 6489 6490 SMLoc ValueLoc = getLoc(); 6491 StringRef ValueName = getTokenStr(); 6492 if (!skipToken(AsmToken::Identifier, "expected a value name") || 6493 !skipToken(AsmToken::RParen, "expected a right parenthesis")) 6494 return false; 6495 6496 unsigned Shift; 6497 if (FieldName == "instid0") { 6498 Shift = 0; 6499 } else if (FieldName == "instskip") { 6500 Shift = 4; 6501 } else if (FieldName == "instid1") { 6502 Shift = 7; 6503 } else { 6504 Error(FieldLoc, "invalid field name " + FieldName); 6505 return false; 6506 } 6507 6508 int Value; 6509 if (Shift == 4) { 6510 // Parse values for instskip. 6511 Value = StringSwitch<int>(ValueName) 6512 .Case("SAME", 0) 6513 .Case("NEXT", 1) 6514 .Case("SKIP_1", 2) 6515 .Case("SKIP_2", 3) 6516 .Case("SKIP_3", 4) 6517 .Case("SKIP_4", 5) 6518 .Default(-1); 6519 } else { 6520 // Parse values for instid0 and instid1. 6521 Value = StringSwitch<int>(ValueName) 6522 .Case("NO_DEP", 0) 6523 .Case("VALU_DEP_1", 1) 6524 .Case("VALU_DEP_2", 2) 6525 .Case("VALU_DEP_3", 3) 6526 .Case("VALU_DEP_4", 4) 6527 .Case("TRANS32_DEP_1", 5) 6528 .Case("TRANS32_DEP_2", 6) 6529 .Case("TRANS32_DEP_3", 7) 6530 .Case("FMA_ACCUM_CYCLE_1", 8) 6531 .Case("SALU_CYCLE_1", 9) 6532 .Case("SALU_CYCLE_2", 10) 6533 .Case("SALU_CYCLE_3", 11) 6534 .Default(-1); 6535 } 6536 if (Value < 0) { 6537 Error(ValueLoc, "invalid value name " + ValueName); 6538 return false; 6539 } 6540 6541 Delay |= Value << Shift; 6542 return true; 6543 } 6544 6545 OperandMatchResultTy 6546 AMDGPUAsmParser::parseSDelayAluOps(OperandVector &Operands) { 6547 int64_t Delay = 0; 6548 SMLoc S = getLoc(); 6549 6550 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6551 do { 6552 if (!parseDelay(Delay)) 6553 return MatchOperand_ParseFail; 6554 } while (trySkipToken(AsmToken::Pipe)); 6555 } else { 6556 if (!parseExpr(Delay)) 6557 return MatchOperand_ParseFail; 6558 } 6559 6560 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S)); 6561 return MatchOperand_Success; 6562 } 6563 6564 bool 6565 AMDGPUOperand::isSWaitCnt() const { 6566 return isImm(); 6567 } 6568 6569 bool AMDGPUOperand::isSDelayAlu() const { return isImm(); } 6570 6571 //===----------------------------------------------------------------------===// 6572 // DepCtr 6573 //===----------------------------------------------------------------------===// 6574 6575 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId, 6576 StringRef DepCtrName) { 6577 switch (ErrorId) { 6578 case OPR_ID_UNKNOWN: 6579 Error(Loc, Twine("invalid counter name ", DepCtrName)); 6580 return; 6581 case OPR_ID_UNSUPPORTED: 6582 Error(Loc, Twine(DepCtrName, " is not supported on this GPU")); 6583 return; 6584 case OPR_ID_DUPLICATE: 6585 Error(Loc, Twine("duplicate counter name ", DepCtrName)); 6586 return; 6587 case OPR_VAL_INVALID: 6588 Error(Loc, Twine("invalid value for ", DepCtrName)); 6589 return; 6590 default: 
6591 assert(false); 6592 } 6593 } 6594 6595 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) { 6596 6597 using namespace llvm::AMDGPU::DepCtr; 6598 6599 SMLoc DepCtrLoc = getLoc(); 6600 StringRef DepCtrName = getTokenStr(); 6601 6602 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6603 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6604 return false; 6605 6606 int64_t ExprVal; 6607 if (!parseExpr(ExprVal)) 6608 return false; 6609 6610 unsigned PrevOprMask = UsedOprMask; 6611 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI()); 6612 6613 if (CntVal < 0) { 6614 depCtrError(DepCtrLoc, CntVal, DepCtrName); 6615 return false; 6616 } 6617 6618 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6619 return false; 6620 6621 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6622 if (isToken(AsmToken::EndOfStatement)) { 6623 Error(getLoc(), "expected a counter name"); 6624 return false; 6625 } 6626 } 6627 6628 unsigned CntValMask = PrevOprMask ^ UsedOprMask; 6629 DepCtr = (DepCtr & ~CntValMask) | CntVal; 6630 return true; 6631 } 6632 6633 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) { 6634 using namespace llvm::AMDGPU::DepCtr; 6635 6636 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI()); 6637 SMLoc Loc = getLoc(); 6638 6639 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6640 unsigned UsedOprMask = 0; 6641 while (!isToken(AsmToken::EndOfStatement)) { 6642 if (!parseDepCtr(DepCtr, UsedOprMask)) 6643 return MatchOperand_ParseFail; 6644 } 6645 } else { 6646 if (!parseExpr(DepCtr)) 6647 return MatchOperand_ParseFail; 6648 } 6649 6650 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc)); 6651 return MatchOperand_Success; 6652 } 6653 6654 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); } 6655 6656 //===----------------------------------------------------------------------===// 6657 // hwreg 6658 //===----------------------------------------------------------------------===// 6659 6660 bool 6661 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6662 OperandInfoTy &Offset, 6663 OperandInfoTy &Width) { 6664 using namespace llvm::AMDGPU::Hwreg; 6665 6666 // The register may be specified by name or using a numeric code 6667 HwReg.Loc = getLoc(); 6668 if (isToken(AsmToken::Identifier) && 6669 (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6670 HwReg.IsSymbolic = true; 6671 lex(); // skip register name 6672 } else if (!parseExpr(HwReg.Id, "a register name")) { 6673 return false; 6674 } 6675 6676 if (trySkipToken(AsmToken::RParen)) 6677 return true; 6678 6679 // parse optional params 6680 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6681 return false; 6682 6683 Offset.Loc = getLoc(); 6684 if (!parseExpr(Offset.Id)) 6685 return false; 6686 6687 if (!skipToken(AsmToken::Comma, "expected a comma")) 6688 return false; 6689 6690 Width.Loc = getLoc(); 6691 return parseExpr(Width.Id) && 6692 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6693 } 6694 6695 bool 6696 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6697 const OperandInfoTy &Offset, 6698 const OperandInfoTy &Width) { 6699 6700 using namespace llvm::AMDGPU::Hwreg; 6701 6702 if (HwReg.IsSymbolic) { 6703 if (HwReg.Id == OPR_ID_UNSUPPORTED) { 6704 Error(HwReg.Loc, 6705 "specified hardware register is not supported on this GPU"); 6706 return false; 6707 } 6708 } else { 6709 if 
(!isValidHwreg(HwReg.Id)) { 6710 Error(HwReg.Loc, 6711 "invalid code of hardware register: only 6-bit values are legal"); 6712 return false; 6713 } 6714 } 6715 if (!isValidHwregOffset(Offset.Id)) { 6716 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6717 return false; 6718 } 6719 if (!isValidHwregWidth(Width.Id)) { 6720 Error(Width.Loc, 6721 "invalid bitfield width: only values from 1 to 32 are legal"); 6722 return false; 6723 } 6724 return true; 6725 } 6726 6727 OperandMatchResultTy 6728 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6729 using namespace llvm::AMDGPU::Hwreg; 6730 6731 int64_t ImmVal = 0; 6732 SMLoc Loc = getLoc(); 6733 6734 if (trySkipId("hwreg", AsmToken::LParen)) { 6735 OperandInfoTy HwReg(OPR_ID_UNKNOWN); 6736 OperandInfoTy Offset(OFFSET_DEFAULT_); 6737 OperandInfoTy Width(WIDTH_DEFAULT_); 6738 if (parseHwregBody(HwReg, Offset, Width) && 6739 validateHwreg(HwReg, Offset, Width)) { 6740 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6741 } else { 6742 return MatchOperand_ParseFail; 6743 } 6744 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6745 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6746 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6747 return MatchOperand_ParseFail; 6748 } 6749 } else { 6750 return MatchOperand_ParseFail; 6751 } 6752 6753 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6754 return MatchOperand_Success; 6755 } 6756 6757 bool AMDGPUOperand::isHwreg() const { 6758 return isImmTy(ImmTyHwreg); 6759 } 6760 6761 //===----------------------------------------------------------------------===// 6762 // sendmsg 6763 //===----------------------------------------------------------------------===// 6764 6765 bool 6766 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6767 OperandInfoTy &Op, 6768 OperandInfoTy &Stream) { 6769 using namespace llvm::AMDGPU::SendMsg; 6770 6771 Msg.Loc = getLoc(); 6772 if (isToken(AsmToken::Identifier) && 6773 (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6774 Msg.IsSymbolic = true; 6775 lex(); // skip message name 6776 } else if (!parseExpr(Msg.Id, "a message name")) { 6777 return false; 6778 } 6779 6780 if (trySkipToken(AsmToken::Comma)) { 6781 Op.IsDefined = true; 6782 Op.Loc = getLoc(); 6783 if (isToken(AsmToken::Identifier) && 6784 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6785 lex(); // skip operation name 6786 } else if (!parseExpr(Op.Id, "an operation name")) { 6787 return false; 6788 } 6789 6790 if (trySkipToken(AsmToken::Comma)) { 6791 Stream.IsDefined = true; 6792 Stream.Loc = getLoc(); 6793 if (!parseExpr(Stream.Id)) 6794 return false; 6795 } 6796 } 6797 6798 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6799 } 6800 6801 bool 6802 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6803 const OperandInfoTy &Op, 6804 const OperandInfoTy &Stream) { 6805 using namespace llvm::AMDGPU::SendMsg; 6806 6807 // Validation strictness depends on whether message is specified 6808 // in a symbolic or in a numeric form. In the latter case 6809 // only encoding possibility is checked. 
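  // e.g. sendmsg(MSG_GS_DONE, GS_OP_NOP) gets the full checks below, while a
  // raw immediate such as 0x3 only needs to be encodable (examples
  // illustrative).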
6810 bool Strict = Msg.IsSymbolic; 6811 6812 if (Strict) { 6813 if (Msg.Id == OPR_ID_UNSUPPORTED) { 6814 Error(Msg.Loc, "specified message id is not supported on this GPU"); 6815 return false; 6816 } 6817 } else { 6818 if (!isValidMsgId(Msg.Id, getSTI())) { 6819 Error(Msg.Loc, "invalid message id"); 6820 return false; 6821 } 6822 } 6823 if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) { 6824 if (Op.IsDefined) { 6825 Error(Op.Loc, "message does not support operations"); 6826 } else { 6827 Error(Msg.Loc, "missing message operation"); 6828 } 6829 return false; 6830 } 6831 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6832 Error(Op.Loc, "invalid operation id"); 6833 return false; 6834 } 6835 if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) && 6836 Stream.IsDefined) { 6837 Error(Stream.Loc, "message operation does not support streams"); 6838 return false; 6839 } 6840 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6841 Error(Stream.Loc, "invalid message stream id"); 6842 return false; 6843 } 6844 return true; 6845 } 6846 6847 OperandMatchResultTy 6848 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6849 using namespace llvm::AMDGPU::SendMsg; 6850 6851 int64_t ImmVal = 0; 6852 SMLoc Loc = getLoc(); 6853 6854 if (trySkipId("sendmsg", AsmToken::LParen)) { 6855 OperandInfoTy Msg(OPR_ID_UNKNOWN); 6856 OperandInfoTy Op(OP_NONE_); 6857 OperandInfoTy Stream(STREAM_ID_NONE_); 6858 if (parseSendMsgBody(Msg, Op, Stream) && 6859 validateSendMsg(Msg, Op, Stream)) { 6860 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6861 } else { 6862 return MatchOperand_ParseFail; 6863 } 6864 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6865 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6866 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6867 return MatchOperand_ParseFail; 6868 } 6869 } else { 6870 return MatchOperand_ParseFail; 6871 } 6872 6873 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6874 return MatchOperand_Success; 6875 } 6876 6877 bool AMDGPUOperand::isSendMsg() const { 6878 return isImmTy(ImmTySendMsg); 6879 } 6880 6881 //===----------------------------------------------------------------------===// 6882 // v_interp 6883 //===----------------------------------------------------------------------===// 6884 6885 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6886 StringRef Str; 6887 SMLoc S = getLoc(); 6888 6889 if (!parseId(Str)) 6890 return MatchOperand_NoMatch; 6891 6892 int Slot = StringSwitch<int>(Str) 6893 .Case("p10", 0) 6894 .Case("p20", 1) 6895 .Case("p0", 2) 6896 .Default(-1); 6897 6898 if (Slot == -1) { 6899 Error(S, "invalid interpolation slot"); 6900 return MatchOperand_ParseFail; 6901 } 6902 6903 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6904 AMDGPUOperand::ImmTyInterpSlot)); 6905 return MatchOperand_Success; 6906 } 6907 6908 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6909 StringRef Str; 6910 SMLoc S = getLoc(); 6911 6912 if (!parseId(Str)) 6913 return MatchOperand_NoMatch; 6914 6915 if (!Str.startswith("attr")) { 6916 Error(S, "invalid interpolation attribute"); 6917 return MatchOperand_ParseFail; 6918 } 6919 6920 StringRef Chan = Str.take_back(2); 6921 int AttrChan = StringSwitch<int>(Chan) 6922 .Case(".x", 0) 6923 .Case(".y", 1) 6924 .Case(".z", 2) 6925 .Case(".w", 3) 6926 .Default(-1); 6927 if (AttrChan == -1) { 6928 Error(S, "invalid or missing interpolation attribute channel"); 
6929 return MatchOperand_ParseFail; 6930 } 6931 6932 Str = Str.drop_back(2).drop_front(4); 6933 6934 uint8_t Attr; 6935 if (Str.getAsInteger(10, Attr)) { 6936 Error(S, "invalid or missing interpolation attribute number"); 6937 return MatchOperand_ParseFail; 6938 } 6939 6940 if (Attr > 63) { 6941 Error(S, "out of bounds interpolation attribute number"); 6942 return MatchOperand_ParseFail; 6943 } 6944 6945 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6946 6947 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6948 AMDGPUOperand::ImmTyInterpAttr)); 6949 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6950 AMDGPUOperand::ImmTyAttrChan)); 6951 return MatchOperand_Success; 6952 } 6953 6954 //===----------------------------------------------------------------------===// 6955 // exp 6956 //===----------------------------------------------------------------------===// 6957 6958 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6959 using namespace llvm::AMDGPU::Exp; 6960 6961 StringRef Str; 6962 SMLoc S = getLoc(); 6963 6964 if (!parseId(Str)) 6965 return MatchOperand_NoMatch; 6966 6967 unsigned Id = getTgtId(Str); 6968 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6969 Error(S, (Id == ET_INVALID) ? 6970 "invalid exp target" : 6971 "exp target is not supported on this GPU"); 6972 return MatchOperand_ParseFail; 6973 } 6974 6975 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6976 AMDGPUOperand::ImmTyExpTgt)); 6977 return MatchOperand_Success; 6978 } 6979 6980 //===----------------------------------------------------------------------===// 6981 // parser helpers 6982 //===----------------------------------------------------------------------===// 6983 6984 bool 6985 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6986 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6987 } 6988 6989 bool 6990 AMDGPUAsmParser::isId(const StringRef Id) const { 6991 return isId(getToken(), Id); 6992 } 6993 6994 bool 6995 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6996 return getTokenKind() == Kind; 6997 } 6998 6999 bool 7000 AMDGPUAsmParser::trySkipId(const StringRef Id) { 7001 if (isId(Id)) { 7002 lex(); 7003 return true; 7004 } 7005 return false; 7006 } 7007 7008 bool 7009 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 7010 if (isToken(AsmToken::Identifier)) { 7011 StringRef Tok = getTokenStr(); 7012 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 7013 lex(); 7014 return true; 7015 } 7016 } 7017 return false; 7018 } 7019 7020 bool 7021 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 7022 if (isId(Id) && peekToken().is(Kind)) { 7023 lex(); 7024 lex(); 7025 return true; 7026 } 7027 return false; 7028 } 7029 7030 bool 7031 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 7032 if (isToken(Kind)) { 7033 lex(); 7034 return true; 7035 } 7036 return false; 7037 } 7038 7039 bool 7040 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 7041 const StringRef ErrMsg) { 7042 if (!trySkipToken(Kind)) { 7043 Error(getLoc(), ErrMsg); 7044 return false; 7045 } 7046 return true; 7047 } 7048 7049 bool 7050 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 7051 SMLoc S = getLoc(); 7052 7053 const MCExpr *Expr; 7054 if (Parser.parseExpression(Expr)) 7055 return false; 7056 7057 if (Expr->evaluateAsAbsolute(Imm)) 7058 return true; 7059 7060 if (Expected.empty()) { 7061 Error(S, "expected 
absolute expression"); 7062 } else { 7063 Error(S, Twine("expected ", Expected) + 7064 Twine(" or an absolute expression")); 7065 } 7066 return false; 7067 } 7068 7069 bool 7070 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 7071 SMLoc S = getLoc(); 7072 7073 const MCExpr *Expr; 7074 if (Parser.parseExpression(Expr)) 7075 return false; 7076 7077 int64_t IntVal; 7078 if (Expr->evaluateAsAbsolute(IntVal)) { 7079 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 7080 } else { 7081 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 7082 } 7083 return true; 7084 } 7085 7086 bool 7087 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 7088 if (isToken(AsmToken::String)) { 7089 Val = getToken().getStringContents(); 7090 lex(); 7091 return true; 7092 } else { 7093 Error(getLoc(), ErrMsg); 7094 return false; 7095 } 7096 } 7097 7098 bool 7099 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 7100 if (isToken(AsmToken::Identifier)) { 7101 Val = getTokenStr(); 7102 lex(); 7103 return true; 7104 } else { 7105 if (!ErrMsg.empty()) 7106 Error(getLoc(), ErrMsg); 7107 return false; 7108 } 7109 } 7110 7111 AsmToken 7112 AMDGPUAsmParser::getToken() const { 7113 return Parser.getTok(); 7114 } 7115 7116 AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) { 7117 return isToken(AsmToken::EndOfStatement) 7118 ? getToken() 7119 : getLexer().peekTok(ShouldSkipSpace); 7120 } 7121 7122 void 7123 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 7124 auto TokCount = getLexer().peekTokens(Tokens); 7125 7126 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 7127 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 7128 } 7129 7130 AsmToken::TokenKind 7131 AMDGPUAsmParser::getTokenKind() const { 7132 return getLexer().getKind(); 7133 } 7134 7135 SMLoc 7136 AMDGPUAsmParser::getLoc() const { 7137 return getToken().getLoc(); 7138 } 7139 7140 StringRef 7141 AMDGPUAsmParser::getTokenStr() const { 7142 return getToken().getString(); 7143 } 7144 7145 void 7146 AMDGPUAsmParser::lex() { 7147 Parser.Lex(); 7148 } 7149 7150 SMLoc 7151 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 7152 const OperandVector &Operands) const { 7153 for (unsigned i = Operands.size() - 1; i > 0; --i) { 7154 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7155 if (Test(Op)) 7156 return Op.getStartLoc(); 7157 } 7158 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 7159 } 7160 7161 SMLoc 7162 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 7163 const OperandVector &Operands) const { 7164 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 7165 return getOperandLoc(Test, Operands); 7166 } 7167 7168 SMLoc 7169 AMDGPUAsmParser::getRegLoc(unsigned Reg, 7170 const OperandVector &Operands) const { 7171 auto Test = [=](const AMDGPUOperand& Op) { 7172 return Op.isRegKind() && Op.getReg() == Reg; 7173 }; 7174 return getOperandLoc(Test, Operands); 7175 } 7176 7177 SMLoc 7178 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 7179 auto Test = [](const AMDGPUOperand& Op) { 7180 return Op.IsImmKindLiteral() || Op.isExpr(); 7181 }; 7182 return getOperandLoc(Test, Operands); 7183 } 7184 7185 SMLoc 7186 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 7187 auto Test = [](const AMDGPUOperand& Op) { 7188 return Op.isImmKindConst(); 7189 }; 7190 return getOperandLoc(Test, Operands); 7191 } 7192 7193 //===----------------------------------------------------------------------===// 7194 // 
swizzle 7195 //===----------------------------------------------------------------------===// 7196 7197 LLVM_READNONE 7198 static unsigned 7199 encodeBitmaskPerm(const unsigned AndMask, 7200 const unsigned OrMask, 7201 const unsigned XorMask) { 7202 using namespace llvm::AMDGPU::Swizzle; 7203 7204 return BITMASK_PERM_ENC | 7205 (AndMask << BITMASK_AND_SHIFT) | 7206 (OrMask << BITMASK_OR_SHIFT) | 7207 (XorMask << BITMASK_XOR_SHIFT); 7208 } 7209 7210 bool 7211 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 7212 const unsigned MinVal, 7213 const unsigned MaxVal, 7214 const StringRef ErrMsg, 7215 SMLoc &Loc) { 7216 if (!skipToken(AsmToken::Comma, "expected a comma")) { 7217 return false; 7218 } 7219 Loc = getLoc(); 7220 if (!parseExpr(Op)) { 7221 return false; 7222 } 7223 if (Op < MinVal || Op > MaxVal) { 7224 Error(Loc, ErrMsg); 7225 return false; 7226 } 7227 7228 return true; 7229 } 7230 7231 bool 7232 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 7233 const unsigned MinVal, 7234 const unsigned MaxVal, 7235 const StringRef ErrMsg) { 7236 SMLoc Loc; 7237 for (unsigned i = 0; i < OpNum; ++i) { 7238 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 7239 return false; 7240 } 7241 7242 return true; 7243 } 7244 7245 bool 7246 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 7247 using namespace llvm::AMDGPU::Swizzle; 7248 7249 int64_t Lane[LANE_NUM]; 7250 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 7251 "expected a 2-bit lane id")) { 7252 Imm = QUAD_PERM_ENC; 7253 for (unsigned I = 0; I < LANE_NUM; ++I) { 7254 Imm |= Lane[I] << (LANE_SHIFT * I); 7255 } 7256 return true; 7257 } 7258 return false; 7259 } 7260 7261 bool 7262 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 7263 using namespace llvm::AMDGPU::Swizzle; 7264 7265 SMLoc Loc; 7266 int64_t GroupSize; 7267 int64_t LaneIdx; 7268 7269 if (!parseSwizzleOperand(GroupSize, 7270 2, 32, 7271 "group size must be in the interval [2,32]", 7272 Loc)) { 7273 return false; 7274 } 7275 if (!isPowerOf2_64(GroupSize)) { 7276 Error(Loc, "group size must be a power of two"); 7277 return false; 7278 } 7279 if (parseSwizzleOperand(LaneIdx, 7280 0, GroupSize - 1, 7281 "lane id must be in the interval [0,group size - 1]", 7282 Loc)) { 7283 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 7284 return true; 7285 } 7286 return false; 7287 } 7288 7289 bool 7290 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 7291 using namespace llvm::AMDGPU::Swizzle; 7292 7293 SMLoc Loc; 7294 int64_t GroupSize; 7295 7296 if (!parseSwizzleOperand(GroupSize, 7297 2, 32, 7298 "group size must be in the interval [2,32]", 7299 Loc)) { 7300 return false; 7301 } 7302 if (!isPowerOf2_64(GroupSize)) { 7303 Error(Loc, "group size must be a power of two"); 7304 return false; 7305 } 7306 7307 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 7308 return true; 7309 } 7310 7311 bool 7312 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 7313 using namespace llvm::AMDGPU::Swizzle; 7314 7315 SMLoc Loc; 7316 int64_t GroupSize; 7317 7318 if (!parseSwizzleOperand(GroupSize, 7319 1, 16, 7320 "group size must be in the interval [1,16]", 7321 Loc)) { 7322 return false; 7323 } 7324 if (!isPowerOf2_64(GroupSize)) { 7325 Error(Loc, "group size must be a power of two"); 7326 return false; 7327 } 7328 7329 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 7330 return true; 7331 } 7332 7333 bool 7334 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 7335 using namespace llvm::AMDGPU::Swizzle; 7336 7337 if 
(!skipToken(AsmToken::Comma, "expected a comma")) {
7338     return false;
7339   }
7340
7341   StringRef Ctl;
7342   SMLoc StrLoc = getLoc();
7343   if (!parseString(Ctl)) {
7344     return false;
7345   }
7346   if (Ctl.size() != BITMASK_WIDTH) {
7347     Error(StrLoc, "expected a 5-character mask");
7348     return false;
7349   }
7350
7351   unsigned AndMask = 0;
7352   unsigned OrMask = 0;
7353   unsigned XorMask = 0;
7354
7355   for (size_t i = 0; i < Ctl.size(); ++i) {
7356     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
7357     switch(Ctl[i]) {
7358     default:
7359       Error(StrLoc, "invalid mask");
7360       return false;
7361     case '0':
7362       break;
7363     case '1':
7364       OrMask |= Mask;
7365       break;
7366     case 'p':
7367       AndMask |= Mask;
7368       break;
7369     case 'i':
7370       AndMask |= Mask;
7371       XorMask |= Mask;
7372       break;
7373     }
7374   }
7375
7376   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
7377   return true;
7378 }
7379
7380 bool
7381 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
7382
7383   SMLoc OffsetLoc = getLoc();
7384
7385   if (!parseExpr(Imm, "a swizzle macro")) {
7386     return false;
7387   }
7388   if (!isUInt<16>(Imm)) {
7389     Error(OffsetLoc, "expected a 16-bit offset");
7390     return false;
7391   }
7392   return true;
7393 }
7394
7395 bool
7396 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
7397   using namespace llvm::AMDGPU::Swizzle;
7398
7399   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
7400
7401     SMLoc ModeLoc = getLoc();
7402     bool Ok = false;
7403
7404     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
7405       Ok = parseSwizzleQuadPerm(Imm);
7406     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
7407       Ok = parseSwizzleBitmaskPerm(Imm);
7408     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
7409       Ok = parseSwizzleBroadcast(Imm);
7410     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
7411       Ok = parseSwizzleSwap(Imm);
7412     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
7413       Ok = parseSwizzleReverse(Imm);
7414     } else {
7415       Error(ModeLoc, "expected a swizzle mode");
7416     }
7417
7418     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
7419   }
7420
7421   return false;
7422 }
7423
7424 OperandMatchResultTy
7425 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
7426   SMLoc S = getLoc();
7427   int64_t Imm = 0;
7428
7429   if (trySkipId("offset")) {
7430
7431     bool Ok = false;
7432     if (skipToken(AsmToken::Colon, "expected a colon")) {
7433       if (trySkipId("swizzle")) {
7434         Ok = parseSwizzleMacro(Imm);
7435       } else {
7436         Ok = parseSwizzleOffset(Imm);
7437       }
7438     }
7439
7440     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
7441
7442     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
7443   } else {
7444     // Swizzle "offset" operand is optional.
7445     // If it is omitted, try parsing other optional operands.
7446 return parseOptionalOpr(Operands); 7447 } 7448 } 7449 7450 bool 7451 AMDGPUOperand::isSwizzle() const { 7452 return isImmTy(ImmTySwizzle); 7453 } 7454 7455 //===----------------------------------------------------------------------===// 7456 // VGPR Index Mode 7457 //===----------------------------------------------------------------------===// 7458 7459 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 7460 7461 using namespace llvm::AMDGPU::VGPRIndexMode; 7462 7463 if (trySkipToken(AsmToken::RParen)) { 7464 return OFF; 7465 } 7466 7467 int64_t Imm = 0; 7468 7469 while (true) { 7470 unsigned Mode = 0; 7471 SMLoc S = getLoc(); 7472 7473 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 7474 if (trySkipId(IdSymbolic[ModeId])) { 7475 Mode = 1 << ModeId; 7476 break; 7477 } 7478 } 7479 7480 if (Mode == 0) { 7481 Error(S, (Imm == 0)? 7482 "expected a VGPR index mode or a closing parenthesis" : 7483 "expected a VGPR index mode"); 7484 return UNDEF; 7485 } 7486 7487 if (Imm & Mode) { 7488 Error(S, "duplicate VGPR index mode"); 7489 return UNDEF; 7490 } 7491 Imm |= Mode; 7492 7493 if (trySkipToken(AsmToken::RParen)) 7494 break; 7495 if (!skipToken(AsmToken::Comma, 7496 "expected a comma or a closing parenthesis")) 7497 return UNDEF; 7498 } 7499 7500 return Imm; 7501 } 7502 7503 OperandMatchResultTy 7504 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 7505 7506 using namespace llvm::AMDGPU::VGPRIndexMode; 7507 7508 int64_t Imm = 0; 7509 SMLoc S = getLoc(); 7510 7511 if (trySkipId("gpr_idx", AsmToken::LParen)) { 7512 Imm = parseGPRIdxMacro(); 7513 if (Imm == UNDEF) 7514 return MatchOperand_ParseFail; 7515 } else { 7516 if (getParser().parseAbsoluteExpression(Imm)) 7517 return MatchOperand_ParseFail; 7518 if (Imm < 0 || !isUInt<4>(Imm)) { 7519 Error(S, "invalid immediate: only 4-bit values are legal"); 7520 return MatchOperand_ParseFail; 7521 } 7522 } 7523 7524 Operands.push_back( 7525 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 7526 return MatchOperand_Success; 7527 } 7528 7529 bool AMDGPUOperand::isGPRIdxMode() const { 7530 return isImmTy(ImmTyGprIdxMode); 7531 } 7532 7533 //===----------------------------------------------------------------------===// 7534 // sopp branch targets 7535 //===----------------------------------------------------------------------===// 7536 7537 OperandMatchResultTy 7538 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 7539 7540 // Make sure we are not parsing something 7541 // that looks like a label or an expression but is not. 7542 // This will improve error messages. 7543 if (isRegister() || isModifier()) 7544 return MatchOperand_NoMatch; 7545 7546 if (!parseExpr(Operands)) 7547 return MatchOperand_ParseFail; 7548 7549 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 7550 assert(Opr.isImm() || Opr.isExpr()); 7551 SMLoc Loc = Opr.getStartLoc(); 7552 7553 // Currently we do not support arbitrary expressions as branch targets. 7554 // Only labels and absolute expressions are accepted. 
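  // For illustration (the label name is hypothetical): "s_branch loop_body"
  // (a label) and "s_branch 4" (an absolute expression) are accepted, while
  // something like "s_branch loop_body + 4" is rejected by the checks below.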
7555 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 7556 Error(Loc, "expected an absolute expression or a label"); 7557 } else if (Opr.isImm() && !Opr.isS16Imm()) { 7558 Error(Loc, "expected a 16-bit signed jump offset"); 7559 } 7560 7561 return MatchOperand_Success; 7562 } 7563 7564 //===----------------------------------------------------------------------===// 7565 // Boolean holding registers 7566 //===----------------------------------------------------------------------===// 7567 7568 OperandMatchResultTy 7569 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 7570 return parseReg(Operands); 7571 } 7572 7573 //===----------------------------------------------------------------------===// 7574 // mubuf 7575 //===----------------------------------------------------------------------===// 7576 7577 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 7578 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 7579 } 7580 7581 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 7582 const OperandVector &Operands, 7583 bool IsAtomic, 7584 bool IsLds) { 7585 OptionalImmIndexMap OptionalIdx; 7586 unsigned FirstOperandIdx = 1; 7587 bool IsAtomicReturn = false; 7588 7589 if (IsAtomic) { 7590 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7591 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7592 if (!Op.isCPol()) 7593 continue; 7594 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7595 break; 7596 } 7597 7598 if (!IsAtomicReturn) { 7599 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7600 if (NewOpc != -1) 7601 Inst.setOpcode(NewOpc); 7602 } 7603 7604 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7605 SIInstrFlags::IsAtomicRet; 7606 } 7607 7608 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7609 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7610 7611 // Add the register arguments 7612 if (Op.isReg()) { 7613 Op.addRegOperands(Inst, 1); 7614 // Insert a tied src for atomic return dst. 7615 // This cannot be postponed as subsequent calls to 7616 // addImmOperands rely on correct number of MC operands. 7617 if (IsAtomicReturn && i == FirstOperandIdx) 7618 Op.addRegOperands(Inst, 1); 7619 continue; 7620 } 7621 7622 // Handle the case where soffset is an immediate 7623 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7624 Op.addImmOperands(Inst, 1); 7625 continue; 7626 } 7627 7628 // Handle tokens like 'offen' which are sometimes hard-coded into the 7629 // asm string. There are no MCInst operands for these. 
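    // For illustration (register operands are hypothetical): in
    //   buffer_load_dword v0, v1, s[4:7], s1 offen offset:16
    // 'offen' is matched as a hard-coded token, while 'offset' is an optional
    // immediate collected into OptionalIdx further below.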
7630 if (Op.isToken()) { 7631 continue; 7632 } 7633 assert(Op.isImm()); 7634 7635 // Handle optional arguments 7636 OptionalIdx[Op.getImmTy()] = i; 7637 } 7638 7639 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7640 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7641 7642 if (!IsLds) { // tfe is not legal with lds opcodes 7643 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7644 } 7645 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7646 } 7647 7648 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7649 OptionalImmIndexMap OptionalIdx; 7650 7651 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7652 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7653 7654 // Add the register arguments 7655 if (Op.isReg()) { 7656 Op.addRegOperands(Inst, 1); 7657 continue; 7658 } 7659 7660 // Handle the case where soffset is an immediate 7661 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7662 Op.addImmOperands(Inst, 1); 7663 continue; 7664 } 7665 7666 // Handle tokens like 'offen' which are sometimes hard-coded into the 7667 // asm string. There are no MCInst operands for these. 7668 if (Op.isToken()) { 7669 continue; 7670 } 7671 assert(Op.isImm()); 7672 7673 // Handle optional arguments 7674 OptionalIdx[Op.getImmTy()] = i; 7675 } 7676 7677 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7678 AMDGPUOperand::ImmTyOffset); 7679 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7680 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7681 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7682 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7683 } 7684 7685 //===----------------------------------------------------------------------===// 7686 // mimg 7687 //===----------------------------------------------------------------------===// 7688 7689 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7690 bool IsAtomic) { 7691 unsigned I = 1; 7692 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7693 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7694 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7695 } 7696 7697 if (IsAtomic) { 7698 // Add src, same as dst 7699 assert(Desc.getNumDefs() == 1); 7700 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7701 } 7702 7703 OptionalImmIndexMap OptionalIdx; 7704 7705 for (unsigned E = Operands.size(); I != E; ++I) { 7706 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7707 7708 // Add the register arguments 7709 if (Op.isReg()) { 7710 Op.addRegOperands(Inst, 1); 7711 } else if (Op.isImmModifier()) { 7712 OptionalIdx[Op.getImmTy()] = I; 7713 } else if (!Op.isToken()) { 7714 llvm_unreachable("unexpected operand type"); 7715 } 7716 } 7717 7718 bool IsGFX10Plus = isGFX10Plus(); 7719 7720 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7721 if (IsGFX10Plus) 7722 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7723 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7724 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7725 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7726 if (IsGFX10Plus) 7727 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7728 
if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 7729 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7730 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7731 if (!IsGFX10Plus) 7732 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7733 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7734 } 7735 7736 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7737 cvtMIMG(Inst, Operands, true); 7738 } 7739 7740 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7741 OptionalImmIndexMap OptionalIdx; 7742 bool IsAtomicReturn = false; 7743 7744 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7745 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7746 if (!Op.isCPol()) 7747 continue; 7748 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7749 break; 7750 } 7751 7752 if (!IsAtomicReturn) { 7753 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7754 if (NewOpc != -1) 7755 Inst.setOpcode(NewOpc); 7756 } 7757 7758 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7759 SIInstrFlags::IsAtomicRet; 7760 7761 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7762 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7763 7764 // Add the register arguments 7765 if (Op.isReg()) { 7766 Op.addRegOperands(Inst, 1); 7767 if (IsAtomicReturn && i == 1) 7768 Op.addRegOperands(Inst, 1); 7769 continue; 7770 } 7771 7772 // Handle the case where soffset is an immediate 7773 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7774 Op.addImmOperands(Inst, 1); 7775 continue; 7776 } 7777 7778 // Handle tokens like 'offen' which are sometimes hard-coded into the 7779 // asm string. There are no MCInst operands for these. 7780 if (Op.isToken()) { 7781 continue; 7782 } 7783 assert(Op.isImm()); 7784 7785 // Handle optional arguments 7786 OptionalIdx[Op.getImmTy()] = i; 7787 } 7788 7789 if ((int)Inst.getNumOperands() <= 7790 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7791 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7792 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7793 } 7794 7795 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7796 const OperandVector &Operands) { 7797 for (unsigned I = 1; I < Operands.size(); ++I) { 7798 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7799 if (Operand.isReg()) 7800 Operand.addRegOperands(Inst, 1); 7801 } 7802 7803 Inst.addOperand(MCOperand::createImm(1)); // a16 7804 } 7805 7806 //===----------------------------------------------------------------------===// 7807 // smrd 7808 //===----------------------------------------------------------------------===// 7809 7810 bool AMDGPUOperand::isSMRDOffset8() const { 7811 return isImm() && isUInt<8>(getImm()); 7812 } 7813 7814 bool AMDGPUOperand::isSMEMOffset() const { 7815 return isImmTy(ImmTyNone) || 7816 isImmTy(ImmTyOffset); // Offset range is checked later by validator. 7817 } 7818 7819 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7820 // 32-bit literals are only supported on CI and we only want to use them 7821 // when the offset is > 8-bits. 
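  // For illustration: an offset such as 0x1ff does not fit into 8 bits, so on
  // CI it would have to be emitted via the 32-bit literal form checked here.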
7822 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7823 } 7824 7825 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7826 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7827 } 7828 7829 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7830 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7831 } 7832 7833 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7834 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7835 } 7836 7837 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7838 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7839 } 7840 7841 //===----------------------------------------------------------------------===// 7842 // vop3 7843 //===----------------------------------------------------------------------===// 7844 7845 static bool ConvertOmodMul(int64_t &Mul) { 7846 if (Mul != 1 && Mul != 2 && Mul != 4) 7847 return false; 7848 7849 Mul >>= 1; 7850 return true; 7851 } 7852 7853 static bool ConvertOmodDiv(int64_t &Div) { 7854 if (Div == 1) { 7855 Div = 0; 7856 return true; 7857 } 7858 7859 if (Div == 2) { 7860 Div = 3; 7861 return true; 7862 } 7863 7864 return false; 7865 } 7866 7867 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7868 // This is intentional and ensures compatibility with sp3. 7869 // See bug 35397 for details. 7870 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7871 if (BoundCtrl == 0 || BoundCtrl == 1) { 7872 BoundCtrl = 1; 7873 return true; 7874 } 7875 return false; 7876 } 7877 7878 // Note: the order in this table matches the order of operands in AsmString. 7879 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7880 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7881 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7882 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7883 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7884 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7885 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7886 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7887 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7888 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7889 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7890 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7891 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7892 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7893 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7894 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7895 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7896 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7897 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7898 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7899 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7900 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7901 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7902 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7903 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7904 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7905 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 7906 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 7907 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 7908 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 7909 {"vm", AMDGPUOperand::ImmTyExpVM, 
true, nullptr},
7910   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7911   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7912   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7913   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7914   {"dpp8", AMDGPUOperand::ImmTyDPP8, false, nullptr},
7915   {"dpp_ctrl", AMDGPUOperand::ImmTyDppCtrl, false, nullptr},
7916   {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7917   {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7918   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7919   {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr},
7920   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7921   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7922   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr},
7923   {"wait_vdst", AMDGPUOperand::ImmTyWaitVDST, false, nullptr},
7924   {"wait_exp", AMDGPUOperand::ImmTyWaitEXP, false, nullptr}
7925 };
7926
7927 void AMDGPUAsmParser::onBeginOfFile() {
7928   if (!getParser().getStreamer().getTargetStreamer() ||
7929       getSTI().getTargetTriple().getArch() == Triple::r600)
7930     return;
7931
7932   if (!getTargetStreamer().getTargetID())
7933     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7934
7935   if (isHsaAbiVersion3AndAbove(&getSTI()))
7936     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7937 }
7938
7939 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7940
7941   OperandMatchResultTy res = parseOptionalOpr(Operands);
7942
7943   // This is a hack to enable hardcoded mandatory operands which follow
7944   // optional operands.
7945   //
7946   // The current design assumes that all operands after the first optional operand
7947   // are also optional. However, the implementation of some instructions violates
7948   // this rule (see e.g. flat/global atomics, which have hardcoded 'glc' operands).
7949   //
7950   // To alleviate this problem, we have to (implicitly) parse extra operands
7951   // to make sure the autogenerated parser of custom operands never hits hardcoded
7952   // mandatory operands.
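  //
  // For example (an illustrative sketch, not tied to one specific opcode):
  //   flat_atomic_swap v0, v[1:2], v2 offset:16 glc
  // has a hardcoded 'glc' following the optional 'offset' operand, which is
  // why the loop below keeps trying optional operands up to MAX_OPR_LOOKAHEAD.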
7953 7954 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7955 if (res != MatchOperand_Success || 7956 isToken(AsmToken::EndOfStatement)) 7957 break; 7958 7959 trySkipToken(AsmToken::Comma); 7960 res = parseOptionalOpr(Operands); 7961 } 7962 7963 return res; 7964 } 7965 7966 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7967 OperandMatchResultTy res; 7968 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7969 // try to parse any optional operand here 7970 if (Op.IsBit) { 7971 res = parseNamedBit(Op.Name, Operands, Op.Type); 7972 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7973 res = parseOModOperand(Operands); 7974 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7975 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7976 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7977 res = parseSDWASel(Operands, Op.Name, Op.Type); 7978 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7979 res = parseSDWADstUnused(Operands); 7980 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7981 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7982 Op.Type == AMDGPUOperand::ImmTyNegLo || 7983 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7984 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7985 Op.ConvertResult); 7986 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7987 res = parseDim(Operands); 7988 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7989 res = parseCPol(Operands); 7990 } else if (Op.Type == AMDGPUOperand::ImmTyDPP8) { 7991 res = parseDPP8(Operands); 7992 } else if (Op.Type == AMDGPUOperand::ImmTyDppCtrl) { 7993 res = parseDPPCtrl(Operands); 7994 } else { 7995 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7996 if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) { 7997 res = parseOperandArrayWithPrefix("neg", Operands, 7998 AMDGPUOperand::ImmTyBLGP, 7999 nullptr); 8000 } 8001 } 8002 if (res != MatchOperand_NoMatch) { 8003 return res; 8004 } 8005 } 8006 return MatchOperand_NoMatch; 8007 } 8008 8009 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 8010 StringRef Name = getTokenStr(); 8011 if (Name == "mul") { 8012 return parseIntWithPrefix("mul", Operands, 8013 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 8014 } 8015 8016 if (Name == "div") { 8017 return parseIntWithPrefix("div", Operands, 8018 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 8019 } 8020 8021 return MatchOperand_NoMatch; 8022 } 8023 8024 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 8025 cvtVOP3P(Inst, Operands); 8026 8027 int Opc = Inst.getOpcode(); 8028 8029 int SrcNum; 8030 const int Ops[] = { AMDGPU::OpName::src0, 8031 AMDGPU::OpName::src1, 8032 AMDGPU::OpName::src2 }; 8033 for (SrcNum = 0; 8034 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 8035 ++SrcNum); 8036 assert(SrcNum > 0); 8037 8038 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8039 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 8040 8041 if ((OpSel & (1 << SrcNum)) != 0) { 8042 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 8043 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 8044 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 8045 } 8046 } 8047 8048 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 8049 // 1. This operand is input modifiers 8050 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 8051 // 2. 
This is not last operand 8052 && Desc.NumOperands > (OpNum + 1) 8053 // 3. Next operand is register class 8054 && Desc.OpInfo[OpNum + 1].RegClass != -1 8055 // 4. Next register is not tied to any other operand 8056 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 8057 } 8058 8059 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 8060 { 8061 OptionalImmIndexMap OptionalIdx; 8062 unsigned Opc = Inst.getOpcode(); 8063 8064 unsigned I = 1; 8065 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8066 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8067 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8068 } 8069 8070 for (unsigned E = Operands.size(); I != E; ++I) { 8071 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8072 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8073 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8074 } else if (Op.isInterpSlot() || 8075 Op.isInterpAttr() || 8076 Op.isAttrChan()) { 8077 Inst.addOperand(MCOperand::createImm(Op.getImm())); 8078 } else if (Op.isImmModifier()) { 8079 OptionalIdx[Op.getImmTy()] = I; 8080 } else { 8081 llvm_unreachable("unhandled operand type"); 8082 } 8083 } 8084 8085 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 8086 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 8087 } 8088 8089 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8090 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8091 } 8092 8093 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8094 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8095 } 8096 } 8097 8098 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands) 8099 { 8100 OptionalImmIndexMap OptionalIdx; 8101 unsigned Opc = Inst.getOpcode(); 8102 8103 unsigned I = 1; 8104 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8105 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8106 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8107 } 8108 8109 for (unsigned E = Operands.size(); I != E; ++I) { 8110 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8111 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8112 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8113 } else if (Op.isImmModifier()) { 8114 OptionalIdx[Op.getImmTy()] = I; 8115 } else { 8116 llvm_unreachable("unhandled operand type"); 8117 } 8118 } 8119 8120 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8121 8122 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8123 if (OpSelIdx != -1) 8124 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); 8125 8126 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP); 8127 8128 if (OpSelIdx == -1) 8129 return; 8130 8131 const int Ops[] = { AMDGPU::OpName::src0, 8132 AMDGPU::OpName::src1, 8133 AMDGPU::OpName::src2 }; 8134 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8135 AMDGPU::OpName::src1_modifiers, 8136 AMDGPU::OpName::src2_modifiers }; 8137 8138 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 8139 8140 for (int J = 0; J < 3; ++J) { 8141 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8142 if (OpIdx == -1) 8143 break; 8144 8145 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8146 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 8147 8148 if ((OpSel & (1 << J)) != 0) 8149 ModVal |= 
SISrcMods::OP_SEL_0; 8150 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && 8151 (OpSel & (1 << 3)) != 0) 8152 ModVal |= SISrcMods::DST_OP_SEL; 8153 8154 Inst.getOperand(ModIdx).setImm(ModVal); 8155 } 8156 } 8157 8158 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 8159 OptionalImmIndexMap &OptionalIdx) { 8160 unsigned Opc = Inst.getOpcode(); 8161 8162 unsigned I = 1; 8163 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8164 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8165 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8166 } 8167 8168 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 8169 // This instruction has src modifiers 8170 for (unsigned E = Operands.size(); I != E; ++I) { 8171 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8172 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8173 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8174 } else if (Op.isImmModifier()) { 8175 OptionalIdx[Op.getImmTy()] = I; 8176 } else if (Op.isRegOrImm()) { 8177 Op.addRegOrImmOperands(Inst, 1); 8178 } else { 8179 llvm_unreachable("unhandled operand type"); 8180 } 8181 } 8182 } else { 8183 // No src modifiers 8184 for (unsigned E = Operands.size(); I != E; ++I) { 8185 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8186 if (Op.isMod()) { 8187 OptionalIdx[Op.getImmTy()] = I; 8188 } else { 8189 Op.addRegOrImmOperands(Inst, 1); 8190 } 8191 } 8192 } 8193 8194 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8195 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8196 } 8197 8198 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8199 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8200 } 8201 8202 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 8203 // it has src2 register operand that is tied to dst operand 8204 // we don't allow modifiers for this operand in assembler so src2_modifiers 8205 // should be 0. 8206 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 8207 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 8208 Opc == AMDGPU::V_MAC_F32_e64_vi || 8209 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 8210 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 8211 Opc == AMDGPU::V_MAC_F16_e64_vi || 8212 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 8213 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 8214 Opc == AMDGPU::V_FMAC_F32_e64_gfx11 || 8215 Opc == AMDGPU::V_FMAC_F32_e64_vi || 8216 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 8217 Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 || 8218 Opc == AMDGPU::V_FMAC_F16_e64_gfx10 || 8219 Opc == AMDGPU::V_FMAC_F16_e64_gfx11) { 8220 auto it = Inst.begin(); 8221 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 8222 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 8223 ++it; 8224 // Copy the operand to ensure it's not invalidated when Inst grows. 
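    // (A reference obtained from Inst.getOperand(0) could dangle once insert()
    //  grows the operand list, so the MCOperand is copied by value first.)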
8225 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 8226 } 8227 } 8228 8229 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 8230 OptionalImmIndexMap OptionalIdx; 8231 cvtVOP3(Inst, Operands, OptionalIdx); 8232 } 8233 8234 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 8235 OptionalImmIndexMap &OptIdx) { 8236 const int Opc = Inst.getOpcode(); 8237 const MCInstrDesc &Desc = MII.get(Opc); 8238 8239 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 8240 8241 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 8242 assert(!IsPacked); 8243 Inst.addOperand(Inst.getOperand(0)); 8244 } 8245 8246 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 8247 // instruction, and then figure out where to actually put the modifiers 8248 8249 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8250 if (OpSelIdx != -1) { 8251 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 8252 } 8253 8254 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 8255 if (OpSelHiIdx != -1) { 8256 int DefaultVal = IsPacked ? -1 : 0; 8257 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 8258 DefaultVal); 8259 } 8260 8261 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 8262 if (NegLoIdx != -1) { 8263 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 8264 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 8265 } 8266 8267 const int Ops[] = { AMDGPU::OpName::src0, 8268 AMDGPU::OpName::src1, 8269 AMDGPU::OpName::src2 }; 8270 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8271 AMDGPU::OpName::src1_modifiers, 8272 AMDGPU::OpName::src2_modifiers }; 8273 8274 unsigned OpSel = 0; 8275 unsigned OpSelHi = 0; 8276 unsigned NegLo = 0; 8277 unsigned NegHi = 0; 8278 8279 if (OpSelIdx != -1) 8280 OpSel = Inst.getOperand(OpSelIdx).getImm(); 8281 8282 if (OpSelHiIdx != -1) 8283 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 8284 8285 if (NegLoIdx != -1) { 8286 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 8287 NegLo = Inst.getOperand(NegLoIdx).getImm(); 8288 NegHi = Inst.getOperand(NegHiIdx).getImm(); 8289 } 8290 8291 for (int J = 0; J < 3; ++J) { 8292 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8293 if (OpIdx == -1) 8294 break; 8295 8296 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8297 8298 if (ModIdx == -1) 8299 continue; 8300 8301 uint32_t ModVal = 0; 8302 8303 if ((OpSel & (1 << J)) != 0) 8304 ModVal |= SISrcMods::OP_SEL_0; 8305 8306 if ((OpSelHi & (1 << J)) != 0) 8307 ModVal |= SISrcMods::OP_SEL_1; 8308 8309 if ((NegLo & (1 << J)) != 0) 8310 ModVal |= SISrcMods::NEG; 8311 8312 if ((NegHi & (1 << J)) != 0) 8313 ModVal |= SISrcMods::NEG_HI; 8314 8315 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 8316 } 8317 } 8318 8319 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 8320 OptionalImmIndexMap OptIdx; 8321 cvtVOP3(Inst, Operands, OptIdx); 8322 cvtVOP3P(Inst, Operands, OptIdx); 8323 } 8324 8325 //===----------------------------------------------------------------------===// 8326 // VOPD 8327 //===----------------------------------------------------------------------===// 8328 8329 OperandMatchResultTy AMDGPUAsmParser::parseVOPD(OperandVector &Operands) { 8330 if (!hasVOPD(getSTI())) 8331 return MatchOperand_NoMatch; 8332 8333 if 
(isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) { 8334 SMLoc S = getLoc(); 8335 lex(); 8336 lex(); 8337 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S)); 8338 const MCExpr *Expr; 8339 if (isToken(AsmToken::Identifier) && !Parser.parseExpression(Expr)) { 8340 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 8341 return MatchOperand_Success; 8342 } 8343 Error(S, "invalid VOPD :: usage"); 8344 return MatchOperand_ParseFail; 8345 } 8346 return MatchOperand_NoMatch; 8347 } 8348 8349 // Create VOPD MCInst operands using parsed assembler operands. 8350 // Parsed VOPD operands are ordered as follows: 8351 // OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::' 8352 // OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm] 8353 // If both OpX and OpY have an imm, the first imm has a different name: 8354 // OpXMnemo dstX src0X [vsrc1X|immDeferred vsrc1X|vsrc1X immDeferred] '::' 8355 // OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm] 8356 // MCInst operands have the following order: 8357 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands] 8358 void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) { 8359 auto addOp = [&](uint16_t i) { // NOLINT:function pointer 8360 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 8361 if (Op.isReg()) { 8362 Op.addRegOperands(Inst, 1); 8363 return; 8364 } 8365 if (Op.isImm()) { 8366 Op.addImmOperands(Inst, 1); 8367 return; 8368 } 8369 // Handle tokens like 'offen' which are sometimes hard-coded into the 8370 // asm string. There are no MCInst operands for these. 8371 if (Op.isToken()) { 8372 return; 8373 } 8374 llvm_unreachable("Unhandled operand type in cvtVOPD"); 8375 }; 8376 8377 // Indices into MCInst.Operands 8378 const auto FmamkOpXImmMCIndex = 3; // dstX, dstY, src0X, imm, ... 8379 const auto FmaakOpXImmMCIndex = 4; // dstX, dstY, src0X, src1X, imm, ... 8380 const auto MinOpYImmMCIndex = 4; // dstX, dstY, src0X, src0Y, imm, ... 
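  //
  // A worked example (illustrative operands): for
  //   v_dual_fmamk_f32 v4, v1, 0xaf123456, v5 :: v_dual_mov_b32 v7, v2
  // the parsed operands are
  //   mnemo, v4, v1, imm, v5, '::', mnemo, v7, v2
  // and the MCInst operands built below come out as
  //   v4 (dstX), v7 (dstY), v1 (src0X), imm (index 3), v5 (vsrc1X), v2 (src0Y).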
8381 8382 unsigned Opc = Inst.getOpcode(); 8383 bool HasVsrc1X = 8384 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vsrc1X) != -1; 8385 bool HasImmX = 8386 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::immDeferred) != -1 || 8387 (HasVsrc1X && (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) == 8388 FmamkOpXImmMCIndex || 8389 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) == 8390 FmaakOpXImmMCIndex)); 8391 8392 bool HasVsrc1Y = 8393 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vsrc1Y) != -1; 8394 bool HasImmY = 8395 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::immDeferred) != -1 || 8396 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) >= 8397 MinOpYImmMCIndex + HasVsrc1X; 8398 8399 // Indices of parsed operands relative to dst 8400 const auto DstIdx = 0; 8401 const auto Src0Idx = 1; 8402 const auto Vsrc1OrImmIdx = 2; 8403 8404 const auto OpXOperandsSize = 2 + HasImmX + HasVsrc1X; 8405 const auto BridgeTokensSize = 2; // Special VOPD tokens ('::' and OpYMnemo) 8406 8407 // Offsets into parsed operands 8408 const auto OpXFirstOperandOffset = 1; 8409 const auto OpYFirstOperandOffset = 8410 OpXFirstOperandOffset + OpXOperandsSize + BridgeTokensSize; 8411 8412 // Order of addOp calls determines MC operand order 8413 addOp(OpXFirstOperandOffset + DstIdx); // vdstX 8414 addOp(OpYFirstOperandOffset + DstIdx); // vdstY 8415 8416 addOp(OpXFirstOperandOffset + Src0Idx); // src0X 8417 if (HasImmX) { 8418 // immX then vsrc1X for fmamk, vsrc1X then immX for fmaak 8419 addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx); 8420 addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx + 1); 8421 } else { 8422 if (HasVsrc1X) // all except v_mov 8423 addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx); // vsrc1X 8424 } 8425 8426 addOp(OpYFirstOperandOffset + Src0Idx); // src0Y 8427 if (HasImmY) { 8428 // immY then vsrc1Y for fmamk, vsrc1Y then immY for fmaak 8429 addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx); 8430 addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx + 1); 8431 } else { 8432 if (HasVsrc1Y) // all except v_mov 8433 addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx); // vsrc1Y 8434 } 8435 } 8436 8437 //===----------------------------------------------------------------------===// 8438 // dpp 8439 //===----------------------------------------------------------------------===// 8440 8441 bool AMDGPUOperand::isDPP8() const { 8442 return isImmTy(ImmTyDPP8); 8443 } 8444 8445 bool AMDGPUOperand::isDPPCtrl() const { 8446 using namespace AMDGPU::DPP; 8447 8448 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 8449 if (result) { 8450 int64_t Imm = getImm(); 8451 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 8452 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 8453 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 8454 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 8455 (Imm == DppCtrl::WAVE_SHL1) || 8456 (Imm == DppCtrl::WAVE_ROL1) || 8457 (Imm == DppCtrl::WAVE_SHR1) || 8458 (Imm == DppCtrl::WAVE_ROR1) || 8459 (Imm == DppCtrl::ROW_MIRROR) || 8460 (Imm == DppCtrl::ROW_HALF_MIRROR) || 8461 (Imm == DppCtrl::BCAST15) || 8462 (Imm == DppCtrl::BCAST31) || 8463 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 8464 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 8465 } 8466 return false; 8467 } 8468 8469 //===----------------------------------------------------------------------===// 8470 // mAI 8471 
//===----------------------------------------------------------------------===// 8472 8473 bool AMDGPUOperand::isBLGP() const { 8474 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 8475 } 8476 8477 bool AMDGPUOperand::isCBSZ() const { 8478 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 8479 } 8480 8481 bool AMDGPUOperand::isABID() const { 8482 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 8483 } 8484 8485 bool AMDGPUOperand::isS16Imm() const { 8486 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 8487 } 8488 8489 bool AMDGPUOperand::isU16Imm() const { 8490 return isImm() && isUInt<16>(getImm()); 8491 } 8492 8493 //===----------------------------------------------------------------------===// 8494 // dim 8495 //===----------------------------------------------------------------------===// 8496 8497 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 8498 // We want to allow "dim:1D" etc., 8499 // but the initial 1 is tokenized as an integer. 8500 std::string Token; 8501 if (isToken(AsmToken::Integer)) { 8502 SMLoc Loc = getToken().getEndLoc(); 8503 Token = std::string(getTokenStr()); 8504 lex(); 8505 if (getLoc() != Loc) 8506 return false; 8507 } 8508 8509 StringRef Suffix; 8510 if (!parseId(Suffix)) 8511 return false; 8512 Token += Suffix; 8513 8514 StringRef DimId = Token; 8515 if (DimId.startswith("SQ_RSRC_IMG_")) 8516 DimId = DimId.drop_front(12); 8517 8518 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 8519 if (!DimInfo) 8520 return false; 8521 8522 Encoding = DimInfo->Encoding; 8523 return true; 8524 } 8525 8526 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 8527 if (!isGFX10Plus()) 8528 return MatchOperand_NoMatch; 8529 8530 SMLoc S = getLoc(); 8531 8532 if (!trySkipId("dim", AsmToken::Colon)) 8533 return MatchOperand_NoMatch; 8534 8535 unsigned Encoding; 8536 SMLoc Loc = getLoc(); 8537 if (!parseDimId(Encoding)) { 8538 Error(Loc, "invalid dim value"); 8539 return MatchOperand_ParseFail; 8540 } 8541 8542 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 8543 AMDGPUOperand::ImmTyDim)); 8544 return MatchOperand_Success; 8545 } 8546 8547 //===----------------------------------------------------------------------===// 8548 // dpp 8549 //===----------------------------------------------------------------------===// 8550 8551 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 8552 SMLoc S = getLoc(); 8553 8554 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 8555 return MatchOperand_NoMatch; 8556 8557 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 8558 8559 int64_t Sels[8]; 8560 8561 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8562 return MatchOperand_ParseFail; 8563 8564 for (size_t i = 0; i < 8; ++i) { 8565 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8566 return MatchOperand_ParseFail; 8567 8568 SMLoc Loc = getLoc(); 8569 if (getParser().parseAbsoluteExpression(Sels[i])) 8570 return MatchOperand_ParseFail; 8571 if (0 > Sels[i] || 7 < Sels[i]) { 8572 Error(Loc, "expected a 3-bit value"); 8573 return MatchOperand_ParseFail; 8574 } 8575 } 8576 8577 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8578 return MatchOperand_ParseFail; 8579 8580 unsigned DPP8 = 0; 8581 for (size_t i = 0; i < 8; ++i) 8582 DPP8 |= (Sels[i] << (i * 3)); 8583 8584 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 8585 return MatchOperand_Success; 
8586 } 8587 8588 bool 8589 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 8590 const OperandVector &Operands) { 8591 if (Ctrl == "row_newbcast") 8592 return isGFX90A(); 8593 8594 if (Ctrl == "row_share" || 8595 Ctrl == "row_xmask") 8596 return isGFX10Plus(); 8597 8598 if (Ctrl == "wave_shl" || 8599 Ctrl == "wave_shr" || 8600 Ctrl == "wave_rol" || 8601 Ctrl == "wave_ror" || 8602 Ctrl == "row_bcast") 8603 return isVI() || isGFX9(); 8604 8605 return Ctrl == "row_mirror" || 8606 Ctrl == "row_half_mirror" || 8607 Ctrl == "quad_perm" || 8608 Ctrl == "row_shl" || 8609 Ctrl == "row_shr" || 8610 Ctrl == "row_ror"; 8611 } 8612 8613 int64_t 8614 AMDGPUAsmParser::parseDPPCtrlPerm() { 8615 // quad_perm:[%d,%d,%d,%d] 8616 8617 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8618 return -1; 8619 8620 int64_t Val = 0; 8621 for (int i = 0; i < 4; ++i) { 8622 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8623 return -1; 8624 8625 int64_t Temp; 8626 SMLoc Loc = getLoc(); 8627 if (getParser().parseAbsoluteExpression(Temp)) 8628 return -1; 8629 if (Temp < 0 || Temp > 3) { 8630 Error(Loc, "expected a 2-bit value"); 8631 return -1; 8632 } 8633 8634 Val += (Temp << i * 2); 8635 } 8636 8637 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8638 return -1; 8639 8640 return Val; 8641 } 8642 8643 int64_t 8644 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 8645 using namespace AMDGPU::DPP; 8646 8647 // sel:%d 8648 8649 int64_t Val; 8650 SMLoc Loc = getLoc(); 8651 8652 if (getParser().parseAbsoluteExpression(Val)) 8653 return -1; 8654 8655 struct DppCtrlCheck { 8656 int64_t Ctrl; 8657 int Lo; 8658 int Hi; 8659 }; 8660 8661 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 8662 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 8663 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 8664 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 8665 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 8666 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 8667 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 8668 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 8669 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 8670 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 8671 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 8672 .Default({-1, 0, 0}); 8673 8674 bool Valid; 8675 if (Check.Ctrl == -1) { 8676 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 8677 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 8678 } else { 8679 Valid = Check.Lo <= Val && Val <= Check.Hi; 8680 Val = (Check.Lo == Check.Hi) ? 
Check.Ctrl : (Check.Ctrl | Val); 8681 } 8682 8683 if (!Valid) { 8684 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 8685 return -1; 8686 } 8687 8688 return Val; 8689 } 8690 8691 OperandMatchResultTy 8692 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 8693 using namespace AMDGPU::DPP; 8694 8695 if (!isToken(AsmToken::Identifier) || 8696 !isSupportedDPPCtrl(getTokenStr(), Operands)) 8697 return MatchOperand_NoMatch; 8698 8699 SMLoc S = getLoc(); 8700 int64_t Val = -1; 8701 StringRef Ctrl; 8702 8703 parseId(Ctrl); 8704 8705 if (Ctrl == "row_mirror") { 8706 Val = DppCtrl::ROW_MIRROR; 8707 } else if (Ctrl == "row_half_mirror") { 8708 Val = DppCtrl::ROW_HALF_MIRROR; 8709 } else { 8710 if (skipToken(AsmToken::Colon, "expected a colon")) { 8711 if (Ctrl == "quad_perm") { 8712 Val = parseDPPCtrlPerm(); 8713 } else { 8714 Val = parseDPPCtrlSel(Ctrl); 8715 } 8716 } 8717 } 8718 8719 if (Val == -1) 8720 return MatchOperand_ParseFail; 8721 8722 Operands.push_back( 8723 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 8724 return MatchOperand_Success; 8725 } 8726 8727 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 8728 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 8729 } 8730 8731 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 8732 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 8733 } 8734 8735 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 8736 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 8737 } 8738 8739 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 8740 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 8741 } 8742 8743 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 8744 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 8745 } 8746 8747 // Add dummy $old operand 8748 void AMDGPUAsmParser::cvtVOPC64NoDstDPP(MCInst &Inst, 8749 const OperandVector &Operands, 8750 bool IsDPP8) { 8751 Inst.addOperand(MCOperand::createReg(0)); 8752 cvtVOP3DPP(Inst, Operands, IsDPP8); 8753 } 8754 8755 void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8756 OptionalImmIndexMap OptionalIdx; 8757 unsigned Opc = Inst.getOpcode(); 8758 bool HasModifiers = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8759 unsigned I = 1; 8760 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8761 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8762 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8763 } 8764 8765 int Fi = 0; 8766 for (unsigned E = Operands.size(); I != E; ++I) { 8767 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8768 MCOI::TIED_TO); 8769 if (TiedTo != -1) { 8770 assert((unsigned)TiedTo < Inst.getNumOperands()); 8771 // handle tied old or src2 for MAC instructions 8772 Inst.addOperand(Inst.getOperand(TiedTo)); 8773 } 8774 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8775 // Add the register arguments 8776 if (IsDPP8 && Op.isFI()) { 8777 Fi = Op.getImm(); 8778 } else if (HasModifiers && 8779 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8780 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8781 } else if (Op.isReg()) { 8782 Op.addRegOperands(Inst, 1); 8783 } else if (Op.isImm() && 8784 Desc.OpInfo[Inst.getNumOperands()].RegClass != -1) { 8785 assert(!HasModifiers && "Case should be unreachable with modifiers"); 8786 
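      // A 32-bit literal cannot be encoded alongside DPP, so any immediate
      // reaching this point is expected to be an inline constant.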
assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP"); 8787 Op.addImmOperands(Inst, 1); 8788 } else if (Op.isImm()) { 8789 OptionalIdx[Op.getImmTy()] = I; 8790 } else { 8791 llvm_unreachable("unhandled operand type"); 8792 } 8793 } 8794 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8795 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8796 } 8797 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8798 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8799 } 8800 if (Desc.TSFlags & SIInstrFlags::VOP3P) 8801 cvtVOP3P(Inst, Operands, OptionalIdx); 8802 else if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) { 8803 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); 8804 } 8805 8806 if (IsDPP8) { 8807 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8); 8808 using namespace llvm::AMDGPU::DPP; 8809 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 8810 } else { 8811 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4); 8812 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8813 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8814 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8815 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8816 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8817 } 8818 } 8819 } 8820 8821 // Add dummy $old operand 8822 void AMDGPUAsmParser::cvtVOPCNoDstDPP(MCInst &Inst, 8823 const OperandVector &Operands, 8824 bool IsDPP8) { 8825 Inst.addOperand(MCOperand::createReg(0)); 8826 cvtDPP(Inst, Operands, IsDPP8); 8827 } 8828 8829 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8830 OptionalImmIndexMap OptionalIdx; 8831 8832 unsigned Opc = Inst.getOpcode(); 8833 bool HasModifiers = 8834 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8835 unsigned I = 1; 8836 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8837 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8838 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8839 } 8840 8841 int Fi = 0; 8842 for (unsigned E = Operands.size(); I != E; ++I) { 8843 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8844 MCOI::TIED_TO); 8845 if (TiedTo != -1) { 8846 assert((unsigned)TiedTo < Inst.getNumOperands()); 8847 // handle tied old or src2 for MAC instructions 8848 Inst.addOperand(Inst.getOperand(TiedTo)); 8849 } 8850 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8851 // Add the register arguments 8852 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8853 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8854 // Skip it. 
8855 continue; 8856 } 8857 8858 if (IsDPP8) { 8859 if (Op.isDPP8()) { 8860 Op.addImmOperands(Inst, 1); 8861 } else if (HasModifiers && 8862 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8863 Op.addRegWithFPInputModsOperands(Inst, 2); 8864 } else if (Op.isFI()) { 8865 Fi = Op.getImm(); 8866 } else if (Op.isReg()) { 8867 Op.addRegOperands(Inst, 1); 8868 } else { 8869 llvm_unreachable("Invalid operand type"); 8870 } 8871 } else { 8872 if (HasModifiers && 8873 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8874 Op.addRegWithFPInputModsOperands(Inst, 2); 8875 } else if (Op.isReg()) { 8876 Op.addRegOperands(Inst, 1); 8877 } else if (Op.isDPPCtrl()) { 8878 Op.addImmOperands(Inst, 1); 8879 } else if (Op.isImm()) { 8880 // Handle optional arguments 8881 OptionalIdx[Op.getImmTy()] = I; 8882 } else { 8883 llvm_unreachable("Invalid operand type"); 8884 } 8885 } 8886 } 8887 8888 if (IsDPP8) { 8889 using namespace llvm::AMDGPU::DPP; 8890 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 8891 } else { 8892 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8893 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8894 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8895 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8896 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8897 } 8898 } 8899 } 8900 8901 //===----------------------------------------------------------------------===// 8902 // sdwa 8903 //===----------------------------------------------------------------------===// 8904 8905 OperandMatchResultTy 8906 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 8907 AMDGPUOperand::ImmTy Type) { 8908 using namespace llvm::AMDGPU::SDWA; 8909 8910 SMLoc S = getLoc(); 8911 StringRef Value; 8912 OperandMatchResultTy res; 8913 8914 SMLoc StringLoc; 8915 res = parseStringWithPrefix(Prefix, Value, StringLoc); 8916 if (res != MatchOperand_Success) { 8917 return res; 8918 } 8919 8920 int64_t Int; 8921 Int = StringSwitch<int64_t>(Value) 8922 .Case("BYTE_0", SdwaSel::BYTE_0) 8923 .Case("BYTE_1", SdwaSel::BYTE_1) 8924 .Case("BYTE_2", SdwaSel::BYTE_2) 8925 .Case("BYTE_3", SdwaSel::BYTE_3) 8926 .Case("WORD_0", SdwaSel::WORD_0) 8927 .Case("WORD_1", SdwaSel::WORD_1) 8928 .Case("DWORD", SdwaSel::DWORD) 8929 .Default(0xffffffff); 8930 8931 if (Int == 0xffffffff) { 8932 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8933 return MatchOperand_ParseFail; 8934 } 8935 8936 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8937 return MatchOperand_Success; 8938 } 8939 8940 OperandMatchResultTy 8941 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8942 using namespace llvm::AMDGPU::SDWA; 8943 8944 SMLoc S = getLoc(); 8945 StringRef Value; 8946 OperandMatchResultTy res; 8947 8948 SMLoc StringLoc; 8949 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8950 if (res != MatchOperand_Success) { 8951 return res; 8952 } 8953 8954 int64_t Int; 8955 Int = StringSwitch<int64_t>(Value) 8956 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8957 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8958 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8959 .Default(0xffffffff); 8960 8961 if (Int == 0xffffffff) { 8962 Error(StringLoc, "invalid dst_unused value"); 8963 return MatchOperand_ParseFail; 8964 } 8965 8966 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, 
OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  SMLoc StringLoc;
  res = parseStringWithPrefix(Prefix, Value, StringLoc);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);

  if (Int == 0xffffffff) {
    Error(StringLoc, "invalid " + Twine(Prefix) + " value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  SMLoc StringLoc;
  res = parseStringWithPrefix("dst_unused", Value, StringLoc);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);

  if (Int == 0xffffffff) {
    Error(StringLoc, "invalid dst_unused value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S,
                                              AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}

void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}

void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}
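
// Common conversion for the sdwa forms of VOP1, VOP2 and VOPC instructions.
// The "vcc" dst/src written in VOP2b/VOP2e/VOPC syntax may be skipped
// (SkipDstVcc/SkipSrcVcc), and sdwa operands that were not written in the
// source (clamp, omod, dst_sel, dst_unused, src0_sel, src1_sel) are appended
// with default values.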
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//
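
// Default (zero) immediates used when the blgp, cbsz or abid modifiers of an
// mAI (MFMA) instruction are omitted from the source.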
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks if we were given an immediate operand but
  // expected the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but
    // it should also be enabled with 64-bit operands.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//
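
// Parse the optional immediate operand of the endpgm instructions. If no
// expression follows, the value defaults to 0; otherwise it must fit in
// 16 bits.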
OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }

//===----------------------------------------------------------------------===//
// LDSDIR
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitVDST() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitVDST);
}

bool AMDGPUOperand::isWaitVDST() const {
  return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
}

//===----------------------------------------------------------------------===//
// VINTERP
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitEXP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitEXP);
}

bool AMDGPUOperand::isWaitEXP() const {
  return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
}