//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/TargetParser.h"

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
204 return isSymbolRefExpr(); 205 } 206 207 bool isSymbolRefExpr() const { 208 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 209 } 210 211 bool isImm() const override { 212 return Kind == Immediate; 213 } 214 215 void setImmKindNone() const { 216 assert(isImm()); 217 Imm.Kind = ImmKindTyNone; 218 } 219 220 void setImmKindLiteral() const { 221 assert(isImm()); 222 Imm.Kind = ImmKindTyLiteral; 223 } 224 225 void setImmKindConst() const { 226 assert(isImm()); 227 Imm.Kind = ImmKindTyConst; 228 } 229 230 bool IsImmKindLiteral() const { 231 return isImm() && Imm.Kind == ImmKindTyLiteral; 232 } 233 234 bool isImmKindConst() const { 235 return isImm() && Imm.Kind == ImmKindTyConst; 236 } 237 238 bool isInlinableImm(MVT type) const; 239 bool isLiteralImm(MVT type) const; 240 241 bool isRegKind() const { 242 return Kind == Register; 243 } 244 245 bool isReg() const override { 246 return isRegKind() && !hasModifiers(); 247 } 248 249 bool isRegOrInline(unsigned RCID, MVT type) const { 250 return isRegClass(RCID) || isInlinableImm(type); 251 } 252 253 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 254 return isRegOrInline(RCID, type) || isLiteralImm(type); 255 } 256 257 bool isRegOrImmWithInt16InputMods() const { 258 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 259 } 260 261 bool isRegOrImmWithInt32InputMods() const { 262 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 263 } 264 265 bool isRegOrImmWithInt64InputMods() const { 266 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 267 } 268 269 bool isRegOrImmWithFP16InputMods() const { 270 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 271 } 272 273 bool isRegOrImmWithFP32InputMods() const { 274 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 275 } 276 277 bool isRegOrImmWithFP64InputMods() const { 278 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 279 } 280 281 bool isVReg() const { 282 return isRegClass(AMDGPU::VGPR_32RegClassID) || 283 isRegClass(AMDGPU::VReg_64RegClassID) || 284 isRegClass(AMDGPU::VReg_96RegClassID) || 285 isRegClass(AMDGPU::VReg_128RegClassID) || 286 isRegClass(AMDGPU::VReg_160RegClassID) || 287 isRegClass(AMDGPU::VReg_192RegClassID) || 288 isRegClass(AMDGPU::VReg_256RegClassID) || 289 isRegClass(AMDGPU::VReg_512RegClassID) || 290 isRegClass(AMDGPU::VReg_1024RegClassID); 291 } 292 293 bool isVReg32() const { 294 return isRegClass(AMDGPU::VGPR_32RegClassID); 295 } 296 297 bool isVReg32OrOff() const { 298 return isOff() || isVReg32(); 299 } 300 301 bool isNull() const { 302 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 303 } 304 305 bool isVRegWithInputMods() const; 306 307 bool isSDWAOperand(MVT type) const; 308 bool isSDWAFP16Operand() const; 309 bool isSDWAFP32Operand() const; 310 bool isSDWAInt16Operand() const; 311 bool isSDWAInt32Operand() const; 312 313 bool isImmTy(ImmTy ImmT) const { 314 return isImm() && Imm.Type == ImmT; 315 } 316 317 bool isImmModifier() const { 318 return isImm() && Imm.Type != ImmTyNone; 319 } 320 321 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 322 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 323 bool isDMask() const { return isImmTy(ImmTyDMask); } 324 bool isDim() const { return isImmTy(ImmTyDim); } 325 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 326 bool isDA() const { return isImmTy(ImmTyDA); } 327 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 328 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 329 bool 
isLWE() const { return isImmTy(ImmTyLWE); } 330 bool isOff() const { return isImmTy(ImmTyOff); } 331 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 332 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 333 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 334 bool isOffen() const { return isImmTy(ImmTyOffen); } 335 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 336 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 337 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 338 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 339 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 340 341 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 342 bool isGDS() const { return isImmTy(ImmTyGDS); } 343 bool isLDS() const { return isImmTy(ImmTyLDS); } 344 bool isCPol() const { return isImmTy(ImmTyCPol); } 345 bool isSWZ() const { return isImmTy(ImmTySWZ); } 346 bool isTFE() const { return isImmTy(ImmTyTFE); } 347 bool isD16() const { return isImmTy(ImmTyD16); } 348 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 349 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 350 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 351 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 352 bool isFI() const { return isImmTy(ImmTyDppFi); } 353 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 354 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 355 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 356 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 357 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 358 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 359 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 360 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 361 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 362 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 363 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 364 bool isHigh() const { return isImmTy(ImmTyHigh); } 365 366 bool isMod() const { 367 return isClampSI() || isOModSI(); 368 } 369 370 bool isRegOrImm() const { 371 return isReg() || isImm(); 372 } 373 374 bool isRegClass(unsigned RCID) const; 375 376 bool isInlineValue() const; 377 378 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 379 return isRegOrInline(RCID, type) && !hasModifiers(); 380 } 381 382 bool isSCSrcB16() const { 383 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 384 } 385 386 bool isSCSrcV2B16() const { 387 return isSCSrcB16(); 388 } 389 390 bool isSCSrcB32() const { 391 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 392 } 393 394 bool isSCSrcB64() const { 395 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 396 } 397 398 bool isBoolReg() const; 399 400 bool isSCSrcF16() const { 401 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 402 } 403 404 bool isSCSrcV2F16() const { 405 return isSCSrcF16(); 406 } 407 408 bool isSCSrcF32() const { 409 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 410 } 411 412 bool isSCSrcF64() const { 413 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 414 } 415 416 bool isSSrcB32() const { 417 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 418 } 419 420 bool isSSrcB16() const { 421 return isSCSrcB16() || 
isLiteralImm(MVT::i16); 422 } 423 424 bool isSSrcV2B16() const { 425 llvm_unreachable("cannot happen"); 426 return isSSrcB16(); 427 } 428 429 bool isSSrcB64() const { 430 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 431 // See isVSrc64(). 432 return isSCSrcB64() || isLiteralImm(MVT::i64); 433 } 434 435 bool isSSrcF32() const { 436 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 437 } 438 439 bool isSSrcF64() const { 440 return isSCSrcB64() || isLiteralImm(MVT::f64); 441 } 442 443 bool isSSrcF16() const { 444 return isSCSrcB16() || isLiteralImm(MVT::f16); 445 } 446 447 bool isSSrcV2F16() const { 448 llvm_unreachable("cannot happen"); 449 return isSSrcF16(); 450 } 451 452 bool isSSrcV2FP32() const { 453 llvm_unreachable("cannot happen"); 454 return isSSrcF32(); 455 } 456 457 bool isSCSrcV2FP32() const { 458 llvm_unreachable("cannot happen"); 459 return isSCSrcF32(); 460 } 461 462 bool isSSrcV2INT32() const { 463 llvm_unreachable("cannot happen"); 464 return isSSrcB32(); 465 } 466 467 bool isSCSrcV2INT32() const { 468 llvm_unreachable("cannot happen"); 469 return isSCSrcB32(); 470 } 471 472 bool isSSrcOrLdsB32() const { 473 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 474 isLiteralImm(MVT::i32) || isExpr(); 475 } 476 477 bool isVCSrcB32() const { 478 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 479 } 480 481 bool isVCSrcB64() const { 482 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 483 } 484 485 bool isVCSrcB16() const { 486 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 487 } 488 489 bool isVCSrcV2B16() const { 490 return isVCSrcB16(); 491 } 492 493 bool isVCSrcF32() const { 494 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 495 } 496 497 bool isVCSrcF64() const { 498 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 499 } 500 501 bool isVCSrcF16() const { 502 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 503 } 504 505 bool isVCSrcV2F16() const { 506 return isVCSrcF16(); 507 } 508 509 bool isVSrcB32() const { 510 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 511 } 512 513 bool isVSrcB64() const { 514 return isVCSrcF64() || isLiteralImm(MVT::i64); 515 } 516 517 bool isVSrcB16() const { 518 return isVCSrcB16() || isLiteralImm(MVT::i16); 519 } 520 521 bool isVSrcV2B16() const { 522 return isVSrcB16() || isLiteralImm(MVT::v2i16); 523 } 524 525 bool isVCSrcV2FP32() const { 526 return isVCSrcF64(); 527 } 528 529 bool isVSrcV2FP32() const { 530 return isVSrcF64() || isLiteralImm(MVT::v2f32); 531 } 532 533 bool isVCSrcV2INT32() const { 534 return isVCSrcB64(); 535 } 536 537 bool isVSrcV2INT32() const { 538 return isVSrcB64() || isLiteralImm(MVT::v2i32); 539 } 540 541 bool isVSrcF32() const { 542 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 543 } 544 545 bool isVSrcF64() const { 546 return isVCSrcF64() || isLiteralImm(MVT::f64); 547 } 548 549 bool isVSrcF16() const { 550 return isVCSrcF16() || isLiteralImm(MVT::f16); 551 } 552 553 bool isVSrcV2F16() const { 554 return isVSrcF16() || isLiteralImm(MVT::v2f16); 555 } 556 557 bool isVISrcB32() const { 558 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 559 } 560 561 bool isVISrcB16() const { 562 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 563 } 564 565 bool isVISrcV2B16() const { 566 return isVISrcB16(); 567 } 568 569 bool isVISrcF32() const { 570 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 571 } 572 573 
bool isVISrcF16() const { 574 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 575 } 576 577 bool isVISrcV2F16() const { 578 return isVISrcF16() || isVISrcB32(); 579 } 580 581 bool isVISrc_64B64() const { 582 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); 583 } 584 585 bool isVISrc_64F64() const { 586 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); 587 } 588 589 bool isVISrc_64V2FP32() const { 590 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); 591 } 592 593 bool isVISrc_64V2INT32() const { 594 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); 595 } 596 597 bool isVISrc_256B64() const { 598 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); 599 } 600 601 bool isVISrc_256F64() const { 602 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); 603 } 604 605 bool isVISrc_128B16() const { 606 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); 607 } 608 609 bool isVISrc_128V2B16() const { 610 return isVISrc_128B16(); 611 } 612 613 bool isVISrc_128B32() const { 614 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); 615 } 616 617 bool isVISrc_128F32() const { 618 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); 619 } 620 621 bool isVISrc_256V2FP32() const { 622 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); 623 } 624 625 bool isVISrc_256V2INT32() const { 626 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); 627 } 628 629 bool isVISrc_512B32() const { 630 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); 631 } 632 633 bool isVISrc_512B16() const { 634 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); 635 } 636 637 bool isVISrc_512V2B16() const { 638 return isVISrc_512B16(); 639 } 640 641 bool isVISrc_512F32() const { 642 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); 643 } 644 645 bool isVISrc_512F16() const { 646 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); 647 } 648 649 bool isVISrc_512V2F16() const { 650 return isVISrc_512F16() || isVISrc_512B32(); 651 } 652 653 bool isVISrc_1024B32() const { 654 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); 655 } 656 657 bool isVISrc_1024B16() const { 658 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); 659 } 660 661 bool isVISrc_1024V2B16() const { 662 return isVISrc_1024B16(); 663 } 664 665 bool isVISrc_1024F32() const { 666 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); 667 } 668 669 bool isVISrc_1024F16() const { 670 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); 671 } 672 673 bool isVISrc_1024V2F16() const { 674 return isVISrc_1024F16() || isVISrc_1024B32(); 675 } 676 677 bool isAISrcB32() const { 678 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 679 } 680 681 bool isAISrcB16() const { 682 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 683 } 684 685 bool isAISrcV2B16() const { 686 return isAISrcB16(); 687 } 688 689 bool isAISrcF32() const { 690 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 691 } 692 693 bool isAISrcF16() const { 694 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 695 } 696 697 bool isAISrcV2F16() const { 698 return isAISrcF16() || isAISrcB32(); 699 } 700 701 bool isAISrc_64B64() const { 702 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); 703 } 704 705 bool isAISrc_64F64() const { 706 return 
isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); 707 } 708 709 bool isAISrc_128B32() const { 710 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 711 } 712 713 bool isAISrc_128B16() const { 714 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 715 } 716 717 bool isAISrc_128V2B16() const { 718 return isAISrc_128B16(); 719 } 720 721 bool isAISrc_128F32() const { 722 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 723 } 724 725 bool isAISrc_128F16() const { 726 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 727 } 728 729 bool isAISrc_128V2F16() const { 730 return isAISrc_128F16() || isAISrc_128B32(); 731 } 732 733 bool isVISrc_128F16() const { 734 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); 735 } 736 737 bool isVISrc_128V2F16() const { 738 return isVISrc_128F16() || isVISrc_128B32(); 739 } 740 741 bool isAISrc_256B64() const { 742 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); 743 } 744 745 bool isAISrc_256F64() const { 746 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); 747 } 748 749 bool isAISrc_512B32() const { 750 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 751 } 752 753 bool isAISrc_512B16() const { 754 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 755 } 756 757 bool isAISrc_512V2B16() const { 758 return isAISrc_512B16(); 759 } 760 761 bool isAISrc_512F32() const { 762 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 763 } 764 765 bool isAISrc_512F16() const { 766 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 767 } 768 769 bool isAISrc_512V2F16() const { 770 return isAISrc_512F16() || isAISrc_512B32(); 771 } 772 773 bool isAISrc_1024B32() const { 774 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 775 } 776 777 bool isAISrc_1024B16() const { 778 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 779 } 780 781 bool isAISrc_1024V2B16() const { 782 return isAISrc_1024B16(); 783 } 784 785 bool isAISrc_1024F32() const { 786 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 787 } 788 789 bool isAISrc_1024F16() const { 790 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 791 } 792 793 bool isAISrc_1024V2F16() const { 794 return isAISrc_1024F16() || isAISrc_1024B32(); 795 } 796 797 bool isKImmFP32() const { 798 return isLiteralImm(MVT::f32); 799 } 800 801 bool isKImmFP16() const { 802 return isLiteralImm(MVT::f16); 803 } 804 805 bool isMem() const override { 806 return false; 807 } 808 809 bool isExpr() const { 810 return Kind == Expression; 811 } 812 813 bool isSoppBrTarget() const { 814 return isExpr() || isImm(); 815 } 816 817 bool isSWaitCnt() const; 818 bool isHwreg() const; 819 bool isSendMsg() const; 820 bool isSwizzle() const; 821 bool isSMRDOffset8() const; 822 bool isSMEMOffset() const; 823 bool isSMRDLiteralOffset() const; 824 bool isDPP8() const; 825 bool isDPPCtrl() const; 826 bool isBLGP() const; 827 bool isCBSZ() const; 828 bool isABID() const; 829 bool isGPRIdxMode() const; 830 bool isS16Imm() const; 831 bool isU16Imm() const; 832 bool isEndpgm() const; 833 834 StringRef getExpressionAsToken() const { 835 assert(isExpr()); 836 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 837 return S->getSymbol().getName(); 838 } 839 840 StringRef getToken() const { 841 assert(isToken()); 842 843 if (Kind == Expression) 844 return getExpressionAsToken(); 845 846 return StringRef(Tok.Data, Tok.Length); 
847 } 848 849 int64_t getImm() const { 850 assert(isImm()); 851 return Imm.Val; 852 } 853 854 void setImm(int64_t Val) { 855 assert(isImm()); 856 Imm.Val = Val; 857 } 858 859 ImmTy getImmTy() const { 860 assert(isImm()); 861 return Imm.Type; 862 } 863 864 unsigned getReg() const override { 865 assert(isRegKind()); 866 return Reg.RegNo; 867 } 868 869 SMLoc getStartLoc() const override { 870 return StartLoc; 871 } 872 873 SMLoc getEndLoc() const override { 874 return EndLoc; 875 } 876 877 SMRange getLocRange() const { 878 return SMRange(StartLoc, EndLoc); 879 } 880 881 Modifiers getModifiers() const { 882 assert(isRegKind() || isImmTy(ImmTyNone)); 883 return isRegKind() ? Reg.Mods : Imm.Mods; 884 } 885 886 void setModifiers(Modifiers Mods) { 887 assert(isRegKind() || isImmTy(ImmTyNone)); 888 if (isRegKind()) 889 Reg.Mods = Mods; 890 else 891 Imm.Mods = Mods; 892 } 893 894 bool hasModifiers() const { 895 return getModifiers().hasModifiers(); 896 } 897 898 bool hasFPModifiers() const { 899 return getModifiers().hasFPModifiers(); 900 } 901 902 bool hasIntModifiers() const { 903 return getModifiers().hasIntModifiers(); 904 } 905 906 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 907 908 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 909 910 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 911 912 template <unsigned Bitwidth> 913 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 914 915 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 916 addKImmFPOperands<16>(Inst, N); 917 } 918 919 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 920 addKImmFPOperands<32>(Inst, N); 921 } 922 923 void addRegOperands(MCInst &Inst, unsigned N) const; 924 925 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 926 addRegOperands(Inst, N); 927 } 928 929 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 930 if (isRegKind()) 931 addRegOperands(Inst, N); 932 else if (isExpr()) 933 Inst.addOperand(MCOperand::createExpr(Expr)); 934 else 935 addImmOperands(Inst, N); 936 } 937 938 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 939 Modifiers Mods = getModifiers(); 940 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 941 if (isRegKind()) { 942 addRegOperands(Inst, N); 943 } else { 944 addImmOperands(Inst, N, false); 945 } 946 } 947 948 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 949 assert(!hasIntModifiers()); 950 addRegOrImmWithInputModsOperands(Inst, N); 951 } 952 953 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 954 assert(!hasFPModifiers()); 955 addRegOrImmWithInputModsOperands(Inst, N); 956 } 957 958 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 959 Modifiers Mods = getModifiers(); 960 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 961 assert(isRegKind()); 962 addRegOperands(Inst, N); 963 } 964 965 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 966 assert(!hasIntModifiers()); 967 addRegWithInputModsOperands(Inst, N); 968 } 969 970 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 971 assert(!hasFPModifiers()); 972 addRegWithInputModsOperands(Inst, N); 973 } 974 975 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 976 if (isImm()) 977 addImmOperands(Inst, N); 978 else { 979 assert(isExpr()); 980 Inst.addOperand(MCOperand::createExpr(Expr)); 981 } 982 } 983 984 static void printImmTy(raw_ostream& OS, 
ImmTy Type) { 985 switch (Type) { 986 case ImmTyNone: OS << "None"; break; 987 case ImmTyGDS: OS << "GDS"; break; 988 case ImmTyLDS: OS << "LDS"; break; 989 case ImmTyOffen: OS << "Offen"; break; 990 case ImmTyIdxen: OS << "Idxen"; break; 991 case ImmTyAddr64: OS << "Addr64"; break; 992 case ImmTyOffset: OS << "Offset"; break; 993 case ImmTyInstOffset: OS << "InstOffset"; break; 994 case ImmTyOffset0: OS << "Offset0"; break; 995 case ImmTyOffset1: OS << "Offset1"; break; 996 case ImmTyCPol: OS << "CPol"; break; 997 case ImmTySWZ: OS << "SWZ"; break; 998 case ImmTyTFE: OS << "TFE"; break; 999 case ImmTyD16: OS << "D16"; break; 1000 case ImmTyFORMAT: OS << "FORMAT"; break; 1001 case ImmTyClampSI: OS << "ClampSI"; break; 1002 case ImmTyOModSI: OS << "OModSI"; break; 1003 case ImmTyDPP8: OS << "DPP8"; break; 1004 case ImmTyDppCtrl: OS << "DppCtrl"; break; 1005 case ImmTyDppRowMask: OS << "DppRowMask"; break; 1006 case ImmTyDppBankMask: OS << "DppBankMask"; break; 1007 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 1008 case ImmTyDppFi: OS << "FI"; break; 1009 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 1010 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 1011 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 1012 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 1013 case ImmTyDMask: OS << "DMask"; break; 1014 case ImmTyDim: OS << "Dim"; break; 1015 case ImmTyUNorm: OS << "UNorm"; break; 1016 case ImmTyDA: OS << "DA"; break; 1017 case ImmTyR128A16: OS << "R128A16"; break; 1018 case ImmTyA16: OS << "A16"; break; 1019 case ImmTyLWE: OS << "LWE"; break; 1020 case ImmTyOff: OS << "Off"; break; 1021 case ImmTyExpTgt: OS << "ExpTgt"; break; 1022 case ImmTyExpCompr: OS << "ExpCompr"; break; 1023 case ImmTyExpVM: OS << "ExpVM"; break; 1024 case ImmTyHwreg: OS << "Hwreg"; break; 1025 case ImmTySendMsg: OS << "SendMsg"; break; 1026 case ImmTyInterpSlot: OS << "InterpSlot"; break; 1027 case ImmTyInterpAttr: OS << "InterpAttr"; break; 1028 case ImmTyAttrChan: OS << "AttrChan"; break; 1029 case ImmTyOpSel: OS << "OpSel"; break; 1030 case ImmTyOpSelHi: OS << "OpSelHi"; break; 1031 case ImmTyNegLo: OS << "NegLo"; break; 1032 case ImmTyNegHi: OS << "NegHi"; break; 1033 case ImmTySwizzle: OS << "Swizzle"; break; 1034 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 1035 case ImmTyHigh: OS << "High"; break; 1036 case ImmTyBLGP: OS << "BLGP"; break; 1037 case ImmTyCBSZ: OS << "CBSZ"; break; 1038 case ImmTyABID: OS << "ABID"; break; 1039 case ImmTyEndpgm: OS << "Endpgm"; break; 1040 } 1041 } 1042 1043 void print(raw_ostream &OS) const override { 1044 switch (Kind) { 1045 case Register: 1046 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 1047 break; 1048 case Immediate: 1049 OS << '<' << getImm(); 1050 if (getImmTy() != ImmTyNone) { 1051 OS << " type: "; printImmTy(OS, getImmTy()); 1052 } 1053 OS << " mods: " << Imm.Mods << '>'; 1054 break; 1055 case Token: 1056 OS << '\'' << getToken() << '\''; 1057 break; 1058 case Expression: 1059 OS << "<expr " << *Expr << '>'; 1060 break; 1061 } 1062 } 1063 1064 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 1065 int64_t Val, SMLoc Loc, 1066 ImmTy Type = ImmTyNone, 1067 bool IsFPImm = false) { 1068 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 1069 Op->Imm.Val = Val; 1070 Op->Imm.IsFPImm = IsFPImm; 1071 Op->Imm.Kind = ImmKindTyNone; 1072 Op->Imm.Type = Type; 1073 Op->Imm.Mods = Modifiers(); 1074 Op->StartLoc = Loc; 1075 Op->EndLoc = Loc; 1076 return Op; 1077 } 1078 1079 static AMDGPUOperand::Ptr 
CreateToken(const AMDGPUAsmParser *AsmParser, 1080 StringRef Str, SMLoc Loc, 1081 bool HasExplicitEncodingSize = true) { 1082 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 1083 Res->Tok.Data = Str.data(); 1084 Res->Tok.Length = Str.size(); 1085 Res->StartLoc = Loc; 1086 Res->EndLoc = Loc; 1087 return Res; 1088 } 1089 1090 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 1091 unsigned RegNo, SMLoc S, 1092 SMLoc E) { 1093 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 1094 Op->Reg.RegNo = RegNo; 1095 Op->Reg.Mods = Modifiers(); 1096 Op->StartLoc = S; 1097 Op->EndLoc = E; 1098 return Op; 1099 } 1100 1101 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 1102 const class MCExpr *Expr, SMLoc S) { 1103 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 1104 Op->Expr = Expr; 1105 Op->StartLoc = S; 1106 Op->EndLoc = S; 1107 return Op; 1108 } 1109 }; 1110 1111 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 1112 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 1113 return OS; 1114 } 1115 1116 //===----------------------------------------------------------------------===// 1117 // AsmParser 1118 //===----------------------------------------------------------------------===// 1119 1120 // Holds info related to the current kernel, e.g. count of SGPRs used. 1121 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 1122 // .amdgpu_hsa_kernel or at EOF. 1123 class KernelScopeInfo { 1124 int SgprIndexUnusedMin = -1; 1125 int VgprIndexUnusedMin = -1; 1126 MCContext *Ctx = nullptr; 1127 1128 void usesSgprAt(int i) { 1129 if (i >= SgprIndexUnusedMin) { 1130 SgprIndexUnusedMin = ++i; 1131 if (Ctx) { 1132 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1133 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1134 } 1135 } 1136 } 1137 1138 void usesVgprAt(int i) { 1139 if (i >= VgprIndexUnusedMin) { 1140 VgprIndexUnusedMin = ++i; 1141 if (Ctx) { 1142 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1143 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 1144 } 1145 } 1146 } 1147 1148 public: 1149 KernelScopeInfo() = default; 1150 1151 void initialize(MCContext &Context) { 1152 Ctx = &Context; 1153 usesSgprAt(SgprIndexUnusedMin = -1); 1154 usesVgprAt(VgprIndexUnusedMin = -1); 1155 } 1156 1157 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 1158 switch (RegKind) { 1159 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 1160 case IS_AGPR: // fall through 1161 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 1162 default: break; 1163 } 1164 } 1165 }; 1166 1167 class AMDGPUAsmParser : public MCTargetAsmParser { 1168 MCAsmParser &Parser; 1169 1170 // Number of extra operands parsed after the first optional operand. 1171 // This may be necessary to skip hardcoded mandatory operands. 
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;
  unsigned CPolSeen;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32,
                          unsigned NextFreeVGPR, SMRange VGPRRange,
                          unsigned NextFreeSGPR, SMRange SGPRRange,
                          unsigned &VGPRBlocks, unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1297 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1298 MCContext &Ctx = getContext(); 1299 if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) { 1300 MCSymbol *Sym = 1301 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1302 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1303 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1304 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1305 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1306 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1307 } else { 1308 MCSymbol *Sym = 1309 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1310 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1311 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1312 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1313 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1314 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1315 } 1316 if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) { 1317 initializeGprCountSymbol(IS_VGPR); 1318 initializeGprCountSymbol(IS_SGPR); 1319 } else 1320 KernelScope.initialize(getContext()); 1321 } 1322 } 1323 1324 bool hasMIMG_R128() const { 1325 return AMDGPU::hasMIMG_R128(getSTI()); 1326 } 1327 1328 bool hasPackedD16() const { 1329 return AMDGPU::hasPackedD16(getSTI()); 1330 } 1331 1332 bool hasGFX10A16() const { 1333 return AMDGPU::hasGFX10A16(getSTI()); 1334 } 1335 1336 bool hasG16() const { return AMDGPU::hasG16(getSTI()); } 1337 1338 bool isSI() const { 1339 return AMDGPU::isSI(getSTI()); 1340 } 1341 1342 bool isCI() const { 1343 return AMDGPU::isCI(getSTI()); 1344 } 1345 1346 bool isVI() const { 1347 return AMDGPU::isVI(getSTI()); 1348 } 1349 1350 bool isGFX9() const { 1351 return AMDGPU::isGFX9(getSTI()); 1352 } 1353 1354 bool isGFX90A() const { 1355 return AMDGPU::isGFX90A(getSTI()); 1356 } 1357 1358 bool isGFX9Plus() const { 1359 return AMDGPU::isGFX9Plus(getSTI()); 1360 } 1361 1362 bool isGFX10() const { 1363 return AMDGPU::isGFX10(getSTI()); 1364 } 1365 1366 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1367 1368 bool isGFX10_BEncoding() const { 1369 return AMDGPU::isGFX10_BEncoding(getSTI()); 1370 } 1371 1372 bool hasInv2PiInlineImm() const { 1373 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1374 } 1375 1376 bool hasFlatOffsets() const { 1377 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1378 } 1379 1380 bool hasArchitectedFlatScratch() const { 1381 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; 1382 } 1383 1384 bool hasSGPR102_SGPR103() const { 1385 return !isVI() && !isGFX9(); 1386 } 1387 1388 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1389 1390 bool hasIntClamp() const { 1391 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1392 } 1393 1394 AMDGPUTargetStreamer &getTargetStreamer() { 1395 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1396 return static_cast<AMDGPUTargetStreamer &>(TS); 1397 } 1398 1399 const MCRegisterInfo *getMRI() const { 1400 // We need this const_cast because for some reason getContext() is not const 1401 // in MCAsmParser. 
1402 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1403 } 1404 1405 const MCInstrInfo *getMII() const { 1406 return &MII; 1407 } 1408 1409 const FeatureBitset &getFeatureBits() const { 1410 return getSTI().getFeatureBits(); 1411 } 1412 1413 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1414 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1415 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1416 1417 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1418 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1419 bool isForcedDPP() const { return ForcedDPP; } 1420 bool isForcedSDWA() const { return ForcedSDWA; } 1421 ArrayRef<unsigned> getMatchedVariants() const; 1422 StringRef getMatchedVariantName() const; 1423 1424 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1425 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1426 bool RestoreOnFailure); 1427 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1428 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1429 SMLoc &EndLoc) override; 1430 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1431 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1432 unsigned Kind) override; 1433 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1434 OperandVector &Operands, MCStreamer &Out, 1435 uint64_t &ErrorInfo, 1436 bool MatchingInlineAsm) override; 1437 bool ParseDirective(AsmToken DirectiveID) override; 1438 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1439 OperandMode Mode = OperandMode_Default); 1440 StringRef parseMnemonicSuffix(StringRef Name); 1441 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1442 SMLoc NameLoc, OperandVector &Operands) override; 1443 //bool ProcessInstruction(MCInst &Inst); 1444 1445 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1446 1447 OperandMatchResultTy 1448 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1449 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1450 bool (*ConvertResult)(int64_t &) = nullptr); 1451 1452 OperandMatchResultTy 1453 parseOperandArrayWithPrefix(const char *Prefix, 1454 OperandVector &Operands, 1455 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1456 bool (*ConvertResult)(int64_t&) = nullptr); 1457 1458 OperandMatchResultTy 1459 parseNamedBit(StringRef Name, OperandVector &Operands, 1460 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1461 OperandMatchResultTy parseCPol(OperandVector &Operands); 1462 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1463 StringRef &Value, 1464 SMLoc &StringLoc); 1465 1466 bool isModifier(); 1467 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1468 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1469 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1470 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1471 bool parseSP3NegModifier(); 1472 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1473 OperandMatchResultTy parseReg(OperandVector &Operands); 1474 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1475 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool 
AllowImm = true); 1476 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1477 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1478 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1479 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1480 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1481 OperandMatchResultTy parseUfmt(int64_t &Format); 1482 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1483 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1484 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1485 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1486 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1487 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1488 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1489 1490 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1491 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1492 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1493 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1494 1495 bool parseCnt(int64_t &IntVal); 1496 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1497 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1498 1499 private: 1500 struct OperandInfoTy { 1501 SMLoc Loc; 1502 int64_t Id; 1503 bool IsSymbolic = false; 1504 bool IsDefined = false; 1505 1506 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1507 }; 1508 1509 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1510 bool validateSendMsg(const OperandInfoTy &Msg, 1511 const OperandInfoTy &Op, 1512 const OperandInfoTy &Stream); 1513 1514 bool parseHwregBody(OperandInfoTy &HwReg, 1515 OperandInfoTy &Offset, 1516 OperandInfoTy &Width); 1517 bool validateHwreg(const OperandInfoTy &HwReg, 1518 const OperandInfoTy &Offset, 1519 const OperandInfoTy &Width); 1520 1521 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1522 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1523 1524 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1525 const OperandVector &Operands) const; 1526 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1527 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1528 SMLoc getLitLoc(const OperandVector &Operands) const; 1529 SMLoc getConstLoc(const OperandVector &Operands) const; 1530 1531 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1532 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1533 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1534 bool validateSOPLiteral(const MCInst &Inst) const; 1535 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1536 bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands); 1537 bool validateIntClampSupported(const MCInst &Inst); 1538 bool validateMIMGAtomicDMask(const MCInst &Inst); 1539 bool validateMIMGGatherDMask(const MCInst &Inst); 1540 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1541 bool validateMIMGDataSize(const MCInst &Inst); 1542 bool validateMIMGAddrSize(const 
MCInst &Inst); 1543 bool validateMIMGD16(const MCInst &Inst); 1544 bool validateMIMGDim(const MCInst &Inst); 1545 bool validateMIMGMSAA(const MCInst &Inst); 1546 bool validateOpSel(const MCInst &Inst); 1547 bool validateDPP(const MCInst &Inst, const OperandVector &Operands); 1548 bool validateVccOperand(unsigned Reg) const; 1549 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands); 1550 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1551 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands); 1552 bool validateAGPRLdSt(const MCInst &Inst) const; 1553 bool validateVGPRAlign(const MCInst &Inst) const; 1554 bool validateGWS(const MCInst &Inst, const OperandVector &Operands); 1555 bool validateDivScale(const MCInst &Inst); 1556 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1557 const SMLoc &IDLoc); 1558 Optional<StringRef> validateLdsDirect(const MCInst &Inst); 1559 unsigned getConstantBusLimit(unsigned Opcode) const; 1560 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1561 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1562 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1563 1564 bool isSupportedMnemo(StringRef Mnemo, 1565 const FeatureBitset &FBS); 1566 bool isSupportedMnemo(StringRef Mnemo, 1567 const FeatureBitset &FBS, 1568 ArrayRef<unsigned> Variants); 1569 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1570 1571 bool isId(const StringRef Id) const; 1572 bool isId(const AsmToken &Token, const StringRef Id) const; 1573 bool isToken(const AsmToken::TokenKind Kind) const; 1574 bool trySkipId(const StringRef Id); 1575 bool trySkipId(const StringRef Pref, const StringRef Id); 1576 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1577 bool trySkipToken(const AsmToken::TokenKind Kind); 1578 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1579 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1580 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1581 1582 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1583 AsmToken::TokenKind getTokenKind() const; 1584 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1585 bool parseExpr(OperandVector &Operands); 1586 StringRef getTokenStr() const; 1587 AsmToken peekToken(); 1588 AsmToken getToken() const; 1589 SMLoc getLoc() const; 1590 void lex(); 1591 1592 public: 1593 void onBeginOfFile() override; 1594 1595 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1596 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1597 1598 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1599 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1600 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1601 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1602 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1603 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1604 1605 bool parseSwizzleOperand(int64_t &Op, 1606 const unsigned MinVal, 1607 const unsigned MaxVal, 1608 const StringRef ErrMsg, 1609 SMLoc &Loc); 1610 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1611 const unsigned MinVal, 1612 const unsigned MaxVal, 1613 const StringRef ErrMsg); 1614 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1615 bool parseSwizzleOffset(int64_t &Imm); 1616 bool parseSwizzleMacro(int64_t &Imm); 1617 bool 
parseSwizzleQuadPerm(int64_t &Imm); 1618 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1619 bool parseSwizzleBroadcast(int64_t &Imm); 1620 bool parseSwizzleSwap(int64_t &Imm); 1621 bool parseSwizzleReverse(int64_t &Imm); 1622 1623 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1624 int64_t parseGPRIdxMacro(); 1625 1626 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 1627 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1628 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); } 1629 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1630 1631 AMDGPUOperand::Ptr defaultCPol() const; 1632 1633 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1634 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1635 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1636 AMDGPUOperand::Ptr defaultFlatOffset() const; 1637 1638 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1639 1640 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1641 OptionalImmIndexMap &OptionalIdx); 1642 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1643 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1644 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1645 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 1646 OptionalImmIndexMap &OptionalIdx); 1647 1648 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1649 1650 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1651 bool IsAtomic = false); 1652 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1653 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1654 1655 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands); 1656 1657 bool parseDimId(unsigned &Encoding); 1658 OperandMatchResultTy parseDim(OperandVector &Operands); 1659 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1660 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1661 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1662 int64_t parseDPPCtrlSel(StringRef Ctrl); 1663 int64_t parseDPPCtrlPerm(); 1664 AMDGPUOperand::Ptr defaultRowMask() const; 1665 AMDGPUOperand::Ptr defaultBankMask() const; 1666 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1667 AMDGPUOperand::Ptr defaultFI() const; 1668 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1669 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1670 1671 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1672 AMDGPUOperand::ImmTy Type); 1673 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1674 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1675 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1676 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1677 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1678 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1679 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1680 uint64_t BasicInstType, 1681 bool SkipDstVcc = false, 1682 bool SkipSrcVcc = false); 1683 1684 AMDGPUOperand::Ptr defaultBLGP() const; 1685 AMDGPUOperand::Ptr defaultCBSZ() const; 1686 AMDGPUOperand::Ptr defaultABID() const; 1687 1688 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1689 AMDGPUOperand::Ptr 
  defaultEndpgmImmOperands() const;
};

struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with an integer type with equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_KIMM32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_KIMM16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to single precision
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss but not overflow or underflow.
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}

static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
  if (VT.getScalarType() == MVT::i16) {
    // FP immediate values are broken.
    return isInlinableIntLiteral(Val);
  }

  // f16/v2f16 operands work correctly for all values.
1793 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1794 } 1795 1796 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1797 1798 // This is a hack to enable named inline values like 1799 // shared_base with both 32-bit and 64-bit operands. 1800 // Note that these values are defined as 1801 // 32-bit operands only. 1802 if (isInlineValue()) { 1803 return true; 1804 } 1805 1806 if (!isImmTy(ImmTyNone)) { 1807 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1808 return false; 1809 } 1810 // TODO: We should avoid using host float here. It would be better to 1811 // check the float bit values which is what a few other places do. 1812 // We've had bot failures before due to weird NaN support on mips hosts. 1813 1814 APInt Literal(64, Imm.Val); 1815 1816 if (Imm.IsFPImm) { // We got fp literal token 1817 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1818 return AMDGPU::isInlinableLiteral64(Imm.Val, 1819 AsmParser->hasInv2PiInlineImm()); 1820 } 1821 1822 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1823 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1824 return false; 1825 1826 if (type.getScalarSizeInBits() == 16) { 1827 return isInlineableLiteralOp16( 1828 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1829 type, AsmParser->hasInv2PiInlineImm()); 1830 } 1831 1832 // Check if single precision literal is inlinable 1833 return AMDGPU::isInlinableLiteral32( 1834 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1835 AsmParser->hasInv2PiInlineImm()); 1836 } 1837 1838 // We got int literal token. 1839 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1840 return AMDGPU::isInlinableLiteral64(Imm.Val, 1841 AsmParser->hasInv2PiInlineImm()); 1842 } 1843 1844 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1845 return false; 1846 } 1847 1848 if (type.getScalarSizeInBits() == 16) { 1849 return isInlineableLiteralOp16( 1850 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1851 type, AsmParser->hasInv2PiInlineImm()); 1852 } 1853 1854 return AMDGPU::isInlinableLiteral32( 1855 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1856 AsmParser->hasInv2PiInlineImm()); 1857 } 1858 1859 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1860 // Check that this immediate can be added as literal 1861 if (!isImmTy(ImmTyNone)) { 1862 return false; 1863 } 1864 1865 if (!Imm.IsFPImm) { 1866 // We got int literal token. 1867 1868 if (type == MVT::f64 && hasFPModifiers()) { 1869 // Cannot apply fp modifiers to int literals preserving the same semantics 1870 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1871 // disable these cases. 1872 return false; 1873 } 1874 1875 unsigned Size = type.getSizeInBits(); 1876 if (Size == 64) 1877 Size = 32; 1878 1879 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1880 // types. 1881 return isSafeTruncation(Imm.Val, Size); 1882 } 1883 1884 // We got fp literal token 1885 if (type == MVT::f64) { // Expected 64-bit fp operand 1886 // We would set low 64-bits of literal to zeroes but we accept this literals 1887 return true; 1888 } 1889 1890 if (type == MVT::i64) { // Expected 64-bit int operand 1891 // We don't allow fp literals in 64-bit integer instructions. It is 1892 // unclear how we should encode them. 
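// For illustration (assumed example): a token like '1.5' is acceptable above
// when the instruction expects an f64 operand, but when a 64-bit *integer*
// operand is expected there is no agreed-upon encoding for the value, so the
// literal is rejected here.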
1893 return false; 1894 } 1895 1896 // We allow fp literals with f16x2 operands assuming that the specified 1897 // literal goes into the lower half and the upper half is zero. We also 1898 // require that the literal may be losslesly converted to f16. 1899 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1900 (type == MVT::v2i16)? MVT::i16 : 1901 (type == MVT::v2f32)? MVT::f32 : type; 1902 1903 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1904 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1905 } 1906 1907 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1908 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1909 } 1910 1911 bool AMDGPUOperand::isVRegWithInputMods() const { 1912 return isRegClass(AMDGPU::VGPR_32RegClassID) || 1913 // GFX90A allows DPP on 64-bit operands. 1914 (isRegClass(AMDGPU::VReg_64RegClassID) && 1915 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 1916 } 1917 1918 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1919 if (AsmParser->isVI()) 1920 return isVReg32(); 1921 else if (AsmParser->isGFX9Plus()) 1922 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1923 else 1924 return false; 1925 } 1926 1927 bool AMDGPUOperand::isSDWAFP16Operand() const { 1928 return isSDWAOperand(MVT::f16); 1929 } 1930 1931 bool AMDGPUOperand::isSDWAFP32Operand() const { 1932 return isSDWAOperand(MVT::f32); 1933 } 1934 1935 bool AMDGPUOperand::isSDWAInt16Operand() const { 1936 return isSDWAOperand(MVT::i16); 1937 } 1938 1939 bool AMDGPUOperand::isSDWAInt32Operand() const { 1940 return isSDWAOperand(MVT::i32); 1941 } 1942 1943 bool AMDGPUOperand::isBoolReg() const { 1944 auto FB = AsmParser->getFeatureBits(); 1945 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1946 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 1947 } 1948 1949 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1950 { 1951 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1952 assert(Size == 2 || Size == 4 || Size == 8); 1953 1954 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1955 1956 if (Imm.Mods.Abs) { 1957 Val &= ~FpSignMask; 1958 } 1959 if (Imm.Mods.Neg) { 1960 Val ^= FpSignMask; 1961 } 1962 1963 return Val; 1964 } 1965 1966 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1967 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1968 Inst.getNumOperands())) { 1969 addLiteralImmOperand(Inst, Imm.Val, 1970 ApplyModifiers & 1971 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1972 } else { 1973 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1974 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1975 setImmKindNone(); 1976 } 1977 } 1978 1979 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1980 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1981 auto OpNum = Inst.getNumOperands(); 1982 // Check that this operand accepts literals 1983 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1984 1985 if (ApplyModifiers) { 1986 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1987 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 1988 Val = applyInputFPModifiers(Val, Size); 1989 } 1990 1991 APInt Literal(64, Val); 1992 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1993 1994 if (Imm.IsFPImm) { // We got fp literal token 1995 switch (OpTy) { 1996 case AMDGPU::OPERAND_REG_IMM_INT64: 1997 case AMDGPU::OPERAND_REG_IMM_FP64: 1998 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1999 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2000 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2001 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2002 AsmParser->hasInv2PiInlineImm())) { 2003 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2004 setImmKindConst(); 2005 return; 2006 } 2007 2008 // Non-inlineable 2009 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2010 // For fp operands we check if low 32 bits are zeros 2011 if (Literal.getLoBits(32) != 0) { 2012 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2013 "Can't encode literal as exact 64-bit floating-point operand. " 2014 "Low 32-bits will be set to zero"); 2015 } 2016 2017 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2018 setImmKindLiteral(); 2019 return; 2020 } 2021 2022 // We don't allow fp literals in 64-bit integer instructions. It is 2023 // unclear how we should encode them. This case should be checked earlier 2024 // in predicate methods (isLiteralImm()) 2025 llvm_unreachable("fp literal in 64-bit integer instruction."); 2026 2027 case AMDGPU::OPERAND_REG_IMM_INT32: 2028 case AMDGPU::OPERAND_REG_IMM_FP32: 2029 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2030 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2031 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2032 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2033 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2034 case AMDGPU::OPERAND_REG_IMM_INT16: 2035 case AMDGPU::OPERAND_REG_IMM_FP16: 2036 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2037 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2038 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2039 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2040 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2041 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2042 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2043 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2044 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2045 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2046 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2047 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2048 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2049 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2050 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2051 case AMDGPU::OPERAND_KIMM32: 2052 case AMDGPU::OPERAND_KIMM16: { 2053 bool lost; 2054 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2055 // Convert literal to single precision 2056 FPLiteral.convert(*getOpFltSemantics(OpTy), 2057 APFloat::rmNearestTiesToEven, &lost); 2058 // We allow precision lost but not overflow or underflow. This should be 2059 // checked earlier in isLiteralImm() 2060 2061 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2062 Inst.addOperand(MCOperand::createImm(ImmVal)); 2063 setImmKindLiteral(); 2064 return; 2065 } 2066 default: 2067 llvm_unreachable("invalid operand size"); 2068 } 2069 2070 return; 2071 } 2072 2073 // We got int literal token. 2074 // Only sign extend inline immediates. 
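// A rough illustration of the cases below (assumed examples):
//   v_add_f32 v0, -16, v1   // -16 is in the inline range [-16, 64]: emitted as-is
//   v_add_f32 v0, 999, v1   // not inlinable: low 32 bits emitted as a literal dword
//   v_add_f16 v0, 64, v1    // 64 is still an inline integer constant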
2075 switch (OpTy) { 2076 case AMDGPU::OPERAND_REG_IMM_INT32: 2077 case AMDGPU::OPERAND_REG_IMM_FP32: 2078 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2079 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2080 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2081 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2082 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2083 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2084 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2085 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2086 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2087 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2088 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2089 if (isSafeTruncation(Val, 32) && 2090 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2091 AsmParser->hasInv2PiInlineImm())) { 2092 Inst.addOperand(MCOperand::createImm(Val)); 2093 setImmKindConst(); 2094 return; 2095 } 2096 2097 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2098 setImmKindLiteral(); 2099 return; 2100 2101 case AMDGPU::OPERAND_REG_IMM_INT64: 2102 case AMDGPU::OPERAND_REG_IMM_FP64: 2103 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2104 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2105 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2106 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2107 Inst.addOperand(MCOperand::createImm(Val)); 2108 setImmKindConst(); 2109 return; 2110 } 2111 2112 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2113 setImmKindLiteral(); 2114 return; 2115 2116 case AMDGPU::OPERAND_REG_IMM_INT16: 2117 case AMDGPU::OPERAND_REG_IMM_FP16: 2118 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2119 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2120 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2121 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2122 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2123 if (isSafeTruncation(Val, 16) && 2124 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2125 AsmParser->hasInv2PiInlineImm())) { 2126 Inst.addOperand(MCOperand::createImm(Val)); 2127 setImmKindConst(); 2128 return; 2129 } 2130 2131 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2132 setImmKindLiteral(); 2133 return; 2134 2135 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2136 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2137 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2138 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2139 assert(isSafeTruncation(Val, 16)); 2140 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2141 AsmParser->hasInv2PiInlineImm())); 2142 2143 Inst.addOperand(MCOperand::createImm(Val)); 2144 return; 2145 } 2146 case AMDGPU::OPERAND_KIMM32: 2147 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2148 setImmKindNone(); 2149 return; 2150 case AMDGPU::OPERAND_KIMM16: 2151 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2152 setImmKindNone(); 2153 return; 2154 default: 2155 llvm_unreachable("invalid operand size"); 2156 } 2157 } 2158 2159 template <unsigned Bitwidth> 2160 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2161 APInt Literal(64, Imm.Val); 2162 setImmKindNone(); 2163 2164 if (!Imm.IsFPImm) { 2165 // We got int literal token. 
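// The low Bitwidth bits of the integer token are emitted unchanged here; an
// fp token falls through to the conversion below. Assumed example for the
// 32-bit K-immediate used by madmk-style opcodes:
//   v_madmk_f32 v0, v1, 10.0, v2   // K operand encoded as 0x41200000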
2166 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2167 return; 2168 } 2169 2170 bool Lost; 2171 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2172 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2173 APFloat::rmNearestTiesToEven, &Lost); 2174 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2175 } 2176 2177 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2178 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2179 } 2180 2181 static bool isInlineValue(unsigned Reg) { 2182 switch (Reg) { 2183 case AMDGPU::SRC_SHARED_BASE: 2184 case AMDGPU::SRC_SHARED_LIMIT: 2185 case AMDGPU::SRC_PRIVATE_BASE: 2186 case AMDGPU::SRC_PRIVATE_LIMIT: 2187 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2188 return true; 2189 case AMDGPU::SRC_VCCZ: 2190 case AMDGPU::SRC_EXECZ: 2191 case AMDGPU::SRC_SCC: 2192 return true; 2193 case AMDGPU::SGPR_NULL: 2194 return true; 2195 default: 2196 return false; 2197 } 2198 } 2199 2200 bool AMDGPUOperand::isInlineValue() const { 2201 return isRegKind() && ::isInlineValue(getReg()); 2202 } 2203 2204 //===----------------------------------------------------------------------===// 2205 // AsmParser 2206 //===----------------------------------------------------------------------===// 2207 2208 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2209 if (Is == IS_VGPR) { 2210 switch (RegWidth) { 2211 default: return -1; 2212 case 1: return AMDGPU::VGPR_32RegClassID; 2213 case 2: return AMDGPU::VReg_64RegClassID; 2214 case 3: return AMDGPU::VReg_96RegClassID; 2215 case 4: return AMDGPU::VReg_128RegClassID; 2216 case 5: return AMDGPU::VReg_160RegClassID; 2217 case 6: return AMDGPU::VReg_192RegClassID; 2218 case 7: return AMDGPU::VReg_224RegClassID; 2219 case 8: return AMDGPU::VReg_256RegClassID; 2220 case 16: return AMDGPU::VReg_512RegClassID; 2221 case 32: return AMDGPU::VReg_1024RegClassID; 2222 } 2223 } else if (Is == IS_TTMP) { 2224 switch (RegWidth) { 2225 default: return -1; 2226 case 1: return AMDGPU::TTMP_32RegClassID; 2227 case 2: return AMDGPU::TTMP_64RegClassID; 2228 case 4: return AMDGPU::TTMP_128RegClassID; 2229 case 8: return AMDGPU::TTMP_256RegClassID; 2230 case 16: return AMDGPU::TTMP_512RegClassID; 2231 } 2232 } else if (Is == IS_SGPR) { 2233 switch (RegWidth) { 2234 default: return -1; 2235 case 1: return AMDGPU::SGPR_32RegClassID; 2236 case 2: return AMDGPU::SGPR_64RegClassID; 2237 case 3: return AMDGPU::SGPR_96RegClassID; 2238 case 4: return AMDGPU::SGPR_128RegClassID; 2239 case 5: return AMDGPU::SGPR_160RegClassID; 2240 case 6: return AMDGPU::SGPR_192RegClassID; 2241 case 7: return AMDGPU::SGPR_224RegClassID; 2242 case 8: return AMDGPU::SGPR_256RegClassID; 2243 case 16: return AMDGPU::SGPR_512RegClassID; 2244 } 2245 } else if (Is == IS_AGPR) { 2246 switch (RegWidth) { 2247 default: return -1; 2248 case 1: return AMDGPU::AGPR_32RegClassID; 2249 case 2: return AMDGPU::AReg_64RegClassID; 2250 case 3: return AMDGPU::AReg_96RegClassID; 2251 case 4: return AMDGPU::AReg_128RegClassID; 2252 case 5: return AMDGPU::AReg_160RegClassID; 2253 case 6: return AMDGPU::AReg_192RegClassID; 2254 case 7: return AMDGPU::AReg_224RegClassID; 2255 case 8: return AMDGPU::AReg_256RegClassID; 2256 case 16: return AMDGPU::AReg_512RegClassID; 2257 case 32: return AMDGPU::AReg_1024RegClassID; 2258 } 2259 } 2260 return -1; 2261 } 2262 2263 static unsigned getSpecialRegForName(StringRef RegName) { 2264 return StringSwitch<unsigned>(RegName) 2265 .Case("exec", 
AMDGPU::EXEC) 2266 .Case("vcc", AMDGPU::VCC) 2267 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2268 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2269 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2270 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2271 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2272 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2273 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2274 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2275 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2276 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2277 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2278 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2279 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2280 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2281 .Case("m0", AMDGPU::M0) 2282 .Case("vccz", AMDGPU::SRC_VCCZ) 2283 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2284 .Case("execz", AMDGPU::SRC_EXECZ) 2285 .Case("src_execz", AMDGPU::SRC_EXECZ) 2286 .Case("scc", AMDGPU::SRC_SCC) 2287 .Case("src_scc", AMDGPU::SRC_SCC) 2288 .Case("tba", AMDGPU::TBA) 2289 .Case("tma", AMDGPU::TMA) 2290 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2291 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2292 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2293 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2294 .Case("vcc_lo", AMDGPU::VCC_LO) 2295 .Case("vcc_hi", AMDGPU::VCC_HI) 2296 .Case("exec_lo", AMDGPU::EXEC_LO) 2297 .Case("exec_hi", AMDGPU::EXEC_HI) 2298 .Case("tma_lo", AMDGPU::TMA_LO) 2299 .Case("tma_hi", AMDGPU::TMA_HI) 2300 .Case("tba_lo", AMDGPU::TBA_LO) 2301 .Case("tba_hi", AMDGPU::TBA_HI) 2302 .Case("pc", AMDGPU::PC_REG) 2303 .Case("null", AMDGPU::SGPR_NULL) 2304 .Default(AMDGPU::NoRegister); 2305 } 2306 2307 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2308 SMLoc &EndLoc, bool RestoreOnFailure) { 2309 auto R = parseRegister(); 2310 if (!R) return true; 2311 assert(R->isReg()); 2312 RegNo = R->getReg(); 2313 StartLoc = R->getStartLoc(); 2314 EndLoc = R->getEndLoc(); 2315 return false; 2316 } 2317 2318 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2319 SMLoc &EndLoc) { 2320 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2321 } 2322 2323 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2324 SMLoc &StartLoc, 2325 SMLoc &EndLoc) { 2326 bool Result = 2327 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2328 bool PendingErrors = getParser().hasPendingError(); 2329 getParser().clearPendingErrors(); 2330 if (PendingErrors) 2331 return MatchOperand_ParseFail; 2332 if (Result) 2333 return MatchOperand_NoMatch; 2334 return MatchOperand_Success; 2335 } 2336 2337 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2338 RegisterKind RegKind, unsigned Reg1, 2339 SMLoc Loc) { 2340 switch (RegKind) { 2341 case IS_SPECIAL: 2342 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2343 Reg = AMDGPU::EXEC; 2344 RegWidth = 2; 2345 return true; 2346 } 2347 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2348 Reg = AMDGPU::FLAT_SCR; 2349 RegWidth = 2; 2350 return true; 2351 } 2352 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2353 Reg = AMDGPU::XNACK_MASK; 2354 RegWidth = 2; 2355 return true; 2356 } 2357 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2358 Reg = AMDGPU::VCC; 2359 RegWidth = 2; 2360 return true; 2361 } 2362 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2363 Reg = 
AMDGPU::TBA; 2364 RegWidth = 2; 2365 return true; 2366 } 2367 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2368 Reg = AMDGPU::TMA; 2369 RegWidth = 2; 2370 return true; 2371 } 2372 Error(Loc, "register does not fit in the list"); 2373 return false; 2374 case IS_VGPR: 2375 case IS_SGPR: 2376 case IS_AGPR: 2377 case IS_TTMP: 2378 if (Reg1 != Reg + RegWidth) { 2379 Error(Loc, "registers in a list must have consecutive indices"); 2380 return false; 2381 } 2382 RegWidth++; 2383 return true; 2384 default: 2385 llvm_unreachable("unexpected register kind"); 2386 } 2387 } 2388 2389 struct RegInfo { 2390 StringLiteral Name; 2391 RegisterKind Kind; 2392 }; 2393 2394 static constexpr RegInfo RegularRegisters[] = { 2395 {{"v"}, IS_VGPR}, 2396 {{"s"}, IS_SGPR}, 2397 {{"ttmp"}, IS_TTMP}, 2398 {{"acc"}, IS_AGPR}, 2399 {{"a"}, IS_AGPR}, 2400 }; 2401 2402 static bool isRegularReg(RegisterKind Kind) { 2403 return Kind == IS_VGPR || 2404 Kind == IS_SGPR || 2405 Kind == IS_TTMP || 2406 Kind == IS_AGPR; 2407 } 2408 2409 static const RegInfo* getRegularRegInfo(StringRef Str) { 2410 for (const RegInfo &Reg : RegularRegisters) 2411 if (Str.startswith(Reg.Name)) 2412 return &Reg; 2413 return nullptr; 2414 } 2415 2416 static bool getRegNum(StringRef Str, unsigned& Num) { 2417 return !Str.getAsInteger(10, Num); 2418 } 2419 2420 bool 2421 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2422 const AsmToken &NextToken) const { 2423 2424 // A list of consecutive registers: [s0,s1,s2,s3] 2425 if (Token.is(AsmToken::LBrac)) 2426 return true; 2427 2428 if (!Token.is(AsmToken::Identifier)) 2429 return false; 2430 2431 // A single register like s0 or a range of registers like s[0:1] 2432 2433 StringRef Str = Token.getString(); 2434 const RegInfo *Reg = getRegularRegInfo(Str); 2435 if (Reg) { 2436 StringRef RegName = Reg->Name; 2437 StringRef RegSuffix = Str.substr(RegName.size()); 2438 if (!RegSuffix.empty()) { 2439 unsigned Num; 2440 // A single register with an index: rXX 2441 if (getRegNum(RegSuffix, Num)) 2442 return true; 2443 } else { 2444 // A range of registers: r[XX:YY]. 2445 if (NextToken.is(AsmToken::LBrac)) 2446 return true; 2447 } 2448 } 2449 2450 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2451 } 2452 2453 bool 2454 AMDGPUAsmParser::isRegister() 2455 { 2456 return isRegister(getToken(), peekToken()); 2457 } 2458 2459 unsigned 2460 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2461 unsigned RegNum, 2462 unsigned RegWidth, 2463 SMLoc Loc) { 2464 2465 assert(isRegularReg(RegKind)); 2466 2467 unsigned AlignSize = 1; 2468 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2469 // SGPR and TTMP registers must be aligned. 2470 // Max required alignment is 4 dwords. 
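// Minimal sketch of the check that follows, with assumed values for s[1:2]
// (RegNum = 1, RegWidth = 2): a 64-bit SGPR pair must start at an even index,
// while VGPR tuples have no such restriction.
#if 0 // example only, not compiled
  unsigned ExampleNum = 1, ExampleWidth = 2;            // s[1:2]
  unsigned ExampleAlign = std::min(ExampleWidth, 4u);   // 2
  bool Ok = (ExampleNum % ExampleAlign) == 0;           // false -> "invalid register alignment"
#endif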
2471 AlignSize = std::min(RegWidth, 4u); 2472 } 2473 2474 if (RegNum % AlignSize != 0) { 2475 Error(Loc, "invalid register alignment"); 2476 return AMDGPU::NoRegister; 2477 } 2478 2479 unsigned RegIdx = RegNum / AlignSize; 2480 int RCID = getRegClass(RegKind, RegWidth); 2481 if (RCID == -1) { 2482 Error(Loc, "invalid or unsupported register size"); 2483 return AMDGPU::NoRegister; 2484 } 2485 2486 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2487 const MCRegisterClass RC = TRI->getRegClass(RCID); 2488 if (RegIdx >= RC.getNumRegs()) { 2489 Error(Loc, "register index is out of range"); 2490 return AMDGPU::NoRegister; 2491 } 2492 2493 return RC.getRegister(RegIdx); 2494 } 2495 2496 bool 2497 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2498 int64_t RegLo, RegHi; 2499 if (!skipToken(AsmToken::LBrac, "missing register index")) 2500 return false; 2501 2502 SMLoc FirstIdxLoc = getLoc(); 2503 SMLoc SecondIdxLoc; 2504 2505 if (!parseExpr(RegLo)) 2506 return false; 2507 2508 if (trySkipToken(AsmToken::Colon)) { 2509 SecondIdxLoc = getLoc(); 2510 if (!parseExpr(RegHi)) 2511 return false; 2512 } else { 2513 RegHi = RegLo; 2514 } 2515 2516 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2517 return false; 2518 2519 if (!isUInt<32>(RegLo)) { 2520 Error(FirstIdxLoc, "invalid register index"); 2521 return false; 2522 } 2523 2524 if (!isUInt<32>(RegHi)) { 2525 Error(SecondIdxLoc, "invalid register index"); 2526 return false; 2527 } 2528 2529 if (RegLo > RegHi) { 2530 Error(FirstIdxLoc, "first register index should not exceed second index"); 2531 return false; 2532 } 2533 2534 Num = static_cast<unsigned>(RegLo); 2535 Width = (RegHi - RegLo) + 1; 2536 return true; 2537 } 2538 2539 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2540 unsigned &RegNum, unsigned &RegWidth, 2541 SmallVectorImpl<AsmToken> &Tokens) { 2542 assert(isToken(AsmToken::Identifier)); 2543 unsigned Reg = getSpecialRegForName(getTokenStr()); 2544 if (Reg) { 2545 RegNum = 0; 2546 RegWidth = 1; 2547 RegKind = IS_SPECIAL; 2548 Tokens.push_back(getToken()); 2549 lex(); // skip register name 2550 } 2551 return Reg; 2552 } 2553 2554 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2555 unsigned &RegNum, unsigned &RegWidth, 2556 SmallVectorImpl<AsmToken> &Tokens) { 2557 assert(isToken(AsmToken::Identifier)); 2558 StringRef RegName = getTokenStr(); 2559 auto Loc = getLoc(); 2560 2561 const RegInfo *RI = getRegularRegInfo(RegName); 2562 if (!RI) { 2563 Error(Loc, "invalid register name"); 2564 return AMDGPU::NoRegister; 2565 } 2566 2567 Tokens.push_back(getToken()); 2568 lex(); // skip register name 2569 2570 RegKind = RI->Kind; 2571 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2572 if (!RegSuffix.empty()) { 2573 // Single 32-bit register: vXX. 2574 if (!getRegNum(RegSuffix, RegNum)) { 2575 Error(Loc, "invalid register index"); 2576 return AMDGPU::NoRegister; 2577 } 2578 RegWidth = 1; 2579 } else { 2580 // Range of registers: v[XX:YY]. ":YY" is optional. 
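// Examples of the forms handled by ParseRegRange below (for illustration):
//   v[0:3]   four consecutive registers, RegNum = 0, RegWidth = 4
//   v[2]     single register written in range syntax (":YY" omitted)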
2581 if (!ParseRegRange(RegNum, RegWidth)) 2582 return AMDGPU::NoRegister; 2583 } 2584 2585 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2586 } 2587 2588 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2589 unsigned &RegWidth, 2590 SmallVectorImpl<AsmToken> &Tokens) { 2591 unsigned Reg = AMDGPU::NoRegister; 2592 auto ListLoc = getLoc(); 2593 2594 if (!skipToken(AsmToken::LBrac, 2595 "expected a register or a list of registers")) { 2596 return AMDGPU::NoRegister; 2597 } 2598 2599 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2600 2601 auto Loc = getLoc(); 2602 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2603 return AMDGPU::NoRegister; 2604 if (RegWidth != 1) { 2605 Error(Loc, "expected a single 32-bit register"); 2606 return AMDGPU::NoRegister; 2607 } 2608 2609 for (; trySkipToken(AsmToken::Comma); ) { 2610 RegisterKind NextRegKind; 2611 unsigned NextReg, NextRegNum, NextRegWidth; 2612 Loc = getLoc(); 2613 2614 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2615 NextRegNum, NextRegWidth, 2616 Tokens)) { 2617 return AMDGPU::NoRegister; 2618 } 2619 if (NextRegWidth != 1) { 2620 Error(Loc, "expected a single 32-bit register"); 2621 return AMDGPU::NoRegister; 2622 } 2623 if (NextRegKind != RegKind) { 2624 Error(Loc, "registers in a list must be of the same kind"); 2625 return AMDGPU::NoRegister; 2626 } 2627 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2628 return AMDGPU::NoRegister; 2629 } 2630 2631 if (!skipToken(AsmToken::RBrac, 2632 "expected a comma or a closing square bracket")) { 2633 return AMDGPU::NoRegister; 2634 } 2635 2636 if (isRegularReg(RegKind)) 2637 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2638 2639 return Reg; 2640 } 2641 2642 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2643 unsigned &RegNum, unsigned &RegWidth, 2644 SmallVectorImpl<AsmToken> &Tokens) { 2645 auto Loc = getLoc(); 2646 Reg = AMDGPU::NoRegister; 2647 2648 if (isToken(AsmToken::Identifier)) { 2649 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2650 if (Reg == AMDGPU::NoRegister) 2651 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2652 } else { 2653 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2654 } 2655 2656 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2657 if (Reg == AMDGPU::NoRegister) { 2658 assert(Parser.hasPendingError()); 2659 return false; 2660 } 2661 2662 if (!subtargetHasRegister(*TRI, Reg)) { 2663 if (Reg == AMDGPU::SGPR_NULL) { 2664 Error(Loc, "'null' operand is not supported on this GPU"); 2665 } else { 2666 Error(Loc, "register not available on this GPU"); 2667 } 2668 return false; 2669 } 2670 2671 return true; 2672 } 2673 2674 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2675 unsigned &RegNum, unsigned &RegWidth, 2676 bool RestoreOnFailure /*=false*/) { 2677 Reg = AMDGPU::NoRegister; 2678 2679 SmallVector<AsmToken, 1> Tokens; 2680 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2681 if (RestoreOnFailure) { 2682 while (!Tokens.empty()) { 2683 getLexer().UnLex(Tokens.pop_back_val()); 2684 } 2685 } 2686 return true; 2687 } 2688 return false; 2689 } 2690 2691 Optional<StringRef> 2692 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2693 switch (RegKind) { 2694 case IS_VGPR: 2695 return StringRef(".amdgcn.next_free_vgpr"); 2696 case IS_SGPR: 2697 return StringRef(".amdgcn.next_free_sgpr"); 2698 default: 2699 return None; 2700 } 2701 } 2702 2703 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2704 auto SymbolName = getGprCountSymbolName(RegKind); 2705 assert(SymbolName && "initializing invalid register kind"); 2706 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2707 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2708 } 2709 2710 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2711 unsigned DwordRegIndex, 2712 unsigned RegWidth) { 2713 // Symbols are only defined for GCN targets 2714 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2715 return true; 2716 2717 auto SymbolName = getGprCountSymbolName(RegKind); 2718 if (!SymbolName) 2719 return true; 2720 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2721 2722 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2723 int64_t OldCount; 2724 2725 if (!Sym->isVariable()) 2726 return !Error(getLoc(), 2727 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2728 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2729 return !Error( 2730 getLoc(), 2731 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2732 2733 if (OldCount <= NewMax) 2734 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2735 2736 return true; 2737 } 2738 2739 std::unique_ptr<AMDGPUOperand> 2740 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2741 const auto &Tok = getToken(); 2742 SMLoc StartLoc = Tok.getLoc(); 2743 SMLoc EndLoc = Tok.getEndLoc(); 2744 RegisterKind RegKind; 2745 unsigned Reg, RegNum, RegWidth; 2746 2747 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2748 return nullptr; 2749 } 2750 if (isHsaAbiVersion3Or4(&getSTI())) { 2751 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2752 return nullptr; 2753 } else 2754 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2755 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2756 } 2757 2758 OperandMatchResultTy 2759 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2760 // TODO: add syntactic sugar for 1/(2*PI) 2761 2762 assert(!isRegister()); 2763 assert(!isModifier()); 2764 2765 const auto& Tok = getToken(); 2766 const auto& NextTok = peekToken(); 2767 bool IsReal = Tok.is(AsmToken::Real); 2768 SMLoc S = getLoc(); 2769 bool Negate = false; 2770 2771 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2772 lex(); 2773 IsReal = true; 2774 Negate = true; 2775 } 2776 2777 if (IsReal) { 2778 // Floating-point expressions are not supported. 2779 // Can only allow floating-point literals with an 2780 // optional sign. 2781 2782 StringRef Num = getTokenStr(); 2783 lex(); 2784 2785 APFloat RealVal(APFloat::IEEEdouble()); 2786 auto roundMode = APFloat::rmNearestTiesToEven; 2787 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2788 return MatchOperand_ParseFail; 2789 } 2790 if (Negate) 2791 RealVal.changeSign(); 2792 2793 Operands.push_back( 2794 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2795 AMDGPUOperand::ImmTyNone, true)); 2796 2797 return MatchOperand_Success; 2798 2799 } else { 2800 int64_t IntVal; 2801 const MCExpr *Expr; 2802 SMLoc S = getLoc(); 2803 2804 if (HasSP3AbsModifier) { 2805 // This is a workaround for handling expressions 2806 // as arguments of SP3 'abs' modifier, for example: 2807 // |1.0| 2808 // |-1| 2809 // |1+x| 2810 // This syntax is not compatible with syntax of standard 2811 // MC expressions (due to the trailing '|'). 
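// Presumably the point of the workaround: parseExpression() would treat the
// trailing '|' as a bitwise-or operator and consume it, so parsePrimaryExpr()
// is used below instead, leaving the closing bar to be skipped by the caller.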
2812 SMLoc EndLoc; 2813 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2814 return MatchOperand_ParseFail; 2815 } else { 2816 if (Parser.parseExpression(Expr)) 2817 return MatchOperand_ParseFail; 2818 } 2819 2820 if (Expr->evaluateAsAbsolute(IntVal)) { 2821 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2822 } else { 2823 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2824 } 2825 2826 return MatchOperand_Success; 2827 } 2828 2829 return MatchOperand_NoMatch; 2830 } 2831 2832 OperandMatchResultTy 2833 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2834 if (!isRegister()) 2835 return MatchOperand_NoMatch; 2836 2837 if (auto R = parseRegister()) { 2838 assert(R->isReg()); 2839 Operands.push_back(std::move(R)); 2840 return MatchOperand_Success; 2841 } 2842 return MatchOperand_ParseFail; 2843 } 2844 2845 OperandMatchResultTy 2846 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2847 auto res = parseReg(Operands); 2848 if (res != MatchOperand_NoMatch) { 2849 return res; 2850 } else if (isModifier()) { 2851 return MatchOperand_NoMatch; 2852 } else { 2853 return parseImm(Operands, HasSP3AbsMod); 2854 } 2855 } 2856 2857 bool 2858 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2859 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2860 const auto &str = Token.getString(); 2861 return str == "abs" || str == "neg" || str == "sext"; 2862 } 2863 return false; 2864 } 2865 2866 bool 2867 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2868 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2869 } 2870 2871 bool 2872 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2873 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2874 } 2875 2876 bool 2877 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2878 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2879 } 2880 2881 // Check if this is an operand modifier or an opcode modifier 2882 // which may look like an expression but it is not. We should 2883 // avoid parsing these modifiers as expressions. Currently 2884 // recognized sequences are: 2885 // |...| 2886 // abs(...) 2887 // neg(...) 2888 // sext(...) 2889 // -reg 2890 // -|...| 2891 // -abs(...) 2892 // name:... 2893 // Note that simple opcode modifiers like 'gds' may be parsed as 2894 // expressions; this is a special case. See getExpressionAsToken. 2895 // 2896 bool 2897 AMDGPUAsmParser::isModifier() { 2898 2899 AsmToken Tok = getToken(); 2900 AsmToken NextToken[2]; 2901 peekTokens(NextToken); 2902 2903 return isOperandModifier(Tok, NextToken[0]) || 2904 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2905 isOpcodeModifierWithVal(Tok, NextToken[0]); 2906 } 2907 2908 // Check if the current token is an SP3 'neg' modifier. 2909 // Currently this modifier is allowed in the following context: 2910 // 2911 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2912 // 2. Before an 'abs' modifier: -abs(...) 2913 // 3. Before an SP3 'abs' modifier: -|...| 2914 // 2915 // In all other cases "-" is handled as a part 2916 // of an expression that follows the sign. 
2917 // 2918 // Note: When "-" is followed by an integer literal, 2919 // this is interpreted as integer negation rather 2920 // than a floating-point NEG modifier applied to N. 2921 // Besides being counter-intuitive, such use of the floating-point 2922 // NEG modifier would have resulted in different meanings 2923 // of integer literals used with VOP1/2/C and VOP3, 2924 // for example: 2925 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 2926 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 2927 // Negative fp literals with preceding "-" are 2928 // handled likewise for uniformity 2929 // 2930 bool 2931 AMDGPUAsmParser::parseSP3NegModifier() { 2932 2933 AsmToken NextToken[2]; 2934 peekTokens(NextToken); 2935 2936 if (isToken(AsmToken::Minus) && 2937 (isRegister(NextToken[0], NextToken[1]) || 2938 NextToken[0].is(AsmToken::Pipe) || 2939 isId(NextToken[0], "abs"))) { 2940 lex(); 2941 return true; 2942 } 2943 2944 return false; 2945 } 2946 2947 OperandMatchResultTy 2948 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 2949 bool AllowImm) { 2950 bool Neg, SP3Neg; 2951 bool Abs, SP3Abs; 2952 SMLoc Loc; 2953 2954 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 2955 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 2956 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 2957 return MatchOperand_ParseFail; 2958 } 2959 2960 SP3Neg = parseSP3NegModifier(); 2961 2962 Loc = getLoc(); 2963 Neg = trySkipId("neg"); 2964 if (Neg && SP3Neg) { 2965 Error(Loc, "expected register or immediate"); 2966 return MatchOperand_ParseFail; 2967 } 2968 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 2969 return MatchOperand_ParseFail; 2970 2971 Abs = trySkipId("abs"); 2972 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 2973 return MatchOperand_ParseFail; 2974 2975 Loc = getLoc(); 2976 SP3Abs = trySkipToken(AsmToken::Pipe); 2977 if (Abs && SP3Abs) { 2978 Error(Loc, "expected register or immediate"); 2979 return MatchOperand_ParseFail; 2980 } 2981 2982 OperandMatchResultTy Res; 2983 if (AllowImm) { 2984 Res = parseRegOrImm(Operands, SP3Abs); 2985 } else { 2986 Res = parseReg(Operands); 2987 } 2988 if (Res != MatchOperand_Success) { 2989 return (SP3Neg || Neg || SP3Abs || Abs)? 
MatchOperand_ParseFail : Res; 2990 } 2991 2992 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2993 return MatchOperand_ParseFail; 2994 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2995 return MatchOperand_ParseFail; 2996 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2997 return MatchOperand_ParseFail; 2998 2999 AMDGPUOperand::Modifiers Mods; 3000 Mods.Abs = Abs || SP3Abs; 3001 Mods.Neg = Neg || SP3Neg; 3002 3003 if (Mods.hasFPModifiers()) { 3004 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3005 if (Op.isExpr()) { 3006 Error(Op.getStartLoc(), "expected an absolute expression"); 3007 return MatchOperand_ParseFail; 3008 } 3009 Op.setModifiers(Mods); 3010 } 3011 return MatchOperand_Success; 3012 } 3013 3014 OperandMatchResultTy 3015 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3016 bool AllowImm) { 3017 bool Sext = trySkipId("sext"); 3018 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3019 return MatchOperand_ParseFail; 3020 3021 OperandMatchResultTy Res; 3022 if (AllowImm) { 3023 Res = parseRegOrImm(Operands); 3024 } else { 3025 Res = parseReg(Operands); 3026 } 3027 if (Res != MatchOperand_Success) { 3028 return Sext? MatchOperand_ParseFail : Res; 3029 } 3030 3031 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3032 return MatchOperand_ParseFail; 3033 3034 AMDGPUOperand::Modifiers Mods; 3035 Mods.Sext = Sext; 3036 3037 if (Mods.hasIntModifiers()) { 3038 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3039 if (Op.isExpr()) { 3040 Error(Op.getStartLoc(), "expected an absolute expression"); 3041 return MatchOperand_ParseFail; 3042 } 3043 Op.setModifiers(Mods); 3044 } 3045 3046 return MatchOperand_Success; 3047 } 3048 3049 OperandMatchResultTy 3050 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3051 return parseRegOrImmWithFPInputMods(Operands, false); 3052 } 3053 3054 OperandMatchResultTy 3055 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3056 return parseRegOrImmWithIntInputMods(Operands, false); 3057 } 3058 3059 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3060 auto Loc = getLoc(); 3061 if (trySkipId("off")) { 3062 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3063 AMDGPUOperand::ImmTyOff, false)); 3064 return MatchOperand_Success; 3065 } 3066 3067 if (!isRegister()) 3068 return MatchOperand_NoMatch; 3069 3070 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3071 if (Reg) { 3072 Operands.push_back(std::move(Reg)); 3073 return MatchOperand_Success; 3074 } 3075 3076 return MatchOperand_ParseFail; 3077 3078 } 3079 3080 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3081 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3082 3083 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3084 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3085 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3086 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3087 return Match_InvalidOperand; 3088 3089 if ((TSFlags & SIInstrFlags::VOP3) && 3090 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3091 getForcedEncodingSize() != 64) 3092 return Match_PreferE32; 3093 3094 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3095 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3096 // v_mac_f32/16 allow only dst_sel == DWORD; 3097 auto OpNum = 3098 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3099 const auto &Op = Inst.getOperand(OpNum); 3100 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3101 return Match_InvalidOperand; 3102 } 3103 } 3104 3105 return Match_Success; 3106 } 3107 3108 static ArrayRef<unsigned> getAllVariants() { 3109 static const unsigned Variants[] = { 3110 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3111 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 3112 }; 3113 3114 return makeArrayRef(Variants); 3115 } 3116 3117 // What asm variants we should check 3118 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3119 if (getForcedEncodingSize() == 32) { 3120 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3121 return makeArrayRef(Variants); 3122 } 3123 3124 if (isForcedVOP3()) { 3125 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3126 return makeArrayRef(Variants); 3127 } 3128 3129 if (isForcedSDWA()) { 3130 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3131 AMDGPUAsmVariants::SDWA9}; 3132 return makeArrayRef(Variants); 3133 } 3134 3135 if (isForcedDPP()) { 3136 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3137 return makeArrayRef(Variants); 3138 } 3139 3140 return getAllVariants(); 3141 } 3142 3143 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3144 if (getForcedEncodingSize() == 32) 3145 return "e32"; 3146 3147 if (isForcedVOP3()) 3148 return "e64"; 3149 3150 if (isForcedSDWA()) 3151 return "sdwa"; 3152 3153 if (isForcedDPP()) 3154 return "dpp"; 3155 3156 return ""; 3157 } 3158 3159 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3160 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3161 const unsigned Num = Desc.getNumImplicitUses(); 3162 for (unsigned i = 0; i < Num; ++i) { 3163 unsigned Reg = Desc.ImplicitUses[i]; 3164 switch (Reg) { 3165 case AMDGPU::FLAT_SCR: 3166 case AMDGPU::VCC: 3167 case AMDGPU::VCC_LO: 3168 case AMDGPU::VCC_HI: 3169 case AMDGPU::M0: 3170 return Reg; 3171 default: 3172 break; 3173 } 3174 } 3175 return AMDGPU::NoRegister; 3176 } 3177 3178 // NB: This code is correct only when used to check constant 3179 // bus limitations because GFX7 support no f16 inline constants. 3180 // Note that there are no cases when a GFX7 opcode violates 3181 // constant bus limitations due to the use of an f16 constant. 
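// As a rough guide to the dispatch below (illustrative summary): 8-byte
// operands use the 64-bit inline-constant check, 4-byte operands the 32-bit
// one, and 2-byte operands are further split by operand type into plain i16,
// packed v2i16/v2f16, and f16 cases.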
3182 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3183 unsigned OpIdx) const { 3184 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3185 3186 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3187 return false; 3188 } 3189 3190 const MCOperand &MO = Inst.getOperand(OpIdx); 3191 3192 int64_t Val = MO.getImm(); 3193 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3194 3195 switch (OpSize) { // expected operand size 3196 case 8: 3197 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3198 case 4: 3199 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3200 case 2: { 3201 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3202 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3203 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3204 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3205 return AMDGPU::isInlinableIntLiteral(Val); 3206 3207 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3208 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3209 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3210 return AMDGPU::isInlinableIntLiteralV216(Val); 3211 3212 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3213 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3214 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3215 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3216 3217 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3218 } 3219 default: 3220 llvm_unreachable("invalid operand size"); 3221 } 3222 } 3223 3224 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3225 if (!isGFX10Plus()) 3226 return 1; 3227 3228 switch (Opcode) { 3229 // 64-bit shift instructions can use only one scalar value input 3230 case AMDGPU::V_LSHLREV_B64_e64: 3231 case AMDGPU::V_LSHLREV_B64_gfx10: 3232 case AMDGPU::V_LSHRREV_B64_e64: 3233 case AMDGPU::V_LSHRREV_B64_gfx10: 3234 case AMDGPU::V_ASHRREV_I64_e64: 3235 case AMDGPU::V_ASHRREV_I64_gfx10: 3236 case AMDGPU::V_LSHL_B64_e64: 3237 case AMDGPU::V_LSHR_B64_e64: 3238 case AMDGPU::V_ASHR_I64_e64: 3239 return 1; 3240 default: 3241 return 2; 3242 } 3243 } 3244 3245 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3246 const MCOperand &MO = Inst.getOperand(OpIdx); 3247 if (MO.isImm()) { 3248 return !isInlineConstant(Inst, OpIdx); 3249 } else if (MO.isReg()) { 3250 auto Reg = MO.getReg(); 3251 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3252 auto PReg = mc2PseudoReg(Reg); 3253 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3254 } else { 3255 return true; 3256 } 3257 } 3258 3259 bool 3260 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3261 const OperandVector &Operands) { 3262 const unsigned Opcode = Inst.getOpcode(); 3263 const MCInstrDesc &Desc = MII.get(Opcode); 3264 unsigned LastSGPR = AMDGPU::NoRegister; 3265 unsigned ConstantBusUseCount = 0; 3266 unsigned NumLiterals = 0; 3267 unsigned LiteralSize; 3268 3269 if (Desc.TSFlags & 3270 (SIInstrFlags::VOPC | 3271 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3272 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3273 SIInstrFlags::SDWA)) { 3274 // Check special imm operands (used by madmk, etc) 3275 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3276 ++NumLiterals; 3277 LiteralSize = 4; 3278 } 3279 3280 SmallDenseSet<unsigned> SGPRsUsed; 3281 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3282 if (SGPRUsed != AMDGPU::NoRegister) { 3283 SGPRsUsed.insert(SGPRUsed); 3284 ++ConstantBusUseCount; 3285 } 3286 3287 
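// Assumed examples of how scalar values are counted here (the limit is one
// before GFX10 and two for most GFX10+ opcodes, see getConstantBusLimit):
//   v_add_f32_e64 v0, s1, s2     // two SGPRs -> error on GFX9, accepted on GFX10
//   v_add_f32_e64 v0, s1, 0x1234 // an SGPR plus a literal also count as two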
const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3288 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3289 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3290 3291 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3292 3293 for (int OpIdx : OpIndices) { 3294 if (OpIdx == -1) break; 3295 3296 const MCOperand &MO = Inst.getOperand(OpIdx); 3297 if (usesConstantBus(Inst, OpIdx)) { 3298 if (MO.isReg()) { 3299 LastSGPR = mc2PseudoReg(MO.getReg()); 3300 // Pairs of registers with a partial intersection like these 3301 // s0, s[0:1] 3302 // flat_scratch_lo, flat_scratch 3303 // flat_scratch_lo, flat_scratch_hi 3304 // are theoretically valid but they are disabled anyway. 3305 // Note that this code mimics SIInstrInfo::verifyInstruction 3306 if (!SGPRsUsed.count(LastSGPR)) { 3307 SGPRsUsed.insert(LastSGPR); 3308 ++ConstantBusUseCount; 3309 } 3310 } else { // Expression or a literal 3311 3312 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3313 continue; // special operand like VINTERP attr_chan 3314 3315 // An instruction may use only one literal. 3316 // This has been validated in a previous step. 3317 // See validateVOPLiteral. 3318 // This literal may be used by more than one operand. 3319 // If all these operands are of the same size, 3320 // this literal counts as one scalar value. 3321 // Otherwise it counts as 2 scalar values. 3322 // See "GFX10 Shader Programming", section 3.6.2.3. 3323 3324 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3325 if (Size < 4) Size = 4; 3326 3327 if (NumLiterals == 0) { 3328 NumLiterals = 1; 3329 LiteralSize = Size; 3330 } else if (LiteralSize != Size) { 3331 NumLiterals = 2; 3332 } 3333 } 3334 } 3335 } 3336 } 3337 ConstantBusUseCount += NumLiterals; 3338 3339 if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) 3340 return true; 3341 3342 SMLoc LitLoc = getLitLoc(Operands); 3343 SMLoc RegLoc = getRegLoc(LastSGPR, Operands); 3344 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? 
RegLoc : LitLoc; 3345 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3346 return false; 3347 } 3348 3349 bool 3350 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3351 const OperandVector &Operands) { 3352 const unsigned Opcode = Inst.getOpcode(); 3353 const MCInstrDesc &Desc = MII.get(Opcode); 3354 3355 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3356 if (DstIdx == -1 || 3357 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3358 return true; 3359 } 3360 3361 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3362 3363 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3364 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3365 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3366 3367 assert(DstIdx != -1); 3368 const MCOperand &Dst = Inst.getOperand(DstIdx); 3369 assert(Dst.isReg()); 3370 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 3371 3372 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3373 3374 for (int SrcIdx : SrcIndices) { 3375 if (SrcIdx == -1) break; 3376 const MCOperand &Src = Inst.getOperand(SrcIdx); 3377 if (Src.isReg()) { 3378 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3379 if (isRegIntersect(DstReg, SrcReg, TRI)) { 3380 Error(getRegLoc(SrcReg, Operands), 3381 "destination must be different than all sources"); 3382 return false; 3383 } 3384 } 3385 } 3386 3387 return true; 3388 } 3389 3390 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3391 3392 const unsigned Opc = Inst.getOpcode(); 3393 const MCInstrDesc &Desc = MII.get(Opc); 3394 3395 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3396 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3397 assert(ClampIdx != -1); 3398 return Inst.getOperand(ClampIdx).getImm() == 0; 3399 } 3400 3401 return true; 3402 } 3403 3404 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3405 3406 const unsigned Opc = Inst.getOpcode(); 3407 const MCInstrDesc &Desc = MII.get(Opc); 3408 3409 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3410 return true; 3411 3412 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3413 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3414 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3415 3416 assert(VDataIdx != -1); 3417 3418 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3419 return true; 3420 3421 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3422 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3423 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3424 if (DMask == 0) 3425 DMask = 1; 3426 3427 unsigned DataSize = 3428 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 3429 if (hasPackedD16()) { 3430 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3431 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3432 DataSize = (DataSize + 1) / 2; 3433 } 3434 3435 return (VDataSize / 4) == DataSize + TFESize; 3436 } 3437 3438 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3439 const unsigned Opc = Inst.getOpcode(); 3440 const MCInstrDesc &Desc = MII.get(Opc); 3441 3442 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3443 return true; 3444 3445 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3446 3447 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3448 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3449 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3450 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3451 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3452 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3453 3454 assert(VAddr0Idx != -1); 3455 assert(SrsrcIdx != -1); 3456 assert(SrsrcIdx > VAddr0Idx); 3457 3458 if (DimIdx == -1) 3459 return true; // intersect_ray 3460 3461 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3462 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3463 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3464 unsigned ActualAddrSize = 3465 IsNSA ? SrsrcIdx - VAddr0Idx 3466 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3467 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3468 3469 unsigned ExpectedAddrSize = 3470 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3471 3472 if (!IsNSA) { 3473 if (ExpectedAddrSize > 8) 3474 ExpectedAddrSize = 16; 3475 3476 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3477 // This provides backward compatibility for assembly created 3478 // before 160b/192b/224b types were directly supported. 3479 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3480 return true; 3481 } 3482 3483 return ActualAddrSize == ExpectedAddrSize; 3484 } 3485 3486 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3487 3488 const unsigned Opc = Inst.getOpcode(); 3489 const MCInstrDesc &Desc = MII.get(Opc); 3490 3491 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3492 return true; 3493 if (!Desc.mayLoad() || !Desc.mayStore()) 3494 return true; // Not atomic 3495 3496 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3497 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3498 3499 // This is an incomplete check because image_atomic_cmpswap 3500 // may only use 0x3 and 0xf while other atomic operations 3501 // may use 0x1 and 0x3. However these limitations are 3502 // verified when we check that dmask matches dst size. 3503 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3504 } 3505 3506 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3507 3508 const unsigned Opc = Inst.getOpcode(); 3509 const MCInstrDesc &Desc = MII.get(Opc); 3510 3511 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3512 return true; 3513 3514 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3515 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3516 3517 // GATHER4 instructions use dmask in a different fashion compared to 3518 // other MIMG instructions. The only useful DMASK values are 3519 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3520 // (red,red,red,red) etc.) 
The ISA document doesn't mention 3521 // this. 3522 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3523 } 3524 3525 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3526 const unsigned Opc = Inst.getOpcode(); 3527 const MCInstrDesc &Desc = MII.get(Opc); 3528 3529 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3530 return true; 3531 3532 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3533 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3534 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3535 3536 if (!BaseOpcode->MSAA) 3537 return true; 3538 3539 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3540 assert(DimIdx != -1); 3541 3542 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3543 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3544 3545 return DimInfo->MSAA; 3546 } 3547 3548 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3549 { 3550 switch (Opcode) { 3551 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3552 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3553 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3554 return true; 3555 default: 3556 return false; 3557 } 3558 } 3559 3560 // movrels* opcodes should only allow VGPRS as src0. 3561 // This is specified in .td description for vop1/vop3, 3562 // but sdwa is handled differently. See isSDWAOperand. 3563 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3564 const OperandVector &Operands) { 3565 3566 const unsigned Opc = Inst.getOpcode(); 3567 const MCInstrDesc &Desc = MII.get(Opc); 3568 3569 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3570 return true; 3571 3572 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3573 assert(Src0Idx != -1); 3574 3575 SMLoc ErrLoc; 3576 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3577 if (Src0.isReg()) { 3578 auto Reg = mc2PseudoReg(Src0.getReg()); 3579 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3580 if (!isSGPR(Reg, TRI)) 3581 return true; 3582 ErrLoc = getRegLoc(Reg, Operands); 3583 } else { 3584 ErrLoc = getConstLoc(Operands); 3585 } 3586 3587 Error(ErrLoc, "source operand must be a VGPR"); 3588 return false; 3589 } 3590 3591 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3592 const OperandVector &Operands) { 3593 3594 const unsigned Opc = Inst.getOpcode(); 3595 3596 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3597 return true; 3598 3599 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3600 assert(Src0Idx != -1); 3601 3602 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3603 if (!Src0.isReg()) 3604 return true; 3605 3606 auto Reg = mc2PseudoReg(Src0.getReg()); 3607 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3608 if (isSGPR(Reg, TRI)) { 3609 Error(getRegLoc(Reg, Operands), 3610 "source operand must be either a VGPR or an inline constant"); 3611 return false; 3612 } 3613 3614 return true; 3615 } 3616 3617 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 3618 const OperandVector &Operands) { 3619 const unsigned Opc = Inst.getOpcode(); 3620 const MCInstrDesc &Desc = MII.get(Opc); 3621 3622 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 3623 return true; 3624 3625 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 3626 if (Src2Idx == -1) 3627 return true; 3628 3629 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 3630 if (!Src2.isReg()) 3631 return true; 3632 3633 MCRegister Src2Reg = Src2.getReg(); 3634 MCRegister DstReg = Inst.getOperand(0).getReg(); 3635 if 
(Src2Reg == DstReg) 3636 return true; 3637 3638 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3639 if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128) 3640 return true; 3641 3642 if (isRegIntersect(Src2Reg, DstReg, TRI)) { 3643 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands), 3644 "source 2 operand must not partially overlap with dst"); 3645 return false; 3646 } 3647 3648 return true; 3649 } 3650 3651 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3652 switch (Inst.getOpcode()) { 3653 default: 3654 return true; 3655 case V_DIV_SCALE_F32_gfx6_gfx7: 3656 case V_DIV_SCALE_F32_vi: 3657 case V_DIV_SCALE_F32_gfx10: 3658 case V_DIV_SCALE_F64_gfx6_gfx7: 3659 case V_DIV_SCALE_F64_vi: 3660 case V_DIV_SCALE_F64_gfx10: 3661 break; 3662 } 3663 3664 // TODO: Check that src0 = src1 or src2. 3665 3666 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3667 AMDGPU::OpName::src1_modifiers, 3668 AMDGPU::OpName::src2_modifiers}) { 3669 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3670 .getImm() & 3671 SISrcMods::ABS) { 3672 return false; 3673 } 3674 } 3675 3676 return true; 3677 } 3678 3679 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3680 3681 const unsigned Opc = Inst.getOpcode(); 3682 const MCInstrDesc &Desc = MII.get(Opc); 3683 3684 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3685 return true; 3686 3687 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3688 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3689 if (isCI() || isSI()) 3690 return false; 3691 } 3692 3693 return true; 3694 } 3695 3696 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3697 const unsigned Opc = Inst.getOpcode(); 3698 const MCInstrDesc &Desc = MII.get(Opc); 3699 3700 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3701 return true; 3702 3703 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3704 if (DimIdx < 0) 3705 return true; 3706 3707 long Imm = Inst.getOperand(DimIdx).getImm(); 3708 if (Imm < 0 || Imm >= 8) 3709 return false; 3710 3711 return true; 3712 } 3713 3714 static bool IsRevOpcode(const unsigned Opcode) 3715 { 3716 switch (Opcode) { 3717 case AMDGPU::V_SUBREV_F32_e32: 3718 case AMDGPU::V_SUBREV_F32_e64: 3719 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3720 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3721 case AMDGPU::V_SUBREV_F32_e32_vi: 3722 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3723 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3724 case AMDGPU::V_SUBREV_F32_e64_vi: 3725 3726 case AMDGPU::V_SUBREV_CO_U32_e32: 3727 case AMDGPU::V_SUBREV_CO_U32_e64: 3728 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3729 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3730 3731 case AMDGPU::V_SUBBREV_U32_e32: 3732 case AMDGPU::V_SUBBREV_U32_e64: 3733 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3734 case AMDGPU::V_SUBBREV_U32_e32_vi: 3735 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3736 case AMDGPU::V_SUBBREV_U32_e64_vi: 3737 3738 case AMDGPU::V_SUBREV_U32_e32: 3739 case AMDGPU::V_SUBREV_U32_e64: 3740 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3741 case AMDGPU::V_SUBREV_U32_e32_vi: 3742 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3743 case AMDGPU::V_SUBREV_U32_e64_vi: 3744 3745 case AMDGPU::V_SUBREV_F16_e32: 3746 case AMDGPU::V_SUBREV_F16_e64: 3747 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3748 case AMDGPU::V_SUBREV_F16_e32_vi: 3749 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3750 case AMDGPU::V_SUBREV_F16_e64_vi: 3751 3752 case AMDGPU::V_SUBREV_U16_e32: 3753 case AMDGPU::V_SUBREV_U16_e64: 3754 case AMDGPU::V_SUBREV_U16_e32_vi: 3755 case
AMDGPU::V_SUBREV_U16_e64_vi: 3756 3757 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3758 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3759 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3760 3761 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3762 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3763 3764 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3765 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3766 3767 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3768 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3769 3770 case AMDGPU::V_LSHRREV_B32_e32: 3771 case AMDGPU::V_LSHRREV_B32_e64: 3772 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3773 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3774 case AMDGPU::V_LSHRREV_B32_e32_vi: 3775 case AMDGPU::V_LSHRREV_B32_e64_vi: 3776 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3777 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3778 3779 case AMDGPU::V_ASHRREV_I32_e32: 3780 case AMDGPU::V_ASHRREV_I32_e64: 3781 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3782 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3783 case AMDGPU::V_ASHRREV_I32_e32_vi: 3784 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3785 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3786 case AMDGPU::V_ASHRREV_I32_e64_vi: 3787 3788 case AMDGPU::V_LSHLREV_B32_e32: 3789 case AMDGPU::V_LSHLREV_B32_e64: 3790 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3791 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3792 case AMDGPU::V_LSHLREV_B32_e32_vi: 3793 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3794 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3795 case AMDGPU::V_LSHLREV_B32_e64_vi: 3796 3797 case AMDGPU::V_LSHLREV_B16_e32: 3798 case AMDGPU::V_LSHLREV_B16_e64: 3799 case AMDGPU::V_LSHLREV_B16_e32_vi: 3800 case AMDGPU::V_LSHLREV_B16_e64_vi: 3801 case AMDGPU::V_LSHLREV_B16_gfx10: 3802 3803 case AMDGPU::V_LSHRREV_B16_e32: 3804 case AMDGPU::V_LSHRREV_B16_e64: 3805 case AMDGPU::V_LSHRREV_B16_e32_vi: 3806 case AMDGPU::V_LSHRREV_B16_e64_vi: 3807 case AMDGPU::V_LSHRREV_B16_gfx10: 3808 3809 case AMDGPU::V_ASHRREV_I16_e32: 3810 case AMDGPU::V_ASHRREV_I16_e64: 3811 case AMDGPU::V_ASHRREV_I16_e32_vi: 3812 case AMDGPU::V_ASHRREV_I16_e64_vi: 3813 case AMDGPU::V_ASHRREV_I16_gfx10: 3814 3815 case AMDGPU::V_LSHLREV_B64_e64: 3816 case AMDGPU::V_LSHLREV_B64_gfx10: 3817 case AMDGPU::V_LSHLREV_B64_vi: 3818 3819 case AMDGPU::V_LSHRREV_B64_e64: 3820 case AMDGPU::V_LSHRREV_B64_gfx10: 3821 case AMDGPU::V_LSHRREV_B64_vi: 3822 3823 case AMDGPU::V_ASHRREV_I64_e64: 3824 case AMDGPU::V_ASHRREV_I64_gfx10: 3825 case AMDGPU::V_ASHRREV_I64_vi: 3826 3827 case AMDGPU::V_PK_LSHLREV_B16: 3828 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3829 case AMDGPU::V_PK_LSHLREV_B16_vi: 3830 3831 case AMDGPU::V_PK_LSHRREV_B16: 3832 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3833 case AMDGPU::V_PK_LSHRREV_B16_vi: 3834 case AMDGPU::V_PK_ASHRREV_I16: 3835 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3836 case AMDGPU::V_PK_ASHRREV_I16_vi: 3837 return true; 3838 default: 3839 return false; 3840 } 3841 } 3842 3843 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3844 3845 using namespace SIInstrFlags; 3846 const unsigned Opcode = Inst.getOpcode(); 3847 const MCInstrDesc &Desc = MII.get(Opcode); 3848 3849 // lds_direct register is defined so that it can be used 3850 // with 9-bit operands only. Ignore encodings which do not accept these. 
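// The checks below additionally reject lds_direct on GFX90A, with 'rev'
// opcodes, with SDWA, and in any source position other than src0.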
3851 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 3852 if ((Desc.TSFlags & Enc) == 0) 3853 return None; 3854 3855 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 3856 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 3857 if (SrcIdx == -1) 3858 break; 3859 const auto &Src = Inst.getOperand(SrcIdx); 3860 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3861 3862 if (isGFX90A()) 3863 return StringRef("lds_direct is not supported on this GPU"); 3864 3865 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 3866 return StringRef("lds_direct cannot be used with this instruction"); 3867 3868 if (SrcName != OpName::src0) 3869 return StringRef("lds_direct may be used as src0 only"); 3870 } 3871 } 3872 3873 return None; 3874 } 3875 3876 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3877 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3878 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3879 if (Op.isFlatOffset()) 3880 return Op.getStartLoc(); 3881 } 3882 return getLoc(); 3883 } 3884 3885 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3886 const OperandVector &Operands) { 3887 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3888 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3889 return true; 3890 3891 auto Opcode = Inst.getOpcode(); 3892 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3893 assert(OpNum != -1); 3894 3895 const auto &Op = Inst.getOperand(OpNum); 3896 if (!hasFlatOffsets() && Op.getImm() != 0) { 3897 Error(getFlatOffsetLoc(Operands), 3898 "flat offset modifier is not supported on this GPU"); 3899 return false; 3900 } 3901 3902 // For FLAT segment the offset must be positive; 3903 // MSB is ignored and forced to zero. 3904 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 3905 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 3906 if (!isIntN(OffsetSize, Op.getImm())) { 3907 Error(getFlatOffsetLoc(Operands), 3908 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 3909 return false; 3910 } 3911 } else { 3912 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 3913 if (!isUIntN(OffsetSize, Op.getImm())) { 3914 Error(getFlatOffsetLoc(Operands), 3915 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 3916 return false; 3917 } 3918 } 3919 3920 return true; 3921 } 3922 3923 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3924 // Start with second operand because SMEM Offset cannot be dst or src0. 
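// Fall back to the current parser location if no explicit offset operand
// was parsed.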
3925 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 3926 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3927 if (Op.isSMEMOffset()) 3928 return Op.getStartLoc(); 3929 } 3930 return getLoc(); 3931 } 3932 3933 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3934 const OperandVector &Operands) { 3935 if (isCI() || isSI()) 3936 return true; 3937 3938 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3939 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3940 return true; 3941 3942 auto Opcode = Inst.getOpcode(); 3943 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3944 if (OpNum == -1) 3945 return true; 3946 3947 const auto &Op = Inst.getOperand(OpNum); 3948 if (!Op.isImm()) 3949 return true; 3950 3951 uint64_t Offset = Op.getImm(); 3952 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3953 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3954 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3955 return true; 3956 3957 Error(getSMEMOffsetLoc(Operands), 3958 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 3959 "expected a 21-bit signed offset"); 3960 3961 return false; 3962 } 3963 3964 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3965 unsigned Opcode = Inst.getOpcode(); 3966 const MCInstrDesc &Desc = MII.get(Opcode); 3967 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3968 return true; 3969 3970 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3971 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3972 3973 const int OpIndices[] = { Src0Idx, Src1Idx }; 3974 3975 unsigned NumExprs = 0; 3976 unsigned NumLiterals = 0; 3977 uint32_t LiteralValue; 3978 3979 for (int OpIdx : OpIndices) { 3980 if (OpIdx == -1) break; 3981 3982 const MCOperand &MO = Inst.getOperand(OpIdx); 3983 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3984 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3985 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3986 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3987 if (NumLiterals == 0 || LiteralValue != Value) { 3988 LiteralValue = Value; 3989 ++NumLiterals; 3990 } 3991 } else if (MO.isExpr()) { 3992 ++NumExprs; 3993 } 3994 } 3995 } 3996 3997 return NumLiterals + NumExprs <= 1; 3998 } 3999 4000 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4001 const unsigned Opc = Inst.getOpcode(); 4002 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 4003 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 4004 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4005 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4006 4007 if (OpSel & ~3) 4008 return false; 4009 } 4010 return true; 4011 } 4012 4013 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4014 const OperandVector &Operands) { 4015 const unsigned Opc = Inst.getOpcode(); 4016 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4017 if (DppCtrlIdx < 0) 4018 return true; 4019 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm(); 4020 4021 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 4022 // DPP64 is supported for row_newbcast only. 
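// A src0 register that has a sub1 subregister is 64 bits or wider, so any
// other dpp_ctrl value is rejected for it below.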
4023 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4024 if (Src0Idx >= 0 && 4025 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 4026 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4027 Error(S, "64 bit dpp only supports row_newbcast"); 4028 return false; 4029 } 4030 } 4031 4032 return true; 4033 } 4034 4035 // Check if VCC register matches wavefront size 4036 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4037 auto FB = getFeatureBits(); 4038 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4039 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4040 } 4041 4042 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4043 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4044 const OperandVector &Operands) { 4045 unsigned Opcode = Inst.getOpcode(); 4046 const MCInstrDesc &Desc = MII.get(Opcode); 4047 const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); 4048 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4049 ImmIdx == -1) 4050 return true; 4051 4052 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4053 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4054 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 4055 4056 const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx}; 4057 4058 unsigned NumExprs = 0; 4059 unsigned NumLiterals = 0; 4060 uint32_t LiteralValue; 4061 4062 for (int OpIdx : OpIndices) { 4063 if (OpIdx == -1) 4064 continue; 4065 4066 const MCOperand &MO = Inst.getOperand(OpIdx); 4067 if (!MO.isImm() && !MO.isExpr()) 4068 continue; 4069 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 4070 continue; 4071 4072 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 4073 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 4074 Error(getConstLoc(Operands), 4075 "inline constants are not allowed for this operand"); 4076 return false; 4077 } 4078 4079 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4080 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4081 if (NumLiterals == 0 || LiteralValue != Value) { 4082 LiteralValue = Value; 4083 ++NumLiterals; 4084 } 4085 } else if (MO.isExpr()) { 4086 ++NumExprs; 4087 } 4088 } 4089 NumLiterals += NumExprs; 4090 4091 if (!NumLiterals) 4092 return true; 4093 4094 if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4095 Error(getLitLoc(Operands), "literal operands are not supported"); 4096 return false; 4097 } 4098 4099 if (NumLiterals > 1) { 4100 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4101 return false; 4102 } 4103 4104 return true; 4105 } 4106 4107 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4108 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4109 const MCRegisterInfo *MRI) { 4110 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4111 if (OpIdx < 0) 4112 return -1; 4113 4114 const MCOperand &Op = Inst.getOperand(OpIdx); 4115 if (!Op.isReg()) 4116 return -1; 4117 4118 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4119 auto Reg = Sub ? Sub : Op.getReg(); 4120 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4121 return AGPR32.contains(Reg) ? 
1 : 0; 4122 } 4123 4124 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4125 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4126 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4127 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4128 SIInstrFlags::DS)) == 0) 4129 return true; 4130 4131 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4132 : AMDGPU::OpName::vdata; 4133 4134 const MCRegisterInfo *MRI = getMRI(); 4135 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4136 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4137 4138 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4139 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4140 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4141 return false; 4142 } 4143 4144 auto FB = getFeatureBits(); 4145 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4146 if (DataAreg < 0 || DstAreg < 0) 4147 return true; 4148 return DstAreg == DataAreg; 4149 } 4150 4151 return DstAreg < 1 && DataAreg < 1; 4152 } 4153 4154 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4155 auto FB = getFeatureBits(); 4156 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4157 return true; 4158 4159 const MCRegisterInfo *MRI = getMRI(); 4160 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4161 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4162 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4163 const MCOperand &Op = Inst.getOperand(I); 4164 if (!Op.isReg()) 4165 continue; 4166 4167 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4168 if (!Sub) 4169 continue; 4170 4171 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4172 return false; 4173 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4174 return false; 4175 } 4176 4177 return true; 4178 } 4179 4180 // gfx90a has an undocumented limitation: 4181 // DS_GWS opcodes must use even aligned registers. 4182 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4183 const OperandVector &Operands) { 4184 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4185 return true; 4186 4187 int Opc = Inst.getOpcode(); 4188 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4189 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4190 return true; 4191 4192 const MCRegisterInfo *MRI = getMRI(); 4193 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4194 int Data0Pos = 4195 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4196 assert(Data0Pos != -1); 4197 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4198 auto RegIdx = Reg - (VGPR32.contains(Reg) ? 
AMDGPU::VGPR0 : AMDGPU::AGPR0); 4199 if (RegIdx & 1) { 4200 SMLoc RegLoc = getRegLoc(Reg, Operands); 4201 Error(RegLoc, "vgpr must be even aligned"); 4202 return false; 4203 } 4204 4205 return true; 4206 } 4207 4208 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4209 const OperandVector &Operands, 4210 const SMLoc &IDLoc) { 4211 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4212 AMDGPU::OpName::cpol); 4213 if (CPolPos == -1) 4214 return true; 4215 4216 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4217 4218 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4219 if ((TSFlags & (SIInstrFlags::SMRD)) && 4220 (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) { 4221 Error(IDLoc, "invalid cache policy for SMRD instruction"); 4222 return false; 4223 } 4224 4225 if (isGFX90A() && (CPol & CPol::SCC)) { 4226 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4227 StringRef CStr(S.getPointer()); 4228 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4229 Error(S, "scc is not supported on this GPU"); 4230 return false; 4231 } 4232 4233 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4234 return true; 4235 4236 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4237 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4238 Error(IDLoc, "instruction must use glc"); 4239 return false; 4240 } 4241 } else { 4242 if (CPol & CPol::GLC) { 4243 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4244 StringRef CStr(S.getPointer()); 4245 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]); 4246 Error(S, "instruction must not use glc"); 4247 return false; 4248 } 4249 } 4250 4251 return true; 4252 } 4253 4254 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4255 const SMLoc &IDLoc, 4256 const OperandVector &Operands) { 4257 if (auto ErrMsg = validateLdsDirect(Inst)) { 4258 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4259 return false; 4260 } 4261 if (!validateSOPLiteral(Inst)) { 4262 Error(getLitLoc(Operands), 4263 "only one literal operand is allowed"); 4264 return false; 4265 } 4266 if (!validateVOPLiteral(Inst, Operands)) { 4267 return false; 4268 } 4269 if (!validateConstantBusLimitations(Inst, Operands)) { 4270 return false; 4271 } 4272 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4273 return false; 4274 } 4275 if (!validateIntClampSupported(Inst)) { 4276 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4277 "integer clamping is not supported on this GPU"); 4278 return false; 4279 } 4280 if (!validateOpSel(Inst)) { 4281 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4282 "invalid op_sel operand"); 4283 return false; 4284 } 4285 if (!validateDPP(Inst, Operands)) { 4286 return false; 4287 } 4288 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
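// MIMG instructions carry d16 as an explicit operand, which is checked here.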
4289 if (!validateMIMGD16(Inst)) { 4290 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4291 "d16 modifier is not supported on this GPU"); 4292 return false; 4293 } 4294 if (!validateMIMGDim(Inst)) { 4295 Error(IDLoc, "dim modifier is required on this GPU"); 4296 return false; 4297 } 4298 if (!validateMIMGMSAA(Inst)) { 4299 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4300 "invalid dim; must be MSAA type"); 4301 return false; 4302 } 4303 if (!validateMIMGDataSize(Inst)) { 4304 Error(IDLoc, 4305 "image data size does not match dmask and tfe"); 4306 return false; 4307 } 4308 if (!validateMIMGAddrSize(Inst)) { 4309 Error(IDLoc, 4310 "image address size does not match dim and a16"); 4311 return false; 4312 } 4313 if (!validateMIMGAtomicDMask(Inst)) { 4314 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4315 "invalid atomic image dmask"); 4316 return false; 4317 } 4318 if (!validateMIMGGatherDMask(Inst)) { 4319 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4320 "invalid image_gather dmask: only one bit must be set"); 4321 return false; 4322 } 4323 if (!validateMovrels(Inst, Operands)) { 4324 return false; 4325 } 4326 if (!validateFlatOffset(Inst, Operands)) { 4327 return false; 4328 } 4329 if (!validateSMEMOffset(Inst, Operands)) { 4330 return false; 4331 } 4332 if (!validateMAIAccWrite(Inst, Operands)) { 4333 return false; 4334 } 4335 if (!validateMFMA(Inst, Operands)) { 4336 return false; 4337 } 4338 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4339 return false; 4340 } 4341 4342 if (!validateAGPRLdSt(Inst)) { 4343 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4344 ? "invalid register class: data and dst should be all VGPR or AGPR" 4345 : "invalid register class: agpr loads and stores not supported on this GPU" 4346 ); 4347 return false; 4348 } 4349 if (!validateVGPRAlign(Inst)) { 4350 Error(IDLoc, 4351 "invalid register class: vgpr tuples must be 64 bit aligned"); 4352 return false; 4353 } 4354 if (!validateGWS(Inst, Operands)) { 4355 return false; 4356 } 4357 4358 if (!validateDivScale(Inst)) { 4359 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4360 return false; 4361 } 4362 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4363 return false; 4364 } 4365 4366 return true; 4367 } 4368 4369 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4370 const FeatureBitset &FBS, 4371 unsigned VariantID = 0); 4372 4373 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4374 const FeatureBitset &AvailableFeatures, 4375 unsigned VariantID); 4376 4377 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4378 const FeatureBitset &FBS) { 4379 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4380 } 4381 4382 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4383 const FeatureBitset &FBS, 4384 ArrayRef<unsigned> Variants) { 4385 for (auto Variant : Variants) { 4386 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4387 return true; 4388 } 4389 4390 return false; 4391 } 4392 4393 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4394 const SMLoc &IDLoc) { 4395 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4396 4397 // Check if requested instruction variant is supported. 4398 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4399 return false; 4400 4401 // This instruction is not supported. 4402 // Clear any other pending errors because they are no longer relevant. 4403 getParser().clearPendingErrors(); 4404 4405 // Requested instruction variant is not supported. 
4406 // Check if any other variants are supported. 4407 StringRef VariantName = getMatchedVariantName(); 4408 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4409 return Error(IDLoc, 4410 Twine(VariantName, 4411 " variant of this instruction is not supported")); 4412 } 4413 4414 // Finally check if this instruction is supported on any other GPU. 4415 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4416 return Error(IDLoc, "instruction not supported on this GPU"); 4417 } 4418 4419 // Instruction not supported on any GPU. Probably a typo. 4420 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4421 return Error(IDLoc, "invalid instruction" + Suggestion); 4422 } 4423 4424 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4425 OperandVector &Operands, 4426 MCStreamer &Out, 4427 uint64_t &ErrorInfo, 4428 bool MatchingInlineAsm) { 4429 MCInst Inst; 4430 unsigned Result = Match_Success; 4431 for (auto Variant : getMatchedVariants()) { 4432 uint64_t EI; 4433 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4434 Variant); 4435 // We order match statuses from least to most specific. We use most specific 4436 // status as resulting 4437 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4438 if ((R == Match_Success) || 4439 (R == Match_PreferE32) || 4440 (R == Match_MissingFeature && Result != Match_PreferE32) || 4441 (R == Match_InvalidOperand && Result != Match_MissingFeature 4442 && Result != Match_PreferE32) || 4443 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4444 && Result != Match_MissingFeature 4445 && Result != Match_PreferE32)) { 4446 Result = R; 4447 ErrorInfo = EI; 4448 } 4449 if (R == Match_Success) 4450 break; 4451 } 4452 4453 if (Result == Match_Success) { 4454 if (!validateInstruction(Inst, IDLoc, Operands)) { 4455 return true; 4456 } 4457 Inst.setLoc(IDLoc); 4458 Out.emitInstruction(Inst, getSTI()); 4459 return false; 4460 } 4461 4462 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4463 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4464 return true; 4465 } 4466 4467 switch (Result) { 4468 default: break; 4469 case Match_MissingFeature: 4470 // It has been verified that the specified instruction 4471 // mnemonic is valid. A match was found but it requires 4472 // features which are not supported on this GPU. 
4473 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4474 4475 case Match_InvalidOperand: { 4476 SMLoc ErrorLoc = IDLoc; 4477 if (ErrorInfo != ~0ULL) { 4478 if (ErrorInfo >= Operands.size()) { 4479 return Error(IDLoc, "too few operands for instruction"); 4480 } 4481 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4482 if (ErrorLoc == SMLoc()) 4483 ErrorLoc = IDLoc; 4484 } 4485 return Error(ErrorLoc, "invalid operand for instruction"); 4486 } 4487 4488 case Match_PreferE32: 4489 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4490 "should be encoded as e32"); 4491 case Match_MnemonicFail: 4492 llvm_unreachable("Invalid instructions should have been handled already"); 4493 } 4494 llvm_unreachable("Implement any new match types added!"); 4495 } 4496 4497 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4498 int64_t Tmp = -1; 4499 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4500 return true; 4501 } 4502 if (getParser().parseAbsoluteExpression(Tmp)) { 4503 return true; 4504 } 4505 Ret = static_cast<uint32_t>(Tmp); 4506 return false; 4507 } 4508 4509 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4510 uint32_t &Minor) { 4511 if (ParseAsAbsoluteExpression(Major)) 4512 return TokError("invalid major version"); 4513 4514 if (!trySkipToken(AsmToken::Comma)) 4515 return TokError("minor version number required, comma expected"); 4516 4517 if (ParseAsAbsoluteExpression(Minor)) 4518 return TokError("invalid minor version"); 4519 4520 return false; 4521 } 4522 4523 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4524 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4525 return TokError("directive only supported for amdgcn architecture"); 4526 4527 std::string TargetIDDirective; 4528 SMLoc TargetStart = getTok().getLoc(); 4529 if (getParser().parseEscapedString(TargetIDDirective)) 4530 return true; 4531 4532 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4533 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4534 return getParser().Error(TargetRange.Start, 4535 (Twine(".amdgcn_target directive's target id ") + 4536 Twine(TargetIDDirective) + 4537 Twine(" does not match the specified target id ") + 4538 Twine(getTargetStreamer().getTargetID()->toString())).str()); 4539 4540 return false; 4541 } 4542 4543 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4544 return Error(Range.Start, "value out of range", Range); 4545 } 4546 4547 bool AMDGPUAsmParser::calculateGPRBlocks( 4548 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4549 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4550 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4551 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4552 // TODO(scott.linder): These calculations are duplicated from 4553 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
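// Register usage is encoded in COMPUTE_PGM_RSRC1 as granulated 'blocks':
// the raw SGPR count is first increased by the extra SGPRs implied by VCC,
// flat_scratch and XNACK, then both counts are rounded up to the block size.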
4554 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4555 4556 unsigned NumVGPRs = NextFreeVGPR; 4557 unsigned NumSGPRs = NextFreeSGPR; 4558 4559 if (Version.Major >= 10) 4560 NumSGPRs = 0; 4561 else { 4562 unsigned MaxAddressableNumSGPRs = 4563 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4564 4565 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4566 NumSGPRs > MaxAddressableNumSGPRs) 4567 return OutOfRangeError(SGPRRange); 4568 4569 NumSGPRs += 4570 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4571 4572 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4573 NumSGPRs > MaxAddressableNumSGPRs) 4574 return OutOfRangeError(SGPRRange); 4575 4576 if (Features.test(FeatureSGPRInitBug)) 4577 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4578 } 4579 4580 VGPRBlocks = 4581 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4582 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4583 4584 return false; 4585 } 4586 4587 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4588 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4589 return TokError("directive only supported for amdgcn architecture"); 4590 4591 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4592 return TokError("directive only supported for amdhsa OS"); 4593 4594 StringRef KernelName; 4595 if (getParser().parseIdentifier(KernelName)) 4596 return true; 4597 4598 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4599 4600 StringSet<> Seen; 4601 4602 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4603 4604 SMRange VGPRRange; 4605 uint64_t NextFreeVGPR = 0; 4606 uint64_t AccumOffset = 0; 4607 SMRange SGPRRange; 4608 uint64_t NextFreeSGPR = 0; 4609 4610 // Count the number of user SGPRs implied from the enabled feature bits. 4611 unsigned ImpliedUserSGPRCount = 0; 4612 4613 // Track if the asm explicitly contains the directive for the user SGPR 4614 // count. 
4615 Optional<unsigned> ExplicitUserSGPRCount; 4616 bool ReserveVCC = true; 4617 bool ReserveFlatScr = true; 4618 Optional<bool> EnableWavefrontSize32; 4619 4620 while (true) { 4621 while (trySkipToken(AsmToken::EndOfStatement)); 4622 4623 StringRef ID; 4624 SMRange IDRange = getTok().getLocRange(); 4625 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4626 return true; 4627 4628 if (ID == ".end_amdhsa_kernel") 4629 break; 4630 4631 if (Seen.find(ID) != Seen.end()) 4632 return TokError(".amdhsa_ directives cannot be repeated"); 4633 Seen.insert(ID); 4634 4635 SMLoc ValStart = getLoc(); 4636 int64_t IVal; 4637 if (getParser().parseAbsoluteExpression(IVal)) 4638 return true; 4639 SMLoc ValEnd = getLoc(); 4640 SMRange ValRange = SMRange(ValStart, ValEnd); 4641 4642 if (IVal < 0) 4643 return OutOfRangeError(ValRange); 4644 4645 uint64_t Val = IVal; 4646 4647 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4648 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4649 return OutOfRangeError(RANGE); \ 4650 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4651 4652 if (ID == ".amdhsa_group_segment_fixed_size") { 4653 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4654 return OutOfRangeError(ValRange); 4655 KD.group_segment_fixed_size = Val; 4656 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4657 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4658 return OutOfRangeError(ValRange); 4659 KD.private_segment_fixed_size = Val; 4660 } else if (ID == ".amdhsa_kernarg_size") { 4661 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4662 return OutOfRangeError(ValRange); 4663 KD.kernarg_size = Val; 4664 } else if (ID == ".amdhsa_user_sgpr_count") { 4665 ExplicitUserSGPRCount = Val; 4666 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4667 if (hasArchitectedFlatScratch()) 4668 return Error(IDRange.Start, 4669 "directive is not supported with architected flat scratch", 4670 IDRange); 4671 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4672 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4673 Val, ValRange); 4674 if (Val) 4675 ImpliedUserSGPRCount += 4; 4676 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4677 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4678 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4679 ValRange); 4680 if (Val) 4681 ImpliedUserSGPRCount += 2; 4682 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4683 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4684 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4685 ValRange); 4686 if (Val) 4687 ImpliedUserSGPRCount += 2; 4688 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4689 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4690 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4691 Val, ValRange); 4692 if (Val) 4693 ImpliedUserSGPRCount += 2; 4694 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4695 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4696 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4697 ValRange); 4698 if (Val) 4699 ImpliedUserSGPRCount += 2; 4700 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4701 if (hasArchitectedFlatScratch()) 4702 return Error(IDRange.Start, 4703 "directive is not supported with architected flat scratch", 4704 IDRange); 4705 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4706 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4707 ValRange); 4708 if (Val) 4709 ImpliedUserSGPRCount += 2; 4710 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4711 
PARSE_BITS_ENTRY(KD.kernel_code_properties, 4712 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4713 Val, ValRange); 4714 if (Val) 4715 ImpliedUserSGPRCount += 1; 4716 } else if (ID == ".amdhsa_wavefront_size32") { 4717 if (IVersion.Major < 10) 4718 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4719 EnableWavefrontSize32 = Val; 4720 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4721 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4722 Val, ValRange); 4723 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4724 if (hasArchitectedFlatScratch()) 4725 return Error(IDRange.Start, 4726 "directive is not supported with architected flat scratch", 4727 IDRange); 4728 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4729 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4730 } else if (ID == ".amdhsa_enable_private_segment") { 4731 if (!hasArchitectedFlatScratch()) 4732 return Error( 4733 IDRange.Start, 4734 "directive is not supported without architected flat scratch", 4735 IDRange); 4736 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4737 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4738 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4739 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4740 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4741 ValRange); 4742 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4743 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4744 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4745 ValRange); 4746 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4747 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4748 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4749 ValRange); 4750 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4751 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4752 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4753 ValRange); 4754 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4755 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4756 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4757 ValRange); 4758 } else if (ID == ".amdhsa_next_free_vgpr") { 4759 VGPRRange = ValRange; 4760 NextFreeVGPR = Val; 4761 } else if (ID == ".amdhsa_next_free_sgpr") { 4762 SGPRRange = ValRange; 4763 NextFreeSGPR = Val; 4764 } else if (ID == ".amdhsa_accum_offset") { 4765 if (!isGFX90A()) 4766 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4767 AccumOffset = Val; 4768 } else if (ID == ".amdhsa_reserve_vcc") { 4769 if (!isUInt<1>(Val)) 4770 return OutOfRangeError(ValRange); 4771 ReserveVCC = Val; 4772 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4773 if (IVersion.Major < 7) 4774 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 4775 if (hasArchitectedFlatScratch()) 4776 return Error(IDRange.Start, 4777 "directive is not supported with architected flat scratch", 4778 IDRange); 4779 if (!isUInt<1>(Val)) 4780 return OutOfRangeError(ValRange); 4781 ReserveFlatScr = Val; 4782 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4783 if (IVersion.Major < 8) 4784 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 4785 if (!isUInt<1>(Val)) 4786 return OutOfRangeError(ValRange); 4787 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 4788 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 4789 IDRange); 4790 } else if (ID == ".amdhsa_float_round_mode_32") { 4791 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4792 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4793 } else if (ID == ".amdhsa_float_round_mode_16_64") { 
4794 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4795 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4796 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4797 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4798 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4799 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4800 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4801 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4802 ValRange); 4803 } else if (ID == ".amdhsa_dx10_clamp") { 4804 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4805 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4806 } else if (ID == ".amdhsa_ieee_mode") { 4807 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4808 Val, ValRange); 4809 } else if (ID == ".amdhsa_fp16_overflow") { 4810 if (IVersion.Major < 9) 4811 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 4812 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4813 ValRange); 4814 } else if (ID == ".amdhsa_tg_split") { 4815 if (!isGFX90A()) 4816 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4817 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 4818 ValRange); 4819 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4820 if (IVersion.Major < 10) 4821 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4822 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4823 ValRange); 4824 } else if (ID == ".amdhsa_memory_ordered") { 4825 if (IVersion.Major < 10) 4826 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4827 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4828 ValRange); 4829 } else if (ID == ".amdhsa_forward_progress") { 4830 if (IVersion.Major < 10) 4831 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4832 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4833 ValRange); 4834 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4835 PARSE_BITS_ENTRY( 4836 KD.compute_pgm_rsrc2, 4837 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4838 ValRange); 4839 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4840 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4841 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4842 Val, ValRange); 4843 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4844 PARSE_BITS_ENTRY( 4845 KD.compute_pgm_rsrc2, 4846 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4847 ValRange); 4848 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4849 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4850 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4851 Val, ValRange); 4852 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4853 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4854 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4855 Val, ValRange); 4856 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4857 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4858 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4859 Val, ValRange); 4860 } else if (ID == ".amdhsa_exception_int_div_zero") { 4861 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4862 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4863 Val, ValRange); 4864 } else { 4865 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 4866 } 4867 4868 #undef PARSE_BITS_ENTRY 4869 } 4870 4871 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4872 return 
TokError(".amdhsa_next_free_vgpr directive is required"); 4873 4874 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4875 return TokError(".amdhsa_next_free_sgpr directive is required"); 4876 4877 unsigned VGPRBlocks; 4878 unsigned SGPRBlocks; 4879 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4880 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 4881 EnableWavefrontSize32, NextFreeVGPR, 4882 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4883 SGPRBlocks)) 4884 return true; 4885 4886 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4887 VGPRBlocks)) 4888 return OutOfRangeError(VGPRRange); 4889 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4890 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4891 4892 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4893 SGPRBlocks)) 4894 return OutOfRangeError(SGPRRange); 4895 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4896 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4897 SGPRBlocks); 4898 4899 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) 4900 return TokError("amdgpu_user_sgpr_count smaller than than implied by " 4901 "enabled user SGPRs"); 4902 4903 unsigned UserSGPRCount = 4904 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount; 4905 4906 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4907 return TokError("too many user SGPRs enabled"); 4908 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4909 UserSGPRCount); 4910 4911 if (isGFX90A()) { 4912 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 4913 return TokError(".amdhsa_accum_offset directive is required"); 4914 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 4915 return TokError("accum_offset should be in range [4..256] in " 4916 "increments of 4"); 4917 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 4918 return TokError("accum_offset exceeds total VGPR allocation"); 4919 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 4920 (AccumOffset / 4 - 1)); 4921 } 4922 4923 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4924 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4925 ReserveFlatScr); 4926 return false; 4927 } 4928 4929 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4930 uint32_t Major; 4931 uint32_t Minor; 4932 4933 if (ParseDirectiveMajorMinor(Major, Minor)) 4934 return true; 4935 4936 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4937 return false; 4938 } 4939 4940 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4941 uint32_t Major; 4942 uint32_t Minor; 4943 uint32_t Stepping; 4944 StringRef VendorName; 4945 StringRef ArchName; 4946 4947 // If this directive has no arguments, then use the ISA version for the 4948 // targeted GPU. 
4949 if (isToken(AsmToken::EndOfStatement)) { 4950 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4951 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 4952 ISA.Stepping, 4953 "AMD", "AMDGPU"); 4954 return false; 4955 } 4956 4957 if (ParseDirectiveMajorMinor(Major, Minor)) 4958 return true; 4959 4960 if (!trySkipToken(AsmToken::Comma)) 4961 return TokError("stepping version number required, comma expected"); 4962 4963 if (ParseAsAbsoluteExpression(Stepping)) 4964 return TokError("invalid stepping version"); 4965 4966 if (!trySkipToken(AsmToken::Comma)) 4967 return TokError("vendor name required, comma expected"); 4968 4969 if (!parseString(VendorName, "invalid vendor name")) 4970 return true; 4971 4972 if (!trySkipToken(AsmToken::Comma)) 4973 return TokError("arch name required, comma expected"); 4974 4975 if (!parseString(ArchName, "invalid arch name")) 4976 return true; 4977 4978 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 4979 VendorName, ArchName); 4980 return false; 4981 } 4982 4983 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4984 amd_kernel_code_t &Header) { 4985 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4986 // assembly for backwards compatibility. 4987 if (ID == "max_scratch_backing_memory_byte_size") { 4988 Parser.eatToEndOfStatement(); 4989 return false; 4990 } 4991 4992 SmallString<40> ErrStr; 4993 raw_svector_ostream Err(ErrStr); 4994 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4995 return TokError(Err.str()); 4996 } 4997 Lex(); 4998 4999 if (ID == "enable_wavefront_size32") { 5000 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 5001 if (!isGFX10Plus()) 5002 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 5003 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5004 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 5005 } else { 5006 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5007 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 5008 } 5009 } 5010 5011 if (ID == "wavefront_size") { 5012 if (Header.wavefront_size == 5) { 5013 if (!isGFX10Plus()) 5014 return TokError("wavefront_size=5 is only allowed on GFX10+"); 5015 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5016 return TokError("wavefront_size=5 requires +WavefrontSize32"); 5017 } else if (Header.wavefront_size == 6) { 5018 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5019 return TokError("wavefront_size=6 requires +WavefrontSize64"); 5020 } 5021 } 5022 5023 if (ID == "enable_wgp_mode") { 5024 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 5025 !isGFX10Plus()) 5026 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 5027 } 5028 5029 if (ID == "enable_mem_ordered") { 5030 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 5031 !isGFX10Plus()) 5032 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 5033 } 5034 5035 if (ID == "enable_fwd_progress") { 5036 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 5037 !isGFX10Plus()) 5038 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 5039 } 5040 5041 return false; 5042 } 5043 5044 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5045 amd_kernel_code_t Header; 5046 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 5047 5048 while (true) { 5049 // Lex EndOfStatement. 
This is in a while loop, because lexing a comment 5050 // will set the current token to EndOfStatement. 5051 while(trySkipToken(AsmToken::EndOfStatement)); 5052 5053 StringRef ID; 5054 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5055 return true; 5056 5057 if (ID == ".end_amd_kernel_code_t") 5058 break; 5059 5060 if (ParseAMDKernelCodeTValue(ID, Header)) 5061 return true; 5062 } 5063 5064 getTargetStreamer().EmitAMDKernelCodeT(Header); 5065 5066 return false; 5067 } 5068 5069 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5070 StringRef KernelName; 5071 if (!parseId(KernelName, "expected symbol name")) 5072 return true; 5073 5074 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5075 ELF::STT_AMDGPU_HSA_KERNEL); 5076 5077 KernelScope.initialize(getContext()); 5078 return false; 5079 } 5080 5081 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5082 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5083 return Error(getLoc(), 5084 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5085 "architectures"); 5086 } 5087 5088 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5089 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5090 return Error(getParser().getTok().getLoc(), "target id must match options"); 5091 5092 getTargetStreamer().EmitISAVersion(); 5093 Lex(); 5094 5095 return false; 5096 } 5097 5098 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5099 const char *AssemblerDirectiveBegin; 5100 const char *AssemblerDirectiveEnd; 5101 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5102 isHsaAbiVersion3Or4(&getSTI()) 5103 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 5104 HSAMD::V3::AssemblerDirectiveEnd) 5105 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 5106 HSAMD::AssemblerDirectiveEnd); 5107 5108 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5109 return Error(getLoc(), 5110 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5111 "not available on non-amdhsa OSes")).str()); 5112 } 5113 5114 std::string HSAMetadataString; 5115 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5116 HSAMetadataString)) 5117 return true; 5118 5119 if (isHsaAbiVersion3Or4(&getSTI())) { 5120 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5121 return Error(getLoc(), "invalid HSA metadata"); 5122 } else { 5123 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5124 return Error(getLoc(), "invalid HSA metadata"); 5125 } 5126 5127 return false; 5128 } 5129 5130 /// Common code to parse out a block of text (typically YAML) between start and 5131 /// end directives. 
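/// The text is accumulated into \p CollectString, with each statement
/// followed by the target's statement separator string; whitespace is
/// preserved while collecting.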
5132 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5133 const char *AssemblerDirectiveEnd, 5134 std::string &CollectString) { 5135 5136 raw_string_ostream CollectStream(CollectString); 5137 5138 getLexer().setSkipSpace(false); 5139 5140 bool FoundEnd = false; 5141 while (!isToken(AsmToken::Eof)) { 5142 while (isToken(AsmToken::Space)) { 5143 CollectStream << getTokenStr(); 5144 Lex(); 5145 } 5146 5147 if (trySkipId(AssemblerDirectiveEnd)) { 5148 FoundEnd = true; 5149 break; 5150 } 5151 5152 CollectStream << Parser.parseStringToEndOfStatement() 5153 << getContext().getAsmInfo()->getSeparatorString(); 5154 5155 Parser.eatToEndOfStatement(); 5156 } 5157 5158 getLexer().setSkipSpace(true); 5159 5160 if (isToken(AsmToken::Eof) && !FoundEnd) { 5161 return TokError(Twine("expected directive ") + 5162 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5163 } 5164 5165 CollectStream.flush(); 5166 return false; 5167 } 5168 5169 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5170 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5171 std::string String; 5172 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5173 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5174 return true; 5175 5176 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5177 if (!PALMetadata->setFromString(String)) 5178 return Error(getLoc(), "invalid PAL metadata"); 5179 return false; 5180 } 5181 5182 /// Parse the assembler directive for old linear-format PAL metadata. 5183 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5184 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5185 return Error(getLoc(), 5186 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5187 "not available on non-amdpal OSes")).str()); 5188 } 5189 5190 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5191 PALMetadata->setLegacy(); 5192 for (;;) { 5193 uint32_t Key, Value; 5194 if (ParseAsAbsoluteExpression(Key)) { 5195 return TokError(Twine("invalid value in ") + 5196 Twine(PALMD::AssemblerDirective)); 5197 } 5198 if (!trySkipToken(AsmToken::Comma)) { 5199 return TokError(Twine("expected an even number of values in ") + 5200 Twine(PALMD::AssemblerDirective)); 5201 } 5202 if (ParseAsAbsoluteExpression(Value)) { 5203 return TokError(Twine("invalid value in ") + 5204 Twine(PALMD::AssemblerDirective)); 5205 } 5206 PALMetadata->setRegister(Key, Value); 5207 if (!trySkipToken(AsmToken::Comma)) 5208 break; 5209 } 5210 return false; 5211 } 5212 5213 /// ParseDirectiveAMDGPULDS 5214 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5215 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5216 if (getParser().checkForValidSection()) 5217 return true; 5218 5219 StringRef Name; 5220 SMLoc NameLoc = getLoc(); 5221 if (getParser().parseIdentifier(Name)) 5222 return TokError("expected identifier in directive"); 5223 5224 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5225 if (parseToken(AsmToken::Comma, "expected ','")) 5226 return true; 5227 5228 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5229 5230 int64_t Size; 5231 SMLoc SizeLoc = getLoc(); 5232 if (getParser().parseAbsoluteExpression(Size)) 5233 return true; 5234 if (Size < 0) 5235 return Error(SizeLoc, "size must be non-negative"); 5236 if (Size > LocalMemorySize) 5237 return Error(SizeLoc, "size is too large"); 5238 5239 int64_t Alignment = 4; 5240 if (trySkipToken(AsmToken::Comma)) { 5241 SMLoc AlignLoc = getLoc(); 5242 if 
(getParser().parseAbsoluteExpression(Alignment)) 5243 return true; 5244 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5245 return Error(AlignLoc, "alignment must be a power of two"); 5246 5247 // Alignment larger than the size of LDS is possible in theory, as long 5248 // as the linker manages to place to symbol at address 0, but we do want 5249 // to make sure the alignment fits nicely into a 32-bit integer. 5250 if (Alignment >= 1u << 31) 5251 return Error(AlignLoc, "alignment is too large"); 5252 } 5253 5254 if (parseToken(AsmToken::EndOfStatement, 5255 "unexpected token in '.amdgpu_lds' directive")) 5256 return true; 5257 5258 Symbol->redefineIfPossible(); 5259 if (!Symbol->isUndefined()) 5260 return Error(NameLoc, "invalid symbol redefinition"); 5261 5262 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5263 return false; 5264 } 5265 5266 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5267 StringRef IDVal = DirectiveID.getString(); 5268 5269 if (isHsaAbiVersion3Or4(&getSTI())) { 5270 if (IDVal == ".amdhsa_kernel") 5271 return ParseDirectiveAMDHSAKernel(); 5272 5273 // TODO: Restructure/combine with PAL metadata directive. 5274 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5275 return ParseDirectiveHSAMetadata(); 5276 } else { 5277 if (IDVal == ".hsa_code_object_version") 5278 return ParseDirectiveHSACodeObjectVersion(); 5279 5280 if (IDVal == ".hsa_code_object_isa") 5281 return ParseDirectiveHSACodeObjectISA(); 5282 5283 if (IDVal == ".amd_kernel_code_t") 5284 return ParseDirectiveAMDKernelCodeT(); 5285 5286 if (IDVal == ".amdgpu_hsa_kernel") 5287 return ParseDirectiveAMDGPUHsaKernel(); 5288 5289 if (IDVal == ".amd_amdgpu_isa") 5290 return ParseDirectiveISAVersion(); 5291 5292 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5293 return ParseDirectiveHSAMetadata(); 5294 } 5295 5296 if (IDVal == ".amdgcn_target") 5297 return ParseDirectiveAMDGCNTarget(); 5298 5299 if (IDVal == ".amdgpu_lds") 5300 return ParseDirectiveAMDGPULDS(); 5301 5302 if (IDVal == PALMD::AssemblerDirectiveBegin) 5303 return ParseDirectivePALMetadataBegin(); 5304 5305 if (IDVal == PALMD::AssemblerDirective) 5306 return ParseDirectivePALMetadata(); 5307 5308 return true; 5309 } 5310 5311 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5312 unsigned RegNo) { 5313 5314 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 5315 R.isValid(); ++R) { 5316 if (*R == RegNo) 5317 return isGFX9Plus(); 5318 } 5319 5320 // GFX10 has 2 more SGPRs 104 and 105. 5321 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 5322 R.isValid(); ++R) { 5323 if (*R == RegNo) 5324 return hasSGPR104_SGPR105(); 5325 } 5326 5327 switch (RegNo) { 5328 case AMDGPU::SRC_SHARED_BASE: 5329 case AMDGPU::SRC_SHARED_LIMIT: 5330 case AMDGPU::SRC_PRIVATE_BASE: 5331 case AMDGPU::SRC_PRIVATE_LIMIT: 5332 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5333 return isGFX9Plus(); 5334 case AMDGPU::TBA: 5335 case AMDGPU::TBA_LO: 5336 case AMDGPU::TBA_HI: 5337 case AMDGPU::TMA: 5338 case AMDGPU::TMA_LO: 5339 case AMDGPU::TMA_HI: 5340 return !isGFX9Plus(); 5341 case AMDGPU::XNACK_MASK: 5342 case AMDGPU::XNACK_MASK_LO: 5343 case AMDGPU::XNACK_MASK_HI: 5344 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5345 case AMDGPU::SGPR_NULL: 5346 return isGFX10Plus(); 5347 default: 5348 break; 5349 } 5350 5351 if (isCI()) 5352 return true; 5353 5354 if (isSI() || isGFX10Plus()) { 5355 // No flat_scr on SI. 
    // On GFX10 flat scratch is not a valid register operand and can only be
    // accessed with s_setreg/s_getreg.
    switch (RegNo) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::FLAT_SCR_LO:
    case AMDGPU::FLAT_SCR_HI:
      return false;
    default:
      return true;
    }
  }

  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
  // SI/CI have.
  for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return hasSGPR102_SGPR103();
  }

  return true;
}

OperandMatchResultTy
AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
                              OperandMode Mode) {
  // Try to parse with a custom parser
  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);

  // If we successfully parsed the operand or if there was an error parsing,
  // we are done.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list. This is only done
  // by a custom parser, so we shouldn't continue on to the generic parsing.
  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
      isToken(AsmToken::EndOfStatement))
    return ResTy;

  SMLoc RBraceLoc;
  SMLoc LBraceLoc = getLoc();
  if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
    unsigned Prefix = Operands.size();

    for (;;) {
      auto Loc = getLoc();
      ResTy = parseReg(Operands);
      if (ResTy == MatchOperand_NoMatch)
        Error(Loc, "expected a register");
      if (ResTy != MatchOperand_Success)
        return MatchOperand_ParseFail;

      RBraceLoc = getLoc();
      if (trySkipToken(AsmToken::RBrac))
        break;

      if (!skipToken(AsmToken::Comma,
                     "expected a comma or a closing square bracket")) {
        return MatchOperand_ParseFail;
      }
    }

    if (Operands.size() - Prefix > 1) {
      Operands.insert(Operands.begin() + Prefix,
                      AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
      Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
    }

    return MatchOperand_Success;
  }

  return parseRegOrImm(Operands);
}

StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
  // Clear any forced encodings from the previous instruction.
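  // For example, a mnemonic like 'v_add_f32_e64' forces the 64-bit (VOP3)
  // encoding and the '_e64' suffix is stripped below; the '_e32', '_dpp' and
  // '_sdwa' suffixes are handled the same way.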
5432 setForcedEncodingSize(0); 5433 setForcedDPP(false); 5434 setForcedSDWA(false); 5435 5436 if (Name.endswith("_e64")) { 5437 setForcedEncodingSize(64); 5438 return Name.substr(0, Name.size() - 4); 5439 } else if (Name.endswith("_e32")) { 5440 setForcedEncodingSize(32); 5441 return Name.substr(0, Name.size() - 4); 5442 } else if (Name.endswith("_dpp")) { 5443 setForcedDPP(true); 5444 return Name.substr(0, Name.size() - 4); 5445 } else if (Name.endswith("_sdwa")) { 5446 setForcedSDWA(true); 5447 return Name.substr(0, Name.size() - 5); 5448 } 5449 return Name; 5450 } 5451 5452 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5453 StringRef Name, 5454 SMLoc NameLoc, OperandVector &Operands) { 5455 // Add the instruction mnemonic 5456 Name = parseMnemonicSuffix(Name); 5457 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5458 5459 bool IsMIMG = Name.startswith("image_"); 5460 5461 while (!trySkipToken(AsmToken::EndOfStatement)) { 5462 OperandMode Mode = OperandMode_Default; 5463 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5464 Mode = OperandMode_NSA; 5465 CPolSeen = 0; 5466 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5467 5468 if (Res != MatchOperand_Success) { 5469 checkUnsupportedInstruction(Name, NameLoc); 5470 if (!Parser.hasPendingError()) { 5471 // FIXME: use real operand location rather than the current location. 5472 StringRef Msg = 5473 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5474 "not a valid operand."; 5475 Error(getLoc(), Msg); 5476 } 5477 while (!trySkipToken(AsmToken::EndOfStatement)) { 5478 lex(); 5479 } 5480 return true; 5481 } 5482 5483 // Eat the comma or space if there is one. 5484 trySkipToken(AsmToken::Comma); 5485 } 5486 5487 return false; 5488 } 5489 5490 //===----------------------------------------------------------------------===// 5491 // Utility functions 5492 //===----------------------------------------------------------------------===// 5493 5494 OperandMatchResultTy 5495 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5496 5497 if (!trySkipId(Prefix, AsmToken::Colon)) 5498 return MatchOperand_NoMatch; 5499 5500 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5501 } 5502 5503 OperandMatchResultTy 5504 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5505 AMDGPUOperand::ImmTy ImmTy, 5506 bool (*ConvertResult)(int64_t&)) { 5507 SMLoc S = getLoc(); 5508 int64_t Value = 0; 5509 5510 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5511 if (Res != MatchOperand_Success) 5512 return Res; 5513 5514 if (ConvertResult && !ConvertResult(Value)) { 5515 Error(S, "invalid " + StringRef(Prefix) + " value."); 5516 } 5517 5518 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5519 return MatchOperand_Success; 5520 } 5521 5522 OperandMatchResultTy 5523 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5524 OperandVector &Operands, 5525 AMDGPUOperand::ImmTy ImmTy, 5526 bool (*ConvertResult)(int64_t&)) { 5527 SMLoc S = getLoc(); 5528 if (!trySkipId(Prefix, AsmToken::Colon)) 5529 return MatchOperand_NoMatch; 5530 5531 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5532 return MatchOperand_ParseFail; 5533 5534 unsigned Val = 0; 5535 const unsigned MaxSize = 4; 5536 5537 // FIXME: How to verify the number of elements matches the number of src 5538 // operands? 
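  // For example, an operand such as op_sel:[0,1,0,1] (one of the prefixes this
  // helper is used for) is packed into Val = 0b1010 by the loop below.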
5539 for (int I = 0; ; ++I) { 5540 int64_t Op; 5541 SMLoc Loc = getLoc(); 5542 if (!parseExpr(Op)) 5543 return MatchOperand_ParseFail; 5544 5545 if (Op != 0 && Op != 1) { 5546 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5547 return MatchOperand_ParseFail; 5548 } 5549 5550 Val |= (Op << I); 5551 5552 if (trySkipToken(AsmToken::RBrac)) 5553 break; 5554 5555 if (I + 1 == MaxSize) { 5556 Error(getLoc(), "expected a closing square bracket"); 5557 return MatchOperand_ParseFail; 5558 } 5559 5560 if (!skipToken(AsmToken::Comma, "expected a comma")) 5561 return MatchOperand_ParseFail; 5562 } 5563 5564 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5565 return MatchOperand_Success; 5566 } 5567 5568 OperandMatchResultTy 5569 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5570 AMDGPUOperand::ImmTy ImmTy) { 5571 int64_t Bit; 5572 SMLoc S = getLoc(); 5573 5574 if (trySkipId(Name)) { 5575 Bit = 1; 5576 } else if (trySkipId("no", Name)) { 5577 Bit = 0; 5578 } else { 5579 return MatchOperand_NoMatch; 5580 } 5581 5582 if (Name == "r128" && !hasMIMG_R128()) { 5583 Error(S, "r128 modifier is not supported on this GPU"); 5584 return MatchOperand_ParseFail; 5585 } 5586 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5587 Error(S, "a16 modifier is not supported on this GPU"); 5588 return MatchOperand_ParseFail; 5589 } 5590 5591 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5592 ImmTy = AMDGPUOperand::ImmTyR128A16; 5593 5594 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5595 return MatchOperand_Success; 5596 } 5597 5598 OperandMatchResultTy 5599 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5600 unsigned CPolOn = 0; 5601 unsigned CPolOff = 0; 5602 SMLoc S = getLoc(); 5603 5604 if (trySkipId("glc")) 5605 CPolOn = AMDGPU::CPol::GLC; 5606 else if (trySkipId("noglc")) 5607 CPolOff = AMDGPU::CPol::GLC; 5608 else if (trySkipId("slc")) 5609 CPolOn = AMDGPU::CPol::SLC; 5610 else if (trySkipId("noslc")) 5611 CPolOff = AMDGPU::CPol::SLC; 5612 else if (trySkipId("dlc")) 5613 CPolOn = AMDGPU::CPol::DLC; 5614 else if (trySkipId("nodlc")) 5615 CPolOff = AMDGPU::CPol::DLC; 5616 else if (trySkipId("scc")) 5617 CPolOn = AMDGPU::CPol::SCC; 5618 else if (trySkipId("noscc")) 5619 CPolOff = AMDGPU::CPol::SCC; 5620 else 5621 return MatchOperand_NoMatch; 5622 5623 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5624 Error(S, "dlc modifier is not supported on this GPU"); 5625 return MatchOperand_ParseFail; 5626 } 5627 5628 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5629 Error(S, "scc modifier is not supported on this GPU"); 5630 return MatchOperand_ParseFail; 5631 } 5632 5633 if (CPolSeen & (CPolOn | CPolOff)) { 5634 Error(S, "duplicate cache policy modifier"); 5635 return MatchOperand_ParseFail; 5636 } 5637 5638 CPolSeen |= (CPolOn | CPolOff); 5639 5640 for (unsigned I = 1; I != Operands.size(); ++I) { 5641 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5642 if (Op.isCPol()) { 5643 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5644 return MatchOperand_Success; 5645 } 5646 } 5647 5648 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5649 AMDGPUOperand::ImmTyCPol)); 5650 5651 return MatchOperand_Success; 5652 } 5653 5654 static void addOptionalImmOperand( 5655 MCInst& Inst, const OperandVector& Operands, 5656 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5657 AMDGPUOperand::ImmTy ImmT, 5658 int64_t Default = 0) { 5659 auto i = OptionalIdx.find(ImmT); 5660 if (i != OptionalIdx.end()) { 
5661 unsigned Idx = i->second; 5662 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5663 } else { 5664 Inst.addOperand(MCOperand::createImm(Default)); 5665 } 5666 } 5667 5668 OperandMatchResultTy 5669 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5670 StringRef &Value, 5671 SMLoc &StringLoc) { 5672 if (!trySkipId(Prefix, AsmToken::Colon)) 5673 return MatchOperand_NoMatch; 5674 5675 StringLoc = getLoc(); 5676 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5677 : MatchOperand_ParseFail; 5678 } 5679 5680 //===----------------------------------------------------------------------===// 5681 // MTBUF format 5682 //===----------------------------------------------------------------------===// 5683 5684 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5685 int64_t MaxVal, 5686 int64_t &Fmt) { 5687 int64_t Val; 5688 SMLoc Loc = getLoc(); 5689 5690 auto Res = parseIntWithPrefix(Pref, Val); 5691 if (Res == MatchOperand_ParseFail) 5692 return false; 5693 if (Res == MatchOperand_NoMatch) 5694 return true; 5695 5696 if (Val < 0 || Val > MaxVal) { 5697 Error(Loc, Twine("out of range ", StringRef(Pref))); 5698 return false; 5699 } 5700 5701 Fmt = Val; 5702 return true; 5703 } 5704 5705 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5706 // values to live in a joint format operand in the MCInst encoding. 5707 OperandMatchResultTy 5708 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5709 using namespace llvm::AMDGPU::MTBUFFormat; 5710 5711 int64_t Dfmt = DFMT_UNDEF; 5712 int64_t Nfmt = NFMT_UNDEF; 5713 5714 // dfmt and nfmt can appear in either order, and each is optional. 5715 for (int I = 0; I < 2; ++I) { 5716 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5717 return MatchOperand_ParseFail; 5718 5719 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5720 return MatchOperand_ParseFail; 5721 } 5722 // Skip optional comma between dfmt/nfmt 5723 // but guard against 2 commas following each other. 5724 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5725 !peekToken().is(AsmToken::Comma)) { 5726 trySkipToken(AsmToken::Comma); 5727 } 5728 } 5729 5730 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5731 return MatchOperand_NoMatch; 5732 5733 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5734 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5735 5736 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5737 return MatchOperand_Success; 5738 } 5739 5740 OperandMatchResultTy 5741 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5742 using namespace llvm::AMDGPU::MTBUFFormat; 5743 5744 int64_t Fmt = UFMT_UNDEF; 5745 5746 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5747 return MatchOperand_ParseFail; 5748 5749 if (Fmt == UFMT_UNDEF) 5750 return MatchOperand_NoMatch; 5751 5752 Format = Fmt; 5753 return MatchOperand_Success; 5754 } 5755 5756 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5757 int64_t &Nfmt, 5758 StringRef FormatStr, 5759 SMLoc Loc) { 5760 using namespace llvm::AMDGPU::MTBUFFormat; 5761 int64_t Format; 5762 5763 Format = getDfmt(FormatStr); 5764 if (Format != DFMT_UNDEF) { 5765 Dfmt = Format; 5766 return true; 5767 } 5768 5769 Format = getNfmt(FormatStr, getSTI()); 5770 if (Format != NFMT_UNDEF) { 5771 Nfmt = Format; 5772 return true; 5773 } 5774 5775 Error(Loc, "unsupported format"); 5776 return false; 5777 } 5778 5779 OperandMatchResultTy 5780 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5781 SMLoc FormatLoc, 5782 int64_t &Format) { 5783 using namespace llvm::AMDGPU::MTBUFFormat; 5784 5785 int64_t Dfmt = DFMT_UNDEF; 5786 int64_t Nfmt = NFMT_UNDEF; 5787 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5788 return MatchOperand_ParseFail; 5789 5790 if (trySkipToken(AsmToken::Comma)) { 5791 StringRef Str; 5792 SMLoc Loc = getLoc(); 5793 if (!parseId(Str, "expected a format string") || 5794 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5795 return MatchOperand_ParseFail; 5796 } 5797 if (Dfmt == DFMT_UNDEF) { 5798 Error(Loc, "duplicate numeric format"); 5799 return MatchOperand_ParseFail; 5800 } else if (Nfmt == NFMT_UNDEF) { 5801 Error(Loc, "duplicate data format"); 5802 return MatchOperand_ParseFail; 5803 } 5804 } 5805 5806 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5807 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5808 5809 if (isGFX10Plus()) { 5810 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5811 if (Ufmt == UFMT_UNDEF) { 5812 Error(FormatLoc, "unsupported format"); 5813 return MatchOperand_ParseFail; 5814 } 5815 Format = Ufmt; 5816 } else { 5817 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5818 } 5819 5820 return MatchOperand_Success; 5821 } 5822 5823 OperandMatchResultTy 5824 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5825 SMLoc Loc, 5826 int64_t &Format) { 5827 using namespace llvm::AMDGPU::MTBUFFormat; 5828 5829 auto Id = getUnifiedFormat(FormatStr); 5830 if (Id == UFMT_UNDEF) 5831 return MatchOperand_NoMatch; 5832 5833 if (!isGFX10Plus()) { 5834 Error(Loc, "unified format is not supported on this GPU"); 5835 return MatchOperand_ParseFail; 5836 } 5837 5838 Format = Id; 5839 return MatchOperand_Success; 5840 } 5841 5842 OperandMatchResultTy 5843 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5844 using namespace llvm::AMDGPU::MTBUFFormat; 5845 SMLoc Loc = getLoc(); 5846 5847 if (!parseExpr(Format)) 5848 return MatchOperand_ParseFail; 5849 if (!isValidFormatEncoding(Format, getSTI())) { 5850 Error(Loc, "out of range format"); 5851 return MatchOperand_ParseFail; 5852 } 5853 5854 return MatchOperand_Success; 5855 } 5856 5857 OperandMatchResultTy 5858 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 5859 using namespace llvm::AMDGPU::MTBUFFormat; 5860 5861 if (!trySkipId("format", AsmToken::Colon)) 5862 return MatchOperand_NoMatch; 5863 5864 if (trySkipToken(AsmToken::LBrac)) { 5865 StringRef FormatStr; 5866 SMLoc Loc = getLoc(); 5867 if (!parseId(FormatStr, "expected a format string")) 5868 return MatchOperand_ParseFail; 5869 5870 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5871 if (Res == MatchOperand_NoMatch) 5872 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5873 if (Res != MatchOperand_Success) 5874 return Res; 5875 5876 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 5877 return MatchOperand_ParseFail; 5878 5879 return MatchOperand_Success; 5880 } 5881 5882 return parseNumericFormat(Format); 5883 } 5884 5885 OperandMatchResultTy 5886 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5887 using namespace llvm::AMDGPU::MTBUFFormat; 5888 5889 int64_t Format = getDefaultFormatEncoding(getSTI()); 5890 OperandMatchResultTy Res; 5891 SMLoc Loc = getLoc(); 5892 5893 // Parse legacy format syntax. 5894 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5895 if (Res == MatchOperand_ParseFail) 5896 return Res; 5897 5898 bool FormatFound = (Res == MatchOperand_Success); 5899 5900 Operands.push_back( 5901 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5902 5903 if (FormatFound) 5904 trySkipToken(AsmToken::Comma); 5905 5906 if (isToken(AsmToken::EndOfStatement)) { 5907 // We are expecting an soffset operand, 5908 // but let matcher handle the error. 5909 return MatchOperand_Success; 5910 } 5911 5912 // Parse soffset. 
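  // Note that the format specifier may also follow soffset, e.g. something
  // like 'format:[BUF_DATA_FORMAT_32,BUF_NUM_FORMAT_FLOAT]'; when no format
  // was found above, that case is handled after soffset is parsed.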
5913 Res = parseRegOrImm(Operands); 5914 if (Res != MatchOperand_Success) 5915 return Res; 5916 5917 trySkipToken(AsmToken::Comma); 5918 5919 if (!FormatFound) { 5920 Res = parseSymbolicOrNumericFormat(Format); 5921 if (Res == MatchOperand_ParseFail) 5922 return Res; 5923 if (Res == MatchOperand_Success) { 5924 auto Size = Operands.size(); 5925 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 5926 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 5927 Op.setImm(Format); 5928 } 5929 return MatchOperand_Success; 5930 } 5931 5932 if (isId("format") && peekToken().is(AsmToken::Colon)) { 5933 Error(getLoc(), "duplicate format"); 5934 return MatchOperand_ParseFail; 5935 } 5936 return MatchOperand_Success; 5937 } 5938 5939 //===----------------------------------------------------------------------===// 5940 // ds 5941 //===----------------------------------------------------------------------===// 5942 5943 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 5944 const OperandVector &Operands) { 5945 OptionalImmIndexMap OptionalIdx; 5946 5947 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5948 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5949 5950 // Add the register arguments 5951 if (Op.isReg()) { 5952 Op.addRegOperands(Inst, 1); 5953 continue; 5954 } 5955 5956 // Handle optional arguments 5957 OptionalIdx[Op.getImmTy()] = i; 5958 } 5959 5960 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 5961 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 5962 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5963 5964 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5965 } 5966 5967 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 5968 bool IsGdsHardcoded) { 5969 OptionalImmIndexMap OptionalIdx; 5970 5971 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5972 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5973 5974 // Add the register arguments 5975 if (Op.isReg()) { 5976 Op.addRegOperands(Inst, 1); 5977 continue; 5978 } 5979 5980 if (Op.isToken() && Op.getToken() == "gds") { 5981 IsGdsHardcoded = true; 5982 continue; 5983 } 5984 5985 // Handle optional arguments 5986 OptionalIdx[Op.getImmTy()] = i; 5987 } 5988 5989 AMDGPUOperand::ImmTy OffsetType = 5990 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 5991 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 5992 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 5993 AMDGPUOperand::ImmTyOffset; 5994 5995 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 5996 5997 if (!IsGdsHardcoded) { 5998 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5999 } 6000 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6001 } 6002 6003 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 6004 OptionalImmIndexMap OptionalIdx; 6005 6006 unsigned OperandIdx[4]; 6007 unsigned EnMask = 0; 6008 int SrcIdx = 0; 6009 6010 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6011 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6012 6013 // Add the register arguments 6014 if (Op.isReg()) { 6015 assert(SrcIdx < 4); 6016 OperandIdx[SrcIdx] = Inst.size(); 6017 Op.addRegOperands(Inst, 1); 6018 ++SrcIdx; 6019 continue; 6020 } 6021 6022 if (Op.isOff()) { 6023 assert(SrcIdx < 4); 6024 OperandIdx[SrcIdx] = Inst.size(); 6025 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 6026 ++SrcIdx; 6027 continue; 6028 } 6029 6030 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 6031 Op.addImmOperands(Inst, 1); 6032 continue; 6033 } 6034 6035 if (Op.isToken() && Op.getToken() == "done") 6036 continue; 6037 6038 // Handle optional arguments 6039 OptionalIdx[Op.getImmTy()] = i; 6040 } 6041 6042 assert(SrcIdx == 4); 6043 6044 bool Compr = false; 6045 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 6046 Compr = true; 6047 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 6048 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 6049 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 6050 } 6051 6052 for (auto i = 0; i < SrcIdx; ++i) { 6053 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 6054 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 6055 } 6056 } 6057 6058 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6059 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6060 6061 Inst.addOperand(MCOperand::createImm(EnMask)); 6062 } 6063 6064 //===----------------------------------------------------------------------===// 6065 // s_waitcnt 6066 //===----------------------------------------------------------------------===// 6067 6068 static bool 6069 encodeCnt( 6070 const AMDGPU::IsaVersion ISA, 6071 int64_t &IntVal, 6072 int64_t CntVal, 6073 bool Saturate, 6074 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6075 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6076 { 6077 bool Failed = false; 6078 6079 IntVal = encode(ISA, IntVal, CntVal); 6080 if (CntVal != decode(ISA, IntVal)) { 6081 if (Saturate) { 6082 IntVal = encode(ISA, IntVal, -1); 6083 } else { 6084 Failed = true; 6085 } 6086 } 6087 return Failed; 6088 } 6089 6090 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6091 6092 SMLoc CntLoc = getLoc(); 6093 StringRef CntName = getTokenStr(); 6094 6095 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6096 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6097 return false; 6098 6099 int64_t CntVal; 6100 SMLoc ValLoc = getLoc(); 6101 if (!parseExpr(CntVal)) 6102 return false; 6103 6104 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6105 6106 bool Failed = true; 6107 bool Sat = CntName.endswith("_sat"); 6108 6109 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6110 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6111 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6112 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6113 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6114 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6115 } else { 6116 Error(CntLoc, "invalid counter name " + CntName); 6117 return false; 6118 } 6119 6120 if (Failed) { 6121 Error(ValLoc, "too large value for " + CntName); 6122 return false; 6123 } 6124 6125 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6126 return false; 6127 6128 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6129 if (isToken(AsmToken::EndOfStatement)) { 6130 Error(getLoc(), "expected a counter name"); 6131 return false; 6132 } 6133 } 6134 6135 return true; 6136 } 6137 6138 OperandMatchResultTy 6139 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 6140 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6141 int64_t Waitcnt = getWaitcntBitMask(ISA); 6142 SMLoc S = getLoc(); 6143 6144 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6145 while (!isToken(AsmToken::EndOfStatement)) { 6146 if (!parseCnt(Waitcnt)) 6147 return MatchOperand_ParseFail; 6148 } 6149 } else { 6150 if (!parseExpr(Waitcnt)) 6151 return MatchOperand_ParseFail; 6152 } 6153 6154 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6155 return MatchOperand_Success; 6156 } 6157 6158 bool 6159 AMDGPUOperand::isSWaitCnt() const { 6160 return isImm(); 6161 } 6162 6163 //===----------------------------------------------------------------------===// 6164 // hwreg 6165 //===----------------------------------------------------------------------===// 6166 6167 bool 6168 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6169 OperandInfoTy &Offset, 6170 
OperandInfoTy &Width) { 6171 using namespace llvm::AMDGPU::Hwreg; 6172 6173 // The register may be specified by name or using a numeric code 6174 HwReg.Loc = getLoc(); 6175 if (isToken(AsmToken::Identifier) && 6176 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 6177 HwReg.IsSymbolic = true; 6178 lex(); // skip register name 6179 } else if (!parseExpr(HwReg.Id, "a register name")) { 6180 return false; 6181 } 6182 6183 if (trySkipToken(AsmToken::RParen)) 6184 return true; 6185 6186 // parse optional params 6187 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6188 return false; 6189 6190 Offset.Loc = getLoc(); 6191 if (!parseExpr(Offset.Id)) 6192 return false; 6193 6194 if (!skipToken(AsmToken::Comma, "expected a comma")) 6195 return false; 6196 6197 Width.Loc = getLoc(); 6198 return parseExpr(Width.Id) && 6199 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6200 } 6201 6202 bool 6203 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6204 const OperandInfoTy &Offset, 6205 const OperandInfoTy &Width) { 6206 6207 using namespace llvm::AMDGPU::Hwreg; 6208 6209 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 6210 Error(HwReg.Loc, 6211 "specified hardware register is not supported on this GPU"); 6212 return false; 6213 } 6214 if (!isValidHwreg(HwReg.Id)) { 6215 Error(HwReg.Loc, 6216 "invalid code of hardware register: only 6-bit values are legal"); 6217 return false; 6218 } 6219 if (!isValidHwregOffset(Offset.Id)) { 6220 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6221 return false; 6222 } 6223 if (!isValidHwregWidth(Width.Id)) { 6224 Error(Width.Loc, 6225 "invalid bitfield width: only values from 1 to 32 are legal"); 6226 return false; 6227 } 6228 return true; 6229 } 6230 6231 OperandMatchResultTy 6232 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6233 using namespace llvm::AMDGPU::Hwreg; 6234 6235 int64_t ImmVal = 0; 6236 SMLoc Loc = getLoc(); 6237 6238 if (trySkipId("hwreg", AsmToken::LParen)) { 6239 OperandInfoTy HwReg(ID_UNKNOWN_); 6240 OperandInfoTy Offset(OFFSET_DEFAULT_); 6241 OperandInfoTy Width(WIDTH_DEFAULT_); 6242 if (parseHwregBody(HwReg, Offset, Width) && 6243 validateHwreg(HwReg, Offset, Width)) { 6244 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6245 } else { 6246 return MatchOperand_ParseFail; 6247 } 6248 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6249 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6250 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6251 return MatchOperand_ParseFail; 6252 } 6253 } else { 6254 return MatchOperand_ParseFail; 6255 } 6256 6257 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6258 return MatchOperand_Success; 6259 } 6260 6261 bool AMDGPUOperand::isHwreg() const { 6262 return isImmTy(ImmTyHwreg); 6263 } 6264 6265 //===----------------------------------------------------------------------===// 6266 // sendmsg 6267 //===----------------------------------------------------------------------===// 6268 6269 bool 6270 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6271 OperandInfoTy &Op, 6272 OperandInfoTy &Stream) { 6273 using namespace llvm::AMDGPU::SendMsg; 6274 6275 Msg.Loc = getLoc(); 6276 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 6277 Msg.IsSymbolic = true; 6278 lex(); // skip message name 6279 } else if (!parseExpr(Msg.Id, "a message name")) { 6280 return false; 6281 } 6282 6283 if (trySkipToken(AsmToken::Comma)) { 6284 Op.IsDefined = true; 
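      // Parse the operation, e.g. the 'GS_OP_EMIT' part of a macro such as
      // sendmsg(MSG_GS, GS_OP_EMIT, 0).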
    Op.Loc = getLoc();
    if (isToken(AsmToken::Identifier) &&
        (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
      lex(); // skip operation name
    } else if (!parseExpr(Op.Id, "an operation name")) {
      return false;
    }

    if (trySkipToken(AsmToken::Comma)) {
      Stream.IsDefined = true;
      Stream.Loc = getLoc();
      if (!parseExpr(Stream.Id))
        return false;
    }
  }

  return skipToken(AsmToken::RParen, "expected a closing parenthesis");
}

bool
AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
                                 const OperandInfoTy &Op,
                                 const OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  // Validation strictness depends on whether the message is specified
  // in a symbolic or in a numeric form. In the latter case,
  // only the encoding possibility is checked.
  bool Strict = Msg.IsSymbolic;

  if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
    Error(Msg.Loc, "invalid message id");
    return false;
  }
  if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
    if (Op.IsDefined) {
      Error(Op.Loc, "message does not support operations");
    } else {
      Error(Msg.Loc, "missing message operation");
    }
    return false;
  }
  if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
    Error(Op.Loc, "invalid operation id");
    return false;
  }
  if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
    Error(Stream.Loc, "message operation does not support streams");
    return false;
  }
  if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
    Error(Stream.Loc, "invalid message stream id");
    return false;
  }
  return true;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SendMsg;

  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();

  if (trySkipId("sendmsg", AsmToken::LParen)) {
    OperandInfoTy Msg(ID_UNKNOWN_);
    OperandInfoTy Op(OP_NONE_);
    OperandInfoTy Stream(STREAM_ID_NONE_);
    if (parseSendMsgBody(Msg, Op, Stream) &&
        validateSendMsg(Msg, Op, Stream)) {
      ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
    } else {
      return MatchOperand_ParseFail;
    }
  } else if (parseExpr(ImmVal, "a sendmsg macro")) {
    if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
      Error(Loc, "invalid immediate: only 16-bit values are legal");
      return MatchOperand_ParseFail;
    }
  } else {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isSendMsg() const {
  return isImmTy(ImmTySendMsg);
}

//===----------------------------------------------------------------------===//
// v_interp
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
  StringRef Str;
  SMLoc S = getLoc();

  if (!parseId(Str))
    return MatchOperand_NoMatch;

  int Slot = StringSwitch<int>(Str)
    .Case("p10", 0)
    .Case("p20", 1)
    .Case("p0", 2)
    .Default(-1);

  if (Slot == -1) {
    Error(S, "invalid interpolation slot");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
AMDGPUOperand::ImmTyInterpSlot)); 6400 return MatchOperand_Success; 6401 } 6402 6403 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6404 StringRef Str; 6405 SMLoc S = getLoc(); 6406 6407 if (!parseId(Str)) 6408 return MatchOperand_NoMatch; 6409 6410 if (!Str.startswith("attr")) { 6411 Error(S, "invalid interpolation attribute"); 6412 return MatchOperand_ParseFail; 6413 } 6414 6415 StringRef Chan = Str.take_back(2); 6416 int AttrChan = StringSwitch<int>(Chan) 6417 .Case(".x", 0) 6418 .Case(".y", 1) 6419 .Case(".z", 2) 6420 .Case(".w", 3) 6421 .Default(-1); 6422 if (AttrChan == -1) { 6423 Error(S, "invalid or missing interpolation attribute channel"); 6424 return MatchOperand_ParseFail; 6425 } 6426 6427 Str = Str.drop_back(2).drop_front(4); 6428 6429 uint8_t Attr; 6430 if (Str.getAsInteger(10, Attr)) { 6431 Error(S, "invalid or missing interpolation attribute number"); 6432 return MatchOperand_ParseFail; 6433 } 6434 6435 if (Attr > 63) { 6436 Error(S, "out of bounds interpolation attribute number"); 6437 return MatchOperand_ParseFail; 6438 } 6439 6440 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6441 6442 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6443 AMDGPUOperand::ImmTyInterpAttr)); 6444 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6445 AMDGPUOperand::ImmTyAttrChan)); 6446 return MatchOperand_Success; 6447 } 6448 6449 //===----------------------------------------------------------------------===// 6450 // exp 6451 //===----------------------------------------------------------------------===// 6452 6453 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6454 using namespace llvm::AMDGPU::Exp; 6455 6456 StringRef Str; 6457 SMLoc S = getLoc(); 6458 6459 if (!parseId(Str)) 6460 return MatchOperand_NoMatch; 6461 6462 unsigned Id = getTgtId(Str); 6463 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6464 Error(S, (Id == ET_INVALID) ? 
6465 "invalid exp target" : 6466 "exp target is not supported on this GPU"); 6467 return MatchOperand_ParseFail; 6468 } 6469 6470 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6471 AMDGPUOperand::ImmTyExpTgt)); 6472 return MatchOperand_Success; 6473 } 6474 6475 //===----------------------------------------------------------------------===// 6476 // parser helpers 6477 //===----------------------------------------------------------------------===// 6478 6479 bool 6480 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6481 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6482 } 6483 6484 bool 6485 AMDGPUAsmParser::isId(const StringRef Id) const { 6486 return isId(getToken(), Id); 6487 } 6488 6489 bool 6490 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6491 return getTokenKind() == Kind; 6492 } 6493 6494 bool 6495 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6496 if (isId(Id)) { 6497 lex(); 6498 return true; 6499 } 6500 return false; 6501 } 6502 6503 bool 6504 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 6505 if (isToken(AsmToken::Identifier)) { 6506 StringRef Tok = getTokenStr(); 6507 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 6508 lex(); 6509 return true; 6510 } 6511 } 6512 return false; 6513 } 6514 6515 bool 6516 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6517 if (isId(Id) && peekToken().is(Kind)) { 6518 lex(); 6519 lex(); 6520 return true; 6521 } 6522 return false; 6523 } 6524 6525 bool 6526 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6527 if (isToken(Kind)) { 6528 lex(); 6529 return true; 6530 } 6531 return false; 6532 } 6533 6534 bool 6535 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6536 const StringRef ErrMsg) { 6537 if (!trySkipToken(Kind)) { 6538 Error(getLoc(), ErrMsg); 6539 return false; 6540 } 6541 return true; 6542 } 6543 6544 bool 6545 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6546 SMLoc S = getLoc(); 6547 6548 const MCExpr *Expr; 6549 if (Parser.parseExpression(Expr)) 6550 return false; 6551 6552 if (Expr->evaluateAsAbsolute(Imm)) 6553 return true; 6554 6555 if (Expected.empty()) { 6556 Error(S, "expected absolute expression"); 6557 } else { 6558 Error(S, Twine("expected ", Expected) + 6559 Twine(" or an absolute expression")); 6560 } 6561 return false; 6562 } 6563 6564 bool 6565 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 6566 SMLoc S = getLoc(); 6567 6568 const MCExpr *Expr; 6569 if (Parser.parseExpression(Expr)) 6570 return false; 6571 6572 int64_t IntVal; 6573 if (Expr->evaluateAsAbsolute(IntVal)) { 6574 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 6575 } else { 6576 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 6577 } 6578 return true; 6579 } 6580 6581 bool 6582 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 6583 if (isToken(AsmToken::String)) { 6584 Val = getToken().getStringContents(); 6585 lex(); 6586 return true; 6587 } else { 6588 Error(getLoc(), ErrMsg); 6589 return false; 6590 } 6591 } 6592 6593 bool 6594 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 6595 if (isToken(AsmToken::Identifier)) { 6596 Val = getTokenStr(); 6597 lex(); 6598 return true; 6599 } else { 6600 if (!ErrMsg.empty()) 6601 Error(getLoc(), ErrMsg); 6602 return false; 6603 } 6604 } 6605 6606 AsmToken 6607 AMDGPUAsmParser::getToken() const { 6608 return Parser.getTok(); 6609 } 6610 6611 AsmToken 6612 
AMDGPUAsmParser::peekToken() { 6613 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 6614 } 6615 6616 void 6617 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 6618 auto TokCount = getLexer().peekTokens(Tokens); 6619 6620 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 6621 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 6622 } 6623 6624 AsmToken::TokenKind 6625 AMDGPUAsmParser::getTokenKind() const { 6626 return getLexer().getKind(); 6627 } 6628 6629 SMLoc 6630 AMDGPUAsmParser::getLoc() const { 6631 return getToken().getLoc(); 6632 } 6633 6634 StringRef 6635 AMDGPUAsmParser::getTokenStr() const { 6636 return getToken().getString(); 6637 } 6638 6639 void 6640 AMDGPUAsmParser::lex() { 6641 Parser.Lex(); 6642 } 6643 6644 SMLoc 6645 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 6646 const OperandVector &Operands) const { 6647 for (unsigned i = Operands.size() - 1; i > 0; --i) { 6648 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6649 if (Test(Op)) 6650 return Op.getStartLoc(); 6651 } 6652 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 6653 } 6654 6655 SMLoc 6656 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 6657 const OperandVector &Operands) const { 6658 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 6659 return getOperandLoc(Test, Operands); 6660 } 6661 6662 SMLoc 6663 AMDGPUAsmParser::getRegLoc(unsigned Reg, 6664 const OperandVector &Operands) const { 6665 auto Test = [=](const AMDGPUOperand& Op) { 6666 return Op.isRegKind() && Op.getReg() == Reg; 6667 }; 6668 return getOperandLoc(Test, Operands); 6669 } 6670 6671 SMLoc 6672 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 6673 auto Test = [](const AMDGPUOperand& Op) { 6674 return Op.IsImmKindLiteral() || Op.isExpr(); 6675 }; 6676 return getOperandLoc(Test, Operands); 6677 } 6678 6679 SMLoc 6680 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 6681 auto Test = [](const AMDGPUOperand& Op) { 6682 return Op.isImmKindConst(); 6683 }; 6684 return getOperandLoc(Test, Operands); 6685 } 6686 6687 //===----------------------------------------------------------------------===// 6688 // swizzle 6689 //===----------------------------------------------------------------------===// 6690 6691 LLVM_READNONE 6692 static unsigned 6693 encodeBitmaskPerm(const unsigned AndMask, 6694 const unsigned OrMask, 6695 const unsigned XorMask) { 6696 using namespace llvm::AMDGPU::Swizzle; 6697 6698 return BITMASK_PERM_ENC | 6699 (AndMask << BITMASK_AND_SHIFT) | 6700 (OrMask << BITMASK_OR_SHIFT) | 6701 (XorMask << BITMASK_XOR_SHIFT); 6702 } 6703 6704 bool 6705 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 6706 const unsigned MinVal, 6707 const unsigned MaxVal, 6708 const StringRef ErrMsg, 6709 SMLoc &Loc) { 6710 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6711 return false; 6712 } 6713 Loc = getLoc(); 6714 if (!parseExpr(Op)) { 6715 return false; 6716 } 6717 if (Op < MinVal || Op > MaxVal) { 6718 Error(Loc, ErrMsg); 6719 return false; 6720 } 6721 6722 return true; 6723 } 6724 6725 bool 6726 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 6727 const unsigned MinVal, 6728 const unsigned MaxVal, 6729 const StringRef ErrMsg) { 6730 SMLoc Loc; 6731 for (unsigned i = 0; i < OpNum; ++i) { 6732 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 6733 return false; 6734 } 6735 6736 return true; 6737 } 6738 6739 bool 6740 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t 
&Imm) { 6741 using namespace llvm::AMDGPU::Swizzle; 6742 6743 int64_t Lane[LANE_NUM]; 6744 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 6745 "expected a 2-bit lane id")) { 6746 Imm = QUAD_PERM_ENC; 6747 for (unsigned I = 0; I < LANE_NUM; ++I) { 6748 Imm |= Lane[I] << (LANE_SHIFT * I); 6749 } 6750 return true; 6751 } 6752 return false; 6753 } 6754 6755 bool 6756 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 6757 using namespace llvm::AMDGPU::Swizzle; 6758 6759 SMLoc Loc; 6760 int64_t GroupSize; 6761 int64_t LaneIdx; 6762 6763 if (!parseSwizzleOperand(GroupSize, 6764 2, 32, 6765 "group size must be in the interval [2,32]", 6766 Loc)) { 6767 return false; 6768 } 6769 if (!isPowerOf2_64(GroupSize)) { 6770 Error(Loc, "group size must be a power of two"); 6771 return false; 6772 } 6773 if (parseSwizzleOperand(LaneIdx, 6774 0, GroupSize - 1, 6775 "lane id must be in the interval [0,group size - 1]", 6776 Loc)) { 6777 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 6778 return true; 6779 } 6780 return false; 6781 } 6782 6783 bool 6784 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 6785 using namespace llvm::AMDGPU::Swizzle; 6786 6787 SMLoc Loc; 6788 int64_t GroupSize; 6789 6790 if (!parseSwizzleOperand(GroupSize, 6791 2, 32, 6792 "group size must be in the interval [2,32]", 6793 Loc)) { 6794 return false; 6795 } 6796 if (!isPowerOf2_64(GroupSize)) { 6797 Error(Loc, "group size must be a power of two"); 6798 return false; 6799 } 6800 6801 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 6802 return true; 6803 } 6804 6805 bool 6806 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 6807 using namespace llvm::AMDGPU::Swizzle; 6808 6809 SMLoc Loc; 6810 int64_t GroupSize; 6811 6812 if (!parseSwizzleOperand(GroupSize, 6813 1, 16, 6814 "group size must be in the interval [1,16]", 6815 Loc)) { 6816 return false; 6817 } 6818 if (!isPowerOf2_64(GroupSize)) { 6819 Error(Loc, "group size must be a power of two"); 6820 return false; 6821 } 6822 6823 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 6824 return true; 6825 } 6826 6827 bool 6828 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 6829 using namespace llvm::AMDGPU::Swizzle; 6830 6831 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6832 return false; 6833 } 6834 6835 StringRef Ctl; 6836 SMLoc StrLoc = getLoc(); 6837 if (!parseString(Ctl)) { 6838 return false; 6839 } 6840 if (Ctl.size() != BITMASK_WIDTH) { 6841 Error(StrLoc, "expected a 5-character mask"); 6842 return false; 6843 } 6844 6845 unsigned AndMask = 0; 6846 unsigned OrMask = 0; 6847 unsigned XorMask = 0; 6848 6849 for (size_t i = 0; i < Ctl.size(); ++i) { 6850 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 6851 switch(Ctl[i]) { 6852 default: 6853 Error(StrLoc, "invalid mask"); 6854 return false; 6855 case '0': 6856 break; 6857 case '1': 6858 OrMask |= Mask; 6859 break; 6860 case 'p': 6861 AndMask |= Mask; 6862 break; 6863 case 'i': 6864 AndMask |= Mask; 6865 XorMask |= Mask; 6866 break; 6867 } 6868 } 6869 6870 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 6871 return true; 6872 } 6873 6874 bool 6875 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 6876 6877 SMLoc OffsetLoc = getLoc(); 6878 6879 if (!parseExpr(Imm, "a swizzle macro")) { 6880 return false; 6881 } 6882 if (!isUInt<16>(Imm)) { 6883 Error(OffsetLoc, "expected a 16-bit offset"); 6884 return false; 6885 } 6886 return true; 6887 } 6888 6889 bool 6890 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 6891 using namespace llvm::AMDGPU::Swizzle; 6892 6893 if 
(skipToken(AsmToken::LParen, "expected a left parenthesis")) {

    SMLoc ModeLoc = getLoc();
    bool Ok = false;

    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
      Ok = parseSwizzleQuadPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
      Ok = parseSwizzleBitmaskPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
      Ok = parseSwizzleBroadcast(Imm);
    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
      Ok = parseSwizzleSwap(Imm);
    } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
      Ok = parseSwizzleReverse(Imm);
    } else {
      Error(ModeLoc, "expected a swizzle mode");
    }

    return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
  }

  return false;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (trySkipId("offset")) {

    bool Ok = false;
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (trySkipId("swizzle")) {
        Ok = parseSwizzleMacro(Imm);
      } else {
        Ok = parseSwizzleOffset(Imm);
      }
    }

    Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));

    return Ok? MatchOperand_Success : MatchOperand_ParseFail;
  } else {
    // Swizzle "offset" operand is optional.
    // If it is omitted, try parsing other optional operands.
    return parseOptionalOpr(Operands);
  }
}

bool
AMDGPUOperand::isSwizzle() const {
  return isImmTy(ImmTySwizzle);
}

//===----------------------------------------------------------------------===//
// VGPR Index Mode
//===----------------------------------------------------------------------===//

int64_t AMDGPUAsmParser::parseGPRIdxMacro() {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  if (trySkipToken(AsmToken::RParen)) {
    return OFF;
  }

  int64_t Imm = 0;

  while (true) {
    unsigned Mode = 0;
    SMLoc S = getLoc();

    for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
      if (trySkipId(IdSymbolic[ModeId])) {
        Mode = 1 << ModeId;
        break;
      }
    }

    if (Mode == 0) {
      Error(S, (Imm == 0)?
6976 "expected a VGPR index mode or a closing parenthesis" : 6977 "expected a VGPR index mode"); 6978 return UNDEF; 6979 } 6980 6981 if (Imm & Mode) { 6982 Error(S, "duplicate VGPR index mode"); 6983 return UNDEF; 6984 } 6985 Imm |= Mode; 6986 6987 if (trySkipToken(AsmToken::RParen)) 6988 break; 6989 if (!skipToken(AsmToken::Comma, 6990 "expected a comma or a closing parenthesis")) 6991 return UNDEF; 6992 } 6993 6994 return Imm; 6995 } 6996 6997 OperandMatchResultTy 6998 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 6999 7000 using namespace llvm::AMDGPU::VGPRIndexMode; 7001 7002 int64_t Imm = 0; 7003 SMLoc S = getLoc(); 7004 7005 if (trySkipId("gpr_idx", AsmToken::LParen)) { 7006 Imm = parseGPRIdxMacro(); 7007 if (Imm == UNDEF) 7008 return MatchOperand_ParseFail; 7009 } else { 7010 if (getParser().parseAbsoluteExpression(Imm)) 7011 return MatchOperand_ParseFail; 7012 if (Imm < 0 || !isUInt<4>(Imm)) { 7013 Error(S, "invalid immediate: only 4-bit values are legal"); 7014 return MatchOperand_ParseFail; 7015 } 7016 } 7017 7018 Operands.push_back( 7019 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 7020 return MatchOperand_Success; 7021 } 7022 7023 bool AMDGPUOperand::isGPRIdxMode() const { 7024 return isImmTy(ImmTyGprIdxMode); 7025 } 7026 7027 //===----------------------------------------------------------------------===// 7028 // sopp branch targets 7029 //===----------------------------------------------------------------------===// 7030 7031 OperandMatchResultTy 7032 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 7033 7034 // Make sure we are not parsing something 7035 // that looks like a label or an expression but is not. 7036 // This will improve error messages. 7037 if (isRegister() || isModifier()) 7038 return MatchOperand_NoMatch; 7039 7040 if (!parseExpr(Operands)) 7041 return MatchOperand_ParseFail; 7042 7043 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 7044 assert(Opr.isImm() || Opr.isExpr()); 7045 SMLoc Loc = Opr.getStartLoc(); 7046 7047 // Currently we do not support arbitrary expressions as branch targets. 7048 // Only labels and absolute expressions are accepted. 
7049 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 7050 Error(Loc, "expected an absolute expression or a label"); 7051 } else if (Opr.isImm() && !Opr.isS16Imm()) { 7052 Error(Loc, "expected a 16-bit signed jump offset"); 7053 } 7054 7055 return MatchOperand_Success; 7056 } 7057 7058 //===----------------------------------------------------------------------===// 7059 // Boolean holding registers 7060 //===----------------------------------------------------------------------===// 7061 7062 OperandMatchResultTy 7063 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 7064 return parseReg(Operands); 7065 } 7066 7067 //===----------------------------------------------------------------------===// 7068 // mubuf 7069 //===----------------------------------------------------------------------===// 7070 7071 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 7072 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 7073 } 7074 7075 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 7076 const OperandVector &Operands, 7077 bool IsAtomic, 7078 bool IsLds) { 7079 bool IsLdsOpcode = IsLds; 7080 bool HasLdsModifier = false; 7081 OptionalImmIndexMap OptionalIdx; 7082 unsigned FirstOperandIdx = 1; 7083 bool IsAtomicReturn = false; 7084 7085 if (IsAtomic) { 7086 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7087 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7088 if (!Op.isCPol()) 7089 continue; 7090 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7091 break; 7092 } 7093 7094 if (!IsAtomicReturn) { 7095 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7096 if (NewOpc != -1) 7097 Inst.setOpcode(NewOpc); 7098 } 7099 7100 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7101 SIInstrFlags::IsAtomicRet; 7102 } 7103 7104 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7105 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7106 7107 // Add the register arguments 7108 if (Op.isReg()) { 7109 Op.addRegOperands(Inst, 1); 7110 // Insert a tied src for atomic return dst. 7111 // This cannot be postponed as subsequent calls to 7112 // addImmOperands rely on correct number of MC operands. 7113 if (IsAtomicReturn && i == FirstOperandIdx) 7114 Op.addRegOperands(Inst, 1); 7115 continue; 7116 } 7117 7118 // Handle the case where soffset is an immediate 7119 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7120 Op.addImmOperands(Inst, 1); 7121 continue; 7122 } 7123 7124 HasLdsModifier |= Op.isLDS(); 7125 7126 // Handle tokens like 'offen' which are sometimes hard-coded into the 7127 // asm string. There are no MCInst operands for these. 7128 if (Op.isToken()) { 7129 continue; 7130 } 7131 assert(Op.isImm()); 7132 7133 // Handle optional arguments 7134 OptionalIdx[Op.getImmTy()] = i; 7135 } 7136 7137 // This is a workaround for an llvm quirk which may result in an 7138 // incorrect instruction selection. Lds and non-lds versions of 7139 // MUBUF instructions are identical except that lds versions 7140 // have mandatory 'lds' modifier. However this modifier follows 7141 // optional modifiers and llvm asm matcher regards this 'lds' 7142 // modifier as an optional one. As a result, an lds version 7143 // of opcode may be selected even if it has no 'lds' modifier. 7144 if (IsLdsOpcode && !HasLdsModifier) { 7145 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 7146 if (NoLdsOpcode != -1) { // Got lds version - correct it. 
7147 Inst.setOpcode(NoLdsOpcode); 7148 IsLdsOpcode = false; 7149 } 7150 } 7151 7152 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7153 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7154 7155 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 7156 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7157 } 7158 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7159 } 7160 7161 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7162 OptionalImmIndexMap OptionalIdx; 7163 7164 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7165 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7166 7167 // Add the register arguments 7168 if (Op.isReg()) { 7169 Op.addRegOperands(Inst, 1); 7170 continue; 7171 } 7172 7173 // Handle the case where soffset is an immediate 7174 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7175 Op.addImmOperands(Inst, 1); 7176 continue; 7177 } 7178 7179 // Handle tokens like 'offen' which are sometimes hard-coded into the 7180 // asm string. There are no MCInst operands for these. 7181 if (Op.isToken()) { 7182 continue; 7183 } 7184 assert(Op.isImm()); 7185 7186 // Handle optional arguments 7187 OptionalIdx[Op.getImmTy()] = i; 7188 } 7189 7190 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7191 AMDGPUOperand::ImmTyOffset); 7192 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7193 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7194 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7195 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7196 } 7197 7198 //===----------------------------------------------------------------------===// 7199 // mimg 7200 //===----------------------------------------------------------------------===// 7201 7202 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7203 bool IsAtomic) { 7204 unsigned I = 1; 7205 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7206 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7207 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7208 } 7209 7210 if (IsAtomic) { 7211 // Add src, same as dst 7212 assert(Desc.getNumDefs() == 1); 7213 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7214 } 7215 7216 OptionalImmIndexMap OptionalIdx; 7217 7218 for (unsigned E = Operands.size(); I != E; ++I) { 7219 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7220 7221 // Add the register arguments 7222 if (Op.isReg()) { 7223 Op.addRegOperands(Inst, 1); 7224 } else if (Op.isImmModifier()) { 7225 OptionalIdx[Op.getImmTy()] = I; 7226 } else if (!Op.isToken()) { 7227 llvm_unreachable("unexpected operand type"); 7228 } 7229 } 7230 7231 bool IsGFX10Plus = isGFX10Plus(); 7232 7233 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7234 if (IsGFX10Plus) 7235 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7236 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7237 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7238 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7239 if (IsGFX10Plus) 7240 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7241 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 
7242 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7243 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7244 if (!IsGFX10Plus) 7245 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7246 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7247 } 7248 7249 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7250 cvtMIMG(Inst, Operands, true); 7251 } 7252 7253 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7254 OptionalImmIndexMap OptionalIdx; 7255 bool IsAtomicReturn = false; 7256 7257 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7258 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7259 if (!Op.isCPol()) 7260 continue; 7261 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7262 break; 7263 } 7264 7265 if (!IsAtomicReturn) { 7266 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7267 if (NewOpc != -1) 7268 Inst.setOpcode(NewOpc); 7269 } 7270 7271 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7272 SIInstrFlags::IsAtomicRet; 7273 7274 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7275 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7276 7277 // Add the register arguments 7278 if (Op.isReg()) { 7279 Op.addRegOperands(Inst, 1); 7280 if (IsAtomicReturn && i == 1) 7281 Op.addRegOperands(Inst, 1); 7282 continue; 7283 } 7284 7285 // Handle the case where soffset is an immediate 7286 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7287 Op.addImmOperands(Inst, 1); 7288 continue; 7289 } 7290 7291 // Handle tokens like 'offen' which are sometimes hard-coded into the 7292 // asm string. There are no MCInst operands for these. 7293 if (Op.isToken()) { 7294 continue; 7295 } 7296 assert(Op.isImm()); 7297 7298 // Handle optional arguments 7299 OptionalIdx[Op.getImmTy()] = i; 7300 } 7301 7302 if ((int)Inst.getNumOperands() <= 7303 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7304 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7305 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7306 } 7307 7308 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7309 const OperandVector &Operands) { 7310 for (unsigned I = 1; I < Operands.size(); ++I) { 7311 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7312 if (Operand.isReg()) 7313 Operand.addRegOperands(Inst, 1); 7314 } 7315 7316 Inst.addOperand(MCOperand::createImm(1)); // a16 7317 } 7318 7319 //===----------------------------------------------------------------------===// 7320 // smrd 7321 //===----------------------------------------------------------------------===// 7322 7323 bool AMDGPUOperand::isSMRDOffset8() const { 7324 return isImm() && isUInt<8>(getImm()); 7325 } 7326 7327 bool AMDGPUOperand::isSMEMOffset() const { 7328 return isImm(); // Offset range is checked later by validator. 7329 } 7330 7331 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7332 // 32-bit literals are only supported on CI and we only want to use them 7333 // when the offset is > 8-bits. 
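  // For example, an offset of 0x1ff requires the literal form, while 0xff
  // still fits in 8 bits and does not.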
7334 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7335 } 7336 7337 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7338 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7339 } 7340 7341 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7342 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7343 } 7344 7345 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7346 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7347 } 7348 7349 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7350 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7351 } 7352 7353 //===----------------------------------------------------------------------===// 7354 // vop3 7355 //===----------------------------------------------------------------------===// 7356 7357 static bool ConvertOmodMul(int64_t &Mul) { 7358 if (Mul != 1 && Mul != 2 && Mul != 4) 7359 return false; 7360 7361 Mul >>= 1; 7362 return true; 7363 } 7364 7365 static bool ConvertOmodDiv(int64_t &Div) { 7366 if (Div == 1) { 7367 Div = 0; 7368 return true; 7369 } 7370 7371 if (Div == 2) { 7372 Div = 3; 7373 return true; 7374 } 7375 7376 return false; 7377 } 7378 7379 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7380 // This is intentional and ensures compatibility with sp3. 7381 // See bug 35397 for details. 7382 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7383 if (BoundCtrl == 0 || BoundCtrl == 1) { 7384 BoundCtrl = 1; 7385 return true; 7386 } 7387 return false; 7388 } 7389 7390 // Note: the order in this table matches the order of operands in AsmString. 7391 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7392 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7393 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7394 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7395 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7396 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7397 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7398 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7399 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7400 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7401 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7402 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7403 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7404 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7405 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7406 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7407 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7408 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7409 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7410 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7411 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7412 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7413 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7414 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7415 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7416 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7417 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7418 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7419 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7420 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7421 {"src0_sel", 
AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7422 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7423 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7424 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr},
7425 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7426 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7427 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7428 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7429 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7430 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7431 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7432 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7433 };
7434
7435 void AMDGPUAsmParser::onBeginOfFile() {
7436 if (!getParser().getStreamer().getTargetStreamer() ||
7437 getSTI().getTargetTriple().getArch() == Triple::r600)
7438 return;
7439
7440 if (!getTargetStreamer().getTargetID())
7441 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7442
7443 if (isHsaAbiVersion3Or4(&getSTI()))
7444 getTargetStreamer().EmitDirectiveAMDGCNTarget();
7445 }
7446
7447 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7448
7449 OperandMatchResultTy res = parseOptionalOpr(Operands);
7450
7451 // This is a hack to enable hardcoded mandatory operands which follow
7452 // optional operands.
7453 //
7454 // The current design assumes that all operands after the first optional
7455 // operand are also optional. However, the implementation of some instructions
7456 // violates this rule (see e.g. flat/global atomics, which have hardcoded 'glc' operands).
7457 //
7458 // To alleviate this problem, we have to (implicitly) parse extra operands
7459 // to make sure the autogenerated parser of custom operands never hits
7460 // hardcoded mandatory operands.
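// Sketch of the lookahead below: after a successful parse of an optional
// operand we optionally skip a comma and try to parse another optional
// operand, up to MAX_OPR_LOOKAHEAD times, stopping at the end of the
// statement. A (hypothetical) input such as "flat_atomic_swap v0, v[1:2], v3 glc",
// where the trailing 'glc' is a hardcoded token rather than an optional
// operand, is the kind of case this lookahead is meant to keep working.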
7461 7462 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7463 if (res != MatchOperand_Success || 7464 isToken(AsmToken::EndOfStatement)) 7465 break; 7466 7467 trySkipToken(AsmToken::Comma); 7468 res = parseOptionalOpr(Operands); 7469 } 7470 7471 return res; 7472 } 7473 7474 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7475 OperandMatchResultTy res; 7476 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7477 // try to parse any optional operand here 7478 if (Op.IsBit) { 7479 res = parseNamedBit(Op.Name, Operands, Op.Type); 7480 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7481 res = parseOModOperand(Operands); 7482 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7483 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7484 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7485 res = parseSDWASel(Operands, Op.Name, Op.Type); 7486 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7487 res = parseSDWADstUnused(Operands); 7488 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7489 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7490 Op.Type == AMDGPUOperand::ImmTyNegLo || 7491 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7492 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7493 Op.ConvertResult); 7494 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7495 res = parseDim(Operands); 7496 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7497 res = parseCPol(Operands); 7498 } else { 7499 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7500 } 7501 if (res != MatchOperand_NoMatch) { 7502 return res; 7503 } 7504 } 7505 return MatchOperand_NoMatch; 7506 } 7507 7508 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 7509 StringRef Name = getTokenStr(); 7510 if (Name == "mul") { 7511 return parseIntWithPrefix("mul", Operands, 7512 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 7513 } 7514 7515 if (Name == "div") { 7516 return parseIntWithPrefix("div", Operands, 7517 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 7518 } 7519 7520 return MatchOperand_NoMatch; 7521 } 7522 7523 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 7524 cvtVOP3P(Inst, Operands); 7525 7526 int Opc = Inst.getOpcode(); 7527 7528 int SrcNum; 7529 const int Ops[] = { AMDGPU::OpName::src0, 7530 AMDGPU::OpName::src1, 7531 AMDGPU::OpName::src2 }; 7532 for (SrcNum = 0; 7533 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 7534 ++SrcNum); 7535 assert(SrcNum > 0); 7536 7537 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7538 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7539 7540 if ((OpSel & (1 << SrcNum)) != 0) { 7541 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 7542 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 7543 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 7544 } 7545 } 7546 7547 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 7548 // 1. This operand is input modifiers 7549 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 7550 // 2. This is not last operand 7551 && Desc.NumOperands > (OpNum + 1) 7552 // 3. Next operand is register class 7553 && Desc.OpInfo[OpNum + 1].RegClass != -1 7554 // 4. 
Next register is not tied to any other operand 7555 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 7556 } 7557 7558 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 7559 { 7560 OptionalImmIndexMap OptionalIdx; 7561 unsigned Opc = Inst.getOpcode(); 7562 7563 unsigned I = 1; 7564 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7565 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7566 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7567 } 7568 7569 for (unsigned E = Operands.size(); I != E; ++I) { 7570 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7571 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7572 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7573 } else if (Op.isInterpSlot() || 7574 Op.isInterpAttr() || 7575 Op.isAttrChan()) { 7576 Inst.addOperand(MCOperand::createImm(Op.getImm())); 7577 } else if (Op.isImmModifier()) { 7578 OptionalIdx[Op.getImmTy()] = I; 7579 } else { 7580 llvm_unreachable("unhandled operand type"); 7581 } 7582 } 7583 7584 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 7585 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 7586 } 7587 7588 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7589 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7590 } 7591 7592 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7593 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7594 } 7595 } 7596 7597 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 7598 OptionalImmIndexMap &OptionalIdx) { 7599 unsigned Opc = Inst.getOpcode(); 7600 7601 unsigned I = 1; 7602 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7603 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7604 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7605 } 7606 7607 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 7608 // This instruction has src modifiers 7609 for (unsigned E = Operands.size(); I != E; ++I) { 7610 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7611 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7612 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7613 } else if (Op.isImmModifier()) { 7614 OptionalIdx[Op.getImmTy()] = I; 7615 } else if (Op.isRegOrImm()) { 7616 Op.addRegOrImmOperands(Inst, 1); 7617 } else { 7618 llvm_unreachable("unhandled operand type"); 7619 } 7620 } 7621 } else { 7622 // No src modifiers 7623 for (unsigned E = Operands.size(); I != E; ++I) { 7624 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7625 if (Op.isMod()) { 7626 OptionalIdx[Op.getImmTy()] = I; 7627 } else { 7628 Op.addRegOrImmOperands(Inst, 1); 7629 } 7630 } 7631 } 7632 7633 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7634 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7635 } 7636 7637 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7638 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7639 } 7640 7641 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 7642 // it has src2 register operand that is tied to dst operand 7643 // we don't allow modifiers for this operand in assembler so src2_modifiers 7644 // should be 0. 
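// In other words, for the opcodes listed below we synthesize two operands at
// the src2_modifiers position: an immediate 0 (no modifiers) followed by a
// copy of operand 0 (the dst register), which becomes the tied src2.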
7645 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 7646 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 7647 Opc == AMDGPU::V_MAC_F32_e64_vi || 7648 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 7649 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 7650 Opc == AMDGPU::V_MAC_F16_e64_vi || 7651 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 7652 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 7653 Opc == AMDGPU::V_FMAC_F32_e64_vi || 7654 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 7655 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 7656 auto it = Inst.begin(); 7657 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 7658 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 7659 ++it; 7660 // Copy the operand to ensure it's not invalidated when Inst grows. 7661 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 7662 } 7663 } 7664 7665 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 7666 OptionalImmIndexMap OptionalIdx; 7667 cvtVOP3(Inst, Operands, OptionalIdx); 7668 } 7669 7670 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 7671 OptionalImmIndexMap &OptIdx) { 7672 const int Opc = Inst.getOpcode(); 7673 const MCInstrDesc &Desc = MII.get(Opc); 7674 7675 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 7676 7677 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 7678 assert(!IsPacked); 7679 Inst.addOperand(Inst.getOperand(0)); 7680 } 7681 7682 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 7683 // instruction, and then figure out where to actually put the modifiers 7684 7685 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7686 if (OpSelIdx != -1) { 7687 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 7688 } 7689 7690 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 7691 if (OpSelHiIdx != -1) { 7692 int DefaultVal = IsPacked ? 
-1 : 0; 7693 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 7694 DefaultVal); 7695 } 7696 7697 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 7698 if (NegLoIdx != -1) { 7699 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 7700 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 7701 } 7702 7703 const int Ops[] = { AMDGPU::OpName::src0, 7704 AMDGPU::OpName::src1, 7705 AMDGPU::OpName::src2 }; 7706 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 7707 AMDGPU::OpName::src1_modifiers, 7708 AMDGPU::OpName::src2_modifiers }; 7709 7710 unsigned OpSel = 0; 7711 unsigned OpSelHi = 0; 7712 unsigned NegLo = 0; 7713 unsigned NegHi = 0; 7714 7715 if (OpSelIdx != -1) 7716 OpSel = Inst.getOperand(OpSelIdx).getImm(); 7717 7718 if (OpSelHiIdx != -1) 7719 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 7720 7721 if (NegLoIdx != -1) { 7722 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 7723 NegLo = Inst.getOperand(NegLoIdx).getImm(); 7724 NegHi = Inst.getOperand(NegHiIdx).getImm(); 7725 } 7726 7727 for (int J = 0; J < 3; ++J) { 7728 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 7729 if (OpIdx == -1) 7730 break; 7731 7732 uint32_t ModVal = 0; 7733 7734 if ((OpSel & (1 << J)) != 0) 7735 ModVal |= SISrcMods::OP_SEL_0; 7736 7737 if ((OpSelHi & (1 << J)) != 0) 7738 ModVal |= SISrcMods::OP_SEL_1; 7739 7740 if ((NegLo & (1 << J)) != 0) 7741 ModVal |= SISrcMods::NEG; 7742 7743 if ((NegHi & (1 << J)) != 0) 7744 ModVal |= SISrcMods::NEG_HI; 7745 7746 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 7747 7748 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 7749 } 7750 } 7751 7752 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 7753 OptionalImmIndexMap OptIdx; 7754 cvtVOP3(Inst, Operands, OptIdx); 7755 cvtVOP3P(Inst, Operands, OptIdx); 7756 } 7757 7758 //===----------------------------------------------------------------------===// 7759 // dpp 7760 //===----------------------------------------------------------------------===// 7761 7762 bool AMDGPUOperand::isDPP8() const { 7763 return isImmTy(ImmTyDPP8); 7764 } 7765 7766 bool AMDGPUOperand::isDPPCtrl() const { 7767 using namespace AMDGPU::DPP; 7768 7769 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 7770 if (result) { 7771 int64_t Imm = getImm(); 7772 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 7773 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 7774 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 7775 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 7776 (Imm == DppCtrl::WAVE_SHL1) || 7777 (Imm == DppCtrl::WAVE_ROL1) || 7778 (Imm == DppCtrl::WAVE_SHR1) || 7779 (Imm == DppCtrl::WAVE_ROR1) || 7780 (Imm == DppCtrl::ROW_MIRROR) || 7781 (Imm == DppCtrl::ROW_HALF_MIRROR) || 7782 (Imm == DppCtrl::BCAST15) || 7783 (Imm == DppCtrl::BCAST31) || 7784 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 7785 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 7786 } 7787 return false; 7788 } 7789 7790 //===----------------------------------------------------------------------===// 7791 // mAI 7792 //===----------------------------------------------------------------------===// 7793 7794 bool AMDGPUOperand::isBLGP() const { 7795 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 7796 } 7797 7798 bool 
AMDGPUOperand::isCBSZ() const { 7799 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 7800 } 7801 7802 bool AMDGPUOperand::isABID() const { 7803 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 7804 } 7805 7806 bool AMDGPUOperand::isS16Imm() const { 7807 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 7808 } 7809 7810 bool AMDGPUOperand::isU16Imm() const { 7811 return isImm() && isUInt<16>(getImm()); 7812 } 7813 7814 //===----------------------------------------------------------------------===// 7815 // dim 7816 //===----------------------------------------------------------------------===// 7817 7818 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 7819 // We want to allow "dim:1D" etc., 7820 // but the initial 1 is tokenized as an integer. 7821 std::string Token; 7822 if (isToken(AsmToken::Integer)) { 7823 SMLoc Loc = getToken().getEndLoc(); 7824 Token = std::string(getTokenStr()); 7825 lex(); 7826 if (getLoc() != Loc) 7827 return false; 7828 } 7829 7830 StringRef Suffix; 7831 if (!parseId(Suffix)) 7832 return false; 7833 Token += Suffix; 7834 7835 StringRef DimId = Token; 7836 if (DimId.startswith("SQ_RSRC_IMG_")) 7837 DimId = DimId.drop_front(12); 7838 7839 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 7840 if (!DimInfo) 7841 return false; 7842 7843 Encoding = DimInfo->Encoding; 7844 return true; 7845 } 7846 7847 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 7848 if (!isGFX10Plus()) 7849 return MatchOperand_NoMatch; 7850 7851 SMLoc S = getLoc(); 7852 7853 if (!trySkipId("dim", AsmToken::Colon)) 7854 return MatchOperand_NoMatch; 7855 7856 unsigned Encoding; 7857 SMLoc Loc = getLoc(); 7858 if (!parseDimId(Encoding)) { 7859 Error(Loc, "invalid dim value"); 7860 return MatchOperand_ParseFail; 7861 } 7862 7863 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 7864 AMDGPUOperand::ImmTyDim)); 7865 return MatchOperand_Success; 7866 } 7867 7868 //===----------------------------------------------------------------------===// 7869 // dpp 7870 //===----------------------------------------------------------------------===// 7871 7872 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 7873 SMLoc S = getLoc(); 7874 7875 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 7876 return MatchOperand_NoMatch; 7877 7878 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 7879 7880 int64_t Sels[8]; 7881 7882 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7883 return MatchOperand_ParseFail; 7884 7885 for (size_t i = 0; i < 8; ++i) { 7886 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7887 return MatchOperand_ParseFail; 7888 7889 SMLoc Loc = getLoc(); 7890 if (getParser().parseAbsoluteExpression(Sels[i])) 7891 return MatchOperand_ParseFail; 7892 if (0 > Sels[i] || 7 < Sels[i]) { 7893 Error(Loc, "expected a 3-bit value"); 7894 return MatchOperand_ParseFail; 7895 } 7896 } 7897 7898 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7899 return MatchOperand_ParseFail; 7900 7901 unsigned DPP8 = 0; 7902 for (size_t i = 0; i < 8; ++i) 7903 DPP8 |= (Sels[i] << (i * 3)); 7904 7905 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 7906 return MatchOperand_Success; 7907 } 7908 7909 bool 7910 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 7911 const OperandVector &Operands) { 7912 if (Ctrl == "row_newbcast") 7913 return isGFX90A(); 7914 7915 if (Ctrl == "row_share" || 7916 Ctrl 
== "row_xmask") 7917 return isGFX10Plus(); 7918 7919 if (Ctrl == "wave_shl" || 7920 Ctrl == "wave_shr" || 7921 Ctrl == "wave_rol" || 7922 Ctrl == "wave_ror" || 7923 Ctrl == "row_bcast") 7924 return isVI() || isGFX9(); 7925 7926 return Ctrl == "row_mirror" || 7927 Ctrl == "row_half_mirror" || 7928 Ctrl == "quad_perm" || 7929 Ctrl == "row_shl" || 7930 Ctrl == "row_shr" || 7931 Ctrl == "row_ror"; 7932 } 7933 7934 int64_t 7935 AMDGPUAsmParser::parseDPPCtrlPerm() { 7936 // quad_perm:[%d,%d,%d,%d] 7937 7938 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7939 return -1; 7940 7941 int64_t Val = 0; 7942 for (int i = 0; i < 4; ++i) { 7943 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7944 return -1; 7945 7946 int64_t Temp; 7947 SMLoc Loc = getLoc(); 7948 if (getParser().parseAbsoluteExpression(Temp)) 7949 return -1; 7950 if (Temp < 0 || Temp > 3) { 7951 Error(Loc, "expected a 2-bit value"); 7952 return -1; 7953 } 7954 7955 Val += (Temp << i * 2); 7956 } 7957 7958 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7959 return -1; 7960 7961 return Val; 7962 } 7963 7964 int64_t 7965 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 7966 using namespace AMDGPU::DPP; 7967 7968 // sel:%d 7969 7970 int64_t Val; 7971 SMLoc Loc = getLoc(); 7972 7973 if (getParser().parseAbsoluteExpression(Val)) 7974 return -1; 7975 7976 struct DppCtrlCheck { 7977 int64_t Ctrl; 7978 int Lo; 7979 int Hi; 7980 }; 7981 7982 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 7983 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 7984 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 7985 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 7986 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 7987 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 7988 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 7989 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 7990 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 7991 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 7992 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 7993 .Default({-1, 0, 0}); 7994 7995 bool Valid; 7996 if (Check.Ctrl == -1) { 7997 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 7998 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 7999 } else { 8000 Valid = Check.Lo <= Val && Val <= Check.Hi; 8001 Val = (Check.Lo == Check.Hi) ? 
Check.Ctrl : (Check.Ctrl | Val); 8002 } 8003 8004 if (!Valid) { 8005 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 8006 return -1; 8007 } 8008 8009 return Val; 8010 } 8011 8012 OperandMatchResultTy 8013 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 8014 using namespace AMDGPU::DPP; 8015 8016 if (!isToken(AsmToken::Identifier) || 8017 !isSupportedDPPCtrl(getTokenStr(), Operands)) 8018 return MatchOperand_NoMatch; 8019 8020 SMLoc S = getLoc(); 8021 int64_t Val = -1; 8022 StringRef Ctrl; 8023 8024 parseId(Ctrl); 8025 8026 if (Ctrl == "row_mirror") { 8027 Val = DppCtrl::ROW_MIRROR; 8028 } else if (Ctrl == "row_half_mirror") { 8029 Val = DppCtrl::ROW_HALF_MIRROR; 8030 } else { 8031 if (skipToken(AsmToken::Colon, "expected a colon")) { 8032 if (Ctrl == "quad_perm") { 8033 Val = parseDPPCtrlPerm(); 8034 } else { 8035 Val = parseDPPCtrlSel(Ctrl); 8036 } 8037 } 8038 } 8039 8040 if (Val == -1) 8041 return MatchOperand_ParseFail; 8042 8043 Operands.push_back( 8044 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 8045 return MatchOperand_Success; 8046 } 8047 8048 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 8049 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 8050 } 8051 8052 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 8053 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 8054 } 8055 8056 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 8057 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 8058 } 8059 8060 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 8061 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 8062 } 8063 8064 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 8065 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 8066 } 8067 8068 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8069 OptionalImmIndexMap OptionalIdx; 8070 8071 unsigned Opc = Inst.getOpcode(); 8072 bool HasModifiers = 8073 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8074 unsigned I = 1; 8075 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8076 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8077 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8078 } 8079 8080 int Fi = 0; 8081 for (unsigned E = Operands.size(); I != E; ++I) { 8082 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8083 MCOI::TIED_TO); 8084 if (TiedTo != -1) { 8085 assert((unsigned)TiedTo < Inst.getNumOperands()); 8086 // handle tied old or src2 for MAC instructions 8087 Inst.addOperand(Inst.getOperand(TiedTo)); 8088 } 8089 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8090 // Add the register arguments 8091 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8092 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8093 // Skip it. 
8094 continue; 8095 } 8096 8097 if (IsDPP8) { 8098 if (Op.isDPP8()) { 8099 Op.addImmOperands(Inst, 1); 8100 } else if (HasModifiers && 8101 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8102 Op.addRegWithFPInputModsOperands(Inst, 2); 8103 } else if (Op.isFI()) { 8104 Fi = Op.getImm(); 8105 } else if (Op.isReg()) { 8106 Op.addRegOperands(Inst, 1); 8107 } else { 8108 llvm_unreachable("Invalid operand type"); 8109 } 8110 } else { 8111 if (HasModifiers && 8112 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8113 Op.addRegWithFPInputModsOperands(Inst, 2); 8114 } else if (Op.isReg()) { 8115 Op.addRegOperands(Inst, 1); 8116 } else if (Op.isDPPCtrl()) { 8117 Op.addImmOperands(Inst, 1); 8118 } else if (Op.isImm()) { 8119 // Handle optional arguments 8120 OptionalIdx[Op.getImmTy()] = I; 8121 } else { 8122 llvm_unreachable("Invalid operand type"); 8123 } 8124 } 8125 } 8126 8127 if (IsDPP8) { 8128 using namespace llvm::AMDGPU::DPP; 8129 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 8130 } else { 8131 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8132 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8133 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8134 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8135 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8136 } 8137 } 8138 } 8139 8140 //===----------------------------------------------------------------------===// 8141 // sdwa 8142 //===----------------------------------------------------------------------===// 8143 8144 OperandMatchResultTy 8145 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 8146 AMDGPUOperand::ImmTy Type) { 8147 using namespace llvm::AMDGPU::SDWA; 8148 8149 SMLoc S = getLoc(); 8150 StringRef Value; 8151 OperandMatchResultTy res; 8152 8153 SMLoc StringLoc; 8154 res = parseStringWithPrefix(Prefix, Value, StringLoc); 8155 if (res != MatchOperand_Success) { 8156 return res; 8157 } 8158 8159 int64_t Int; 8160 Int = StringSwitch<int64_t>(Value) 8161 .Case("BYTE_0", SdwaSel::BYTE_0) 8162 .Case("BYTE_1", SdwaSel::BYTE_1) 8163 .Case("BYTE_2", SdwaSel::BYTE_2) 8164 .Case("BYTE_3", SdwaSel::BYTE_3) 8165 .Case("WORD_0", SdwaSel::WORD_0) 8166 .Case("WORD_1", SdwaSel::WORD_1) 8167 .Case("DWORD", SdwaSel::DWORD) 8168 .Default(0xffffffff); 8169 8170 if (Int == 0xffffffff) { 8171 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8172 return MatchOperand_ParseFail; 8173 } 8174 8175 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8176 return MatchOperand_Success; 8177 } 8178 8179 OperandMatchResultTy 8180 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8181 using namespace llvm::AMDGPU::SDWA; 8182 8183 SMLoc S = getLoc(); 8184 StringRef Value; 8185 OperandMatchResultTy res; 8186 8187 SMLoc StringLoc; 8188 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8189 if (res != MatchOperand_Success) { 8190 return res; 8191 } 8192 8193 int64_t Int; 8194 Int = StringSwitch<int64_t>(Value) 8195 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8196 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8197 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8198 .Default(0xffffffff); 8199 8200 if (Int == 0xffffffff) { 8201 Error(StringLoc, "invalid dst_unused value"); 8202 return MatchOperand_ParseFail; 8203 } 8204 8205 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, 
AMDGPUOperand::ImmTySdwaDstUnused)); 8206 return MatchOperand_Success; 8207 } 8208 8209 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8210 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8211 } 8212 8213 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8214 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8215 } 8216 8217 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8218 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8219 } 8220 8221 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8222 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8223 } 8224 8225 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8226 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8227 } 8228 8229 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8230 uint64_t BasicInstType, 8231 bool SkipDstVcc, 8232 bool SkipSrcVcc) { 8233 using namespace llvm::AMDGPU::SDWA; 8234 8235 OptionalImmIndexMap OptionalIdx; 8236 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 8237 bool SkippedVcc = false; 8238 8239 unsigned I = 1; 8240 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8241 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8242 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8243 } 8244 8245 for (unsigned E = Operands.size(); I != E; ++I) { 8246 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8247 if (SkipVcc && !SkippedVcc && Op.isReg() && 8248 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 8249 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 8250 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 8251 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 8252 // Skip VCC only if we didn't skip it on previous iteration. 8253 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
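// Concretely: when the dst vcc is seen, only vdst has been added to the
// MCInst so far (getNumOperands() == 1); when the src vcc is seen, vdst plus
// the two modifier/register pairs for src0 and src1 have been added
// (getNumOperands() == 5). For VOPC no defs have been added yet, hence the
// check against 0 below.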
8254 if (BasicInstType == SIInstrFlags::VOP2 && 8255 ((SkipDstVcc && Inst.getNumOperands() == 1) || 8256 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 8257 SkippedVcc = true; 8258 continue; 8259 } else if (BasicInstType == SIInstrFlags::VOPC && 8260 Inst.getNumOperands() == 0) { 8261 SkippedVcc = true; 8262 continue; 8263 } 8264 } 8265 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8266 Op.addRegOrImmWithInputModsOperands(Inst, 2); 8267 } else if (Op.isImm()) { 8268 // Handle optional arguments 8269 OptionalIdx[Op.getImmTy()] = I; 8270 } else { 8271 llvm_unreachable("Invalid operand type"); 8272 } 8273 SkippedVcc = false; 8274 } 8275 8276 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 8277 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 8278 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 8279 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 8280 switch (BasicInstType) { 8281 case SIInstrFlags::VOP1: 8282 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8283 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8284 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8285 } 8286 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8287 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8288 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8289 break; 8290 8291 case SIInstrFlags::VOP2: 8292 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8293 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8294 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8295 } 8296 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8297 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8298 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8299 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8300 break; 8301 8302 case SIInstrFlags::VOPC: 8303 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 8304 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8305 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8306 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8307 break; 8308 8309 default: 8310 llvm_unreachable("Invalid instruction type. 
Only VOP1, VOP2 and VOPC allowed");
8311 }
8312 }
8313
8314 // Special case v_mac_{f16, f32}:
8315 // they have a src2 register operand that is tied to the dst operand.
8316 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8317 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
8318 auto it = Inst.begin();
8319 std::advance(
8320 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8321 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8322 }
8323 }
8324
8325 //===----------------------------------------------------------------------===//
8326 // mAI
8327 //===----------------------------------------------------------------------===//
8328
8329 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8330 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8331 }
8332
8333 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8334 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8335 }
8336
8337 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8338 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8339 }
8340
8341 /// Force static initialization.
8342 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8343 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8344 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8345 }
8346
8347 #define GET_REGISTER_MATCHER
8348 #define GET_MATCHER_IMPLEMENTATION
8349 #define GET_MNEMONIC_SPELL_CHECKER
8350 #define GET_MNEMONIC_CHECKER
8351 #include "AMDGPUGenAsmMatcher.inc"
8352
8353 // This function should be defined after the auto-generated include so that we
8354 // have the MatchClassKind enum defined.
8355 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8356 unsigned Kind) {
8357 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
8358 // But MatchInstructionImpl() expects to see a token and fails to validate the
8359 // operand. This method checks whether we were given an immediate operand but
8360 // are expected to produce the corresponding token.
8361 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8362 switch (Kind) {
8363 case MCK_addr64:
8364 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8365 case MCK_gds:
8366 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8367 case MCK_lds:
8368 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8369 case MCK_idxen:
8370 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8371 case MCK_offen:
8372 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8373 case MCK_SSrcB32:
8374 // When operands have expression values, they will return true for isToken,
8375 // because it is not possible to distinguish between a token and an
8376 // expression at parse time. MatchInstructionImpl() will always try to
8377 // match an operand as a token when isToken returns true, and when the
8378 // name of the expression is not a valid token the match will fail,
8379 // so we need to handle it here.
8380 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8381 case MCK_SSrcF32:
8382 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8383 case MCK_SoppBrTarget:
8384 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8385 case MCK_VReg32OrOff:
8386 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8387 case MCK_InterpSlot:
8388 return Operand.isInterpSlot() ?
Match_Success : Match_InvalidOperand; 8389 case MCK_Attr: 8390 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 8391 case MCK_AttrChan: 8392 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 8393 case MCK_ImmSMEMOffset: 8394 return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand; 8395 case MCK_SReg_64: 8396 case MCK_SReg_64_XEXEC: 8397 // Null is defined as a 32-bit register but 8398 // it should also be enabled with 64-bit operands. 8399 // The following code enables it for SReg_64 operands 8400 // used as source and destination. Remaining source 8401 // operands are handled in isInlinableImm. 8402 return Operand.isNull() ? Match_Success : Match_InvalidOperand; 8403 default: 8404 return Match_InvalidOperand; 8405 } 8406 } 8407 8408 //===----------------------------------------------------------------------===// 8409 // endpgm 8410 //===----------------------------------------------------------------------===// 8411 8412 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 8413 SMLoc S = getLoc(); 8414 int64_t Imm = 0; 8415 8416 if (!parseExpr(Imm)) { 8417 // The operand is optional, if not present default to 0 8418 Imm = 0; 8419 } 8420 8421 if (!isUInt<16>(Imm)) { 8422 Error(S, "expected a 16-bit value"); 8423 return MatchOperand_ParseFail; 8424 } 8425 8426 Operands.push_back( 8427 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 8428 return MatchOperand_Success; 8429 } 8430 8431 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 8432