//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/TargetParser.h"

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }
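    // Encodes the operand's source modifiers. FP (abs/neg) and integer (sext)
    // modifiers are mutually exclusive, so only one group is ever encoded.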
    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }
  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrcF32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrcB32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcB32();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVCSrcV2FP32() const {
    return isVCSrcF64();
  }

  bool isVSrcV2FP32() const {
    return isVSrcF64() || isLiteralImm(MVT::v2f32);
  }

  bool isVCSrcV2INT32() const {
    return isVCSrcB64();
  }

  bool isVSrcV2INT32() const {
    return isVSrcB64() || isLiteralImm(MVT::v2i32);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }
  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512B32();
  }

  bool isVISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024B32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isVISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128F16() || isVISrc_128B32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }
  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
    case IS_AGPR: // fall through
    case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
    default: break;
    }
  }
};
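// Target-specific parser for AMDGPU assembly. Besides instructions, it handles
// AMDGPU assembler directives (HSA kernel descriptors, HSA metadata,
// amd_kernel_code_t, PAL metadata) and target-specific operand modifiers.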
class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;
  unsigned CPolSeen;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseCPol(OperandVector &Operands);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value,
                                             SMLoc &StringLoc);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                                    bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands,
                                                     bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
  OperandMatchResultTy parseUfmt(int64_t &Format);
  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
  OperandMatchResultTy parseNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  bool parseHwregBody(OperandInfoTy &HwReg,
                      OperandInfoTy &Offset,
                      OperandInfoTy &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const OperandInfoTy &Offset,
                     const OperandInfoTy &Width);

  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
  SMLoc getLitLoc(const OperandVector &Operands) const;
  SMLoc getConstLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
  bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
  bool validateDivScale(const MCInst &Inst);
  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                             const SMLoc &IDLoc);
  Optional<StringRef> validateLdsDirect(const MCInst &Inst);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Pref, const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseId(StringRef &Val, const StringRef ErrMsg = "");

  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm, StringRef Expected = "");
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  void onBeginOfFile() override;

  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperand(int64_t &Op,
                           const unsigned MinVal,
                           const unsigned MaxVal,
                           const StringRef ErrMsg,
                           SMLoc &Loc);
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultCPol() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMEMOffset() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                OptionalImmIndexMap &OptionalIdx);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);

  void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);

  bool parseDimId(unsigned &Encoding);
  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType,
               bool SkipDstVcc = false,
               bool SkipSrcVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};

struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with integer type with equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_KIMM32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_KIMM16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to single precision
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss but not overflow or underflow.
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}

static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
  if (VT.getScalarType() == MVT::i16) {
    // FP immediate values are broken.
    return isInlinableIntLiteral(Val);
  }

  // f16/v2f16 operands work correctly for all values.
  return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
}
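// Returns true if this immediate can be encoded as an inline constant (small
// integers, 0.5, 1.0, 2.0, 4.0, their negations, or 1/(2*pi) when supported)
// for an operand of the given type, rather than as a 32-bit literal.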
1796 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1797 } 1798 1799 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1800 1801 // This is a hack to enable named inline values like 1802 // shared_base with both 32-bit and 64-bit operands. 1803 // Note that these values are defined as 1804 // 32-bit operands only. 1805 if (isInlineValue()) { 1806 return true; 1807 } 1808 1809 if (!isImmTy(ImmTyNone)) { 1810 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1811 return false; 1812 } 1813 // TODO: We should avoid using host float here. It would be better to 1814 // check the float bit values which is what a few other places do. 1815 // We've had bot failures before due to weird NaN support on mips hosts. 1816 1817 APInt Literal(64, Imm.Val); 1818 1819 if (Imm.IsFPImm) { // We got fp literal token 1820 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1821 return AMDGPU::isInlinableLiteral64(Imm.Val, 1822 AsmParser->hasInv2PiInlineImm()); 1823 } 1824 1825 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1826 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1827 return false; 1828 1829 if (type.getScalarSizeInBits() == 16) { 1830 return isInlineableLiteralOp16( 1831 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1832 type, AsmParser->hasInv2PiInlineImm()); 1833 } 1834 1835 // Check if single precision literal is inlinable 1836 return AMDGPU::isInlinableLiteral32( 1837 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1838 AsmParser->hasInv2PiInlineImm()); 1839 } 1840 1841 // We got int literal token. 1842 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1843 return AMDGPU::isInlinableLiteral64(Imm.Val, 1844 AsmParser->hasInv2PiInlineImm()); 1845 } 1846 1847 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1848 return false; 1849 } 1850 1851 if (type.getScalarSizeInBits() == 16) { 1852 return isInlineableLiteralOp16( 1853 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1854 type, AsmParser->hasInv2PiInlineImm()); 1855 } 1856 1857 return AMDGPU::isInlinableLiteral32( 1858 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1859 AsmParser->hasInv2PiInlineImm()); 1860 } 1861 1862 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1863 // Check that this immediate can be added as literal 1864 if (!isImmTy(ImmTyNone)) { 1865 return false; 1866 } 1867 1868 if (!Imm.IsFPImm) { 1869 // We got int literal token. 1870 1871 if (type == MVT::f64 && hasFPModifiers()) { 1872 // Cannot apply fp modifiers to int literals preserving the same semantics 1873 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1874 // disable these cases. 1875 return false; 1876 } 1877 1878 unsigned Size = type.getSizeInBits(); 1879 if (Size == 64) 1880 Size = 32; 1881 1882 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1883 // types. 1884 return isSafeTruncation(Imm.Val, Size); 1885 } 1886 1887 // We got fp literal token 1888 if (type == MVT::f64) { // Expected 64-bit fp operand 1889 // We would set low 64-bits of literal to zeroes but we accept this literals 1890 return true; 1891 } 1892 1893 if (type == MVT::i64) { // Expected 64-bit int operand 1894 // We don't allow fp literals in 64-bit integer instructions. It is 1895 // unclear how we should encode them. 
1896 return false; 1897 } 1898 1899 // We allow fp literals with f16x2 operands assuming that the specified 1900 // literal goes into the lower half and the upper half is zero. We also 1901 // require that the literal may be losslesly converted to f16. 1902 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1903 (type == MVT::v2i16)? MVT::i16 : 1904 (type == MVT::v2f32)? MVT::f32 : type; 1905 1906 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1907 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1908 } 1909 1910 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1911 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1912 } 1913 1914 bool AMDGPUOperand::isVRegWithInputMods() const { 1915 return isRegClass(AMDGPU::VGPR_32RegClassID) || 1916 // GFX90A allows DPP on 64-bit operands. 1917 (isRegClass(AMDGPU::VReg_64RegClassID) && 1918 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 1919 } 1920 1921 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1922 if (AsmParser->isVI()) 1923 return isVReg32(); 1924 else if (AsmParser->isGFX9Plus()) 1925 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1926 else 1927 return false; 1928 } 1929 1930 bool AMDGPUOperand::isSDWAFP16Operand() const { 1931 return isSDWAOperand(MVT::f16); 1932 } 1933 1934 bool AMDGPUOperand::isSDWAFP32Operand() const { 1935 return isSDWAOperand(MVT::f32); 1936 } 1937 1938 bool AMDGPUOperand::isSDWAInt16Operand() const { 1939 return isSDWAOperand(MVT::i16); 1940 } 1941 1942 bool AMDGPUOperand::isSDWAInt32Operand() const { 1943 return isSDWAOperand(MVT::i32); 1944 } 1945 1946 bool AMDGPUOperand::isBoolReg() const { 1947 auto FB = AsmParser->getFeatureBits(); 1948 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1949 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 1950 } 1951 1952 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1953 { 1954 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1955 assert(Size == 2 || Size == 4 || Size == 8); 1956 1957 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1958 1959 if (Imm.Mods.Abs) { 1960 Val &= ~FpSignMask; 1961 } 1962 if (Imm.Mods.Neg) { 1963 Val ^= FpSignMask; 1964 } 1965 1966 return Val; 1967 } 1968 1969 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1970 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1971 Inst.getNumOperands())) { 1972 addLiteralImmOperand(Inst, Imm.Val, 1973 ApplyModifiers & 1974 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1975 } else { 1976 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1977 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1978 setImmKindNone(); 1979 } 1980 } 1981 1982 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1983 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1984 auto OpNum = Inst.getNumOperands(); 1985 // Check that this operand accepts literals 1986 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1987 1988 if (ApplyModifiers) { 1989 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1990 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 1991 Val = applyInputFPModifiers(Val, Size); 1992 } 1993 1994 APInt Literal(64, Val); 1995 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1996 1997 if (Imm.IsFPImm) { // We got fp literal token 1998 switch (OpTy) { 1999 case AMDGPU::OPERAND_REG_IMM_INT64: 2000 case AMDGPU::OPERAND_REG_IMM_FP64: 2001 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2002 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2003 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2004 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2005 AsmParser->hasInv2PiInlineImm())) { 2006 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2007 setImmKindConst(); 2008 return; 2009 } 2010 2011 // Non-inlineable 2012 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2013 // For fp operands we check if low 32 bits are zeros 2014 if (Literal.getLoBits(32) != 0) { 2015 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2016 "Can't encode literal as exact 64-bit floating-point operand. " 2017 "Low 32-bits will be set to zero"); 2018 } 2019 2020 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2021 setImmKindLiteral(); 2022 return; 2023 } 2024 2025 // We don't allow fp literals in 64-bit integer instructions. It is 2026 // unclear how we should encode them. This case should be checked earlier 2027 // in predicate methods (isLiteralImm()) 2028 llvm_unreachable("fp literal in 64-bit integer instruction."); 2029 2030 case AMDGPU::OPERAND_REG_IMM_INT32: 2031 case AMDGPU::OPERAND_REG_IMM_FP32: 2032 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2033 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2034 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2035 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2036 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2037 case AMDGPU::OPERAND_REG_IMM_INT16: 2038 case AMDGPU::OPERAND_REG_IMM_FP16: 2039 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2040 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2041 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2042 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2043 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2044 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2045 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2046 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2047 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2048 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2049 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2050 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2051 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2052 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2053 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2054 case AMDGPU::OPERAND_KIMM32: 2055 case AMDGPU::OPERAND_KIMM16: { 2056 bool lost; 2057 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2058 // Convert literal to single precision 2059 FPLiteral.convert(*getOpFltSemantics(OpTy), 2060 APFloat::rmNearestTiesToEven, &lost); 2061 // We allow precision lost but not overflow or underflow. This should be 2062 // checked earlier in isLiteralImm() 2063 2064 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2065 Inst.addOperand(MCOperand::createImm(ImmVal)); 2066 setImmKindLiteral(); 2067 return; 2068 } 2069 default: 2070 llvm_unreachable("invalid operand size"); 2071 } 2072 2073 return; 2074 } 2075 2076 // We got int literal token. 2077 // Only sign extend inline immediates. 
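  // Illustrative example (not from the original source): for a 32-bit operand,
  // a value such as -4 is emitted as an inline constant below, while a value
  // such as 0x12345678 falls through to a 32-bit literal.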
2078 switch (OpTy) { 2079 case AMDGPU::OPERAND_REG_IMM_INT32: 2080 case AMDGPU::OPERAND_REG_IMM_FP32: 2081 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2082 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2083 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2084 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2085 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2086 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2087 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2088 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2089 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2090 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2091 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2092 if (isSafeTruncation(Val, 32) && 2093 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2094 AsmParser->hasInv2PiInlineImm())) { 2095 Inst.addOperand(MCOperand::createImm(Val)); 2096 setImmKindConst(); 2097 return; 2098 } 2099 2100 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2101 setImmKindLiteral(); 2102 return; 2103 2104 case AMDGPU::OPERAND_REG_IMM_INT64: 2105 case AMDGPU::OPERAND_REG_IMM_FP64: 2106 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2107 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2108 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2109 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2110 Inst.addOperand(MCOperand::createImm(Val)); 2111 setImmKindConst(); 2112 return; 2113 } 2114 2115 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2116 setImmKindLiteral(); 2117 return; 2118 2119 case AMDGPU::OPERAND_REG_IMM_INT16: 2120 case AMDGPU::OPERAND_REG_IMM_FP16: 2121 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2122 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2123 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2124 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2125 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2126 if (isSafeTruncation(Val, 16) && 2127 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2128 AsmParser->hasInv2PiInlineImm())) { 2129 Inst.addOperand(MCOperand::createImm(Val)); 2130 setImmKindConst(); 2131 return; 2132 } 2133 2134 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2135 setImmKindLiteral(); 2136 return; 2137 2138 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2139 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2140 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2141 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2142 assert(isSafeTruncation(Val, 16)); 2143 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2144 AsmParser->hasInv2PiInlineImm())); 2145 2146 Inst.addOperand(MCOperand::createImm(Val)); 2147 return; 2148 } 2149 case AMDGPU::OPERAND_KIMM32: 2150 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2151 setImmKindNone(); 2152 return; 2153 case AMDGPU::OPERAND_KIMM16: 2154 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2155 setImmKindNone(); 2156 return; 2157 default: 2158 llvm_unreachable("invalid operand size"); 2159 } 2160 } 2161 2162 template <unsigned Bitwidth> 2163 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2164 APInt Literal(64, Imm.Val); 2165 setImmKindNone(); 2166 2167 if (!Imm.IsFPImm) { 2168 // We got int literal token. 
2169 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2170 return; 2171 } 2172 2173 bool Lost; 2174 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2175 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2176 APFloat::rmNearestTiesToEven, &Lost); 2177 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2178 } 2179 2180 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2181 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2182 } 2183 2184 static bool isInlineValue(unsigned Reg) { 2185 switch (Reg) { 2186 case AMDGPU::SRC_SHARED_BASE: 2187 case AMDGPU::SRC_SHARED_LIMIT: 2188 case AMDGPU::SRC_PRIVATE_BASE: 2189 case AMDGPU::SRC_PRIVATE_LIMIT: 2190 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2191 return true; 2192 case AMDGPU::SRC_VCCZ: 2193 case AMDGPU::SRC_EXECZ: 2194 case AMDGPU::SRC_SCC: 2195 return true; 2196 case AMDGPU::SGPR_NULL: 2197 return true; 2198 default: 2199 return false; 2200 } 2201 } 2202 2203 bool AMDGPUOperand::isInlineValue() const { 2204 return isRegKind() && ::isInlineValue(getReg()); 2205 } 2206 2207 //===----------------------------------------------------------------------===// 2208 // AsmParser 2209 //===----------------------------------------------------------------------===// 2210 2211 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2212 if (Is == IS_VGPR) { 2213 switch (RegWidth) { 2214 default: return -1; 2215 case 1: return AMDGPU::VGPR_32RegClassID; 2216 case 2: return AMDGPU::VReg_64RegClassID; 2217 case 3: return AMDGPU::VReg_96RegClassID; 2218 case 4: return AMDGPU::VReg_128RegClassID; 2219 case 5: return AMDGPU::VReg_160RegClassID; 2220 case 6: return AMDGPU::VReg_192RegClassID; 2221 case 7: return AMDGPU::VReg_224RegClassID; 2222 case 8: return AMDGPU::VReg_256RegClassID; 2223 case 16: return AMDGPU::VReg_512RegClassID; 2224 case 32: return AMDGPU::VReg_1024RegClassID; 2225 } 2226 } else if (Is == IS_TTMP) { 2227 switch (RegWidth) { 2228 default: return -1; 2229 case 1: return AMDGPU::TTMP_32RegClassID; 2230 case 2: return AMDGPU::TTMP_64RegClassID; 2231 case 4: return AMDGPU::TTMP_128RegClassID; 2232 case 8: return AMDGPU::TTMP_256RegClassID; 2233 case 16: return AMDGPU::TTMP_512RegClassID; 2234 } 2235 } else if (Is == IS_SGPR) { 2236 switch (RegWidth) { 2237 default: return -1; 2238 case 1: return AMDGPU::SGPR_32RegClassID; 2239 case 2: return AMDGPU::SGPR_64RegClassID; 2240 case 3: return AMDGPU::SGPR_96RegClassID; 2241 case 4: return AMDGPU::SGPR_128RegClassID; 2242 case 5: return AMDGPU::SGPR_160RegClassID; 2243 case 6: return AMDGPU::SGPR_192RegClassID; 2244 case 7: return AMDGPU::SGPR_224RegClassID; 2245 case 8: return AMDGPU::SGPR_256RegClassID; 2246 case 16: return AMDGPU::SGPR_512RegClassID; 2247 } 2248 } else if (Is == IS_AGPR) { 2249 switch (RegWidth) { 2250 default: return -1; 2251 case 1: return AMDGPU::AGPR_32RegClassID; 2252 case 2: return AMDGPU::AReg_64RegClassID; 2253 case 3: return AMDGPU::AReg_96RegClassID; 2254 case 4: return AMDGPU::AReg_128RegClassID; 2255 case 5: return AMDGPU::AReg_160RegClassID; 2256 case 6: return AMDGPU::AReg_192RegClassID; 2257 case 7: return AMDGPU::AReg_224RegClassID; 2258 case 8: return AMDGPU::AReg_256RegClassID; 2259 case 16: return AMDGPU::AReg_512RegClassID; 2260 case 32: return AMDGPU::AReg_1024RegClassID; 2261 } 2262 } 2263 return -1; 2264 } 2265 2266 static unsigned getSpecialRegForName(StringRef RegName) { 2267 return StringSwitch<unsigned>(RegName) 2268 .Case("exec", 
AMDGPU::EXEC) 2269 .Case("vcc", AMDGPU::VCC) 2270 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2271 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2272 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2273 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2274 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2275 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2276 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2277 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2278 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2279 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2280 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2281 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2282 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2283 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2284 .Case("m0", AMDGPU::M0) 2285 .Case("vccz", AMDGPU::SRC_VCCZ) 2286 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2287 .Case("execz", AMDGPU::SRC_EXECZ) 2288 .Case("src_execz", AMDGPU::SRC_EXECZ) 2289 .Case("scc", AMDGPU::SRC_SCC) 2290 .Case("src_scc", AMDGPU::SRC_SCC) 2291 .Case("tba", AMDGPU::TBA) 2292 .Case("tma", AMDGPU::TMA) 2293 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2294 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2295 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2296 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2297 .Case("vcc_lo", AMDGPU::VCC_LO) 2298 .Case("vcc_hi", AMDGPU::VCC_HI) 2299 .Case("exec_lo", AMDGPU::EXEC_LO) 2300 .Case("exec_hi", AMDGPU::EXEC_HI) 2301 .Case("tma_lo", AMDGPU::TMA_LO) 2302 .Case("tma_hi", AMDGPU::TMA_HI) 2303 .Case("tba_lo", AMDGPU::TBA_LO) 2304 .Case("tba_hi", AMDGPU::TBA_HI) 2305 .Case("pc", AMDGPU::PC_REG) 2306 .Case("null", AMDGPU::SGPR_NULL) 2307 .Default(AMDGPU::NoRegister); 2308 } 2309 2310 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2311 SMLoc &EndLoc, bool RestoreOnFailure) { 2312 auto R = parseRegister(); 2313 if (!R) return true; 2314 assert(R->isReg()); 2315 RegNo = R->getReg(); 2316 StartLoc = R->getStartLoc(); 2317 EndLoc = R->getEndLoc(); 2318 return false; 2319 } 2320 2321 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2322 SMLoc &EndLoc) { 2323 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2324 } 2325 2326 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2327 SMLoc &StartLoc, 2328 SMLoc &EndLoc) { 2329 bool Result = 2330 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2331 bool PendingErrors = getParser().hasPendingError(); 2332 getParser().clearPendingErrors(); 2333 if (PendingErrors) 2334 return MatchOperand_ParseFail; 2335 if (Result) 2336 return MatchOperand_NoMatch; 2337 return MatchOperand_Success; 2338 } 2339 2340 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2341 RegisterKind RegKind, unsigned Reg1, 2342 SMLoc Loc) { 2343 switch (RegKind) { 2344 case IS_SPECIAL: 2345 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2346 Reg = AMDGPU::EXEC; 2347 RegWidth = 2; 2348 return true; 2349 } 2350 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2351 Reg = AMDGPU::FLAT_SCR; 2352 RegWidth = 2; 2353 return true; 2354 } 2355 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2356 Reg = AMDGPU::XNACK_MASK; 2357 RegWidth = 2; 2358 return true; 2359 } 2360 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2361 Reg = AMDGPU::VCC; 2362 RegWidth = 2; 2363 return true; 2364 } 2365 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2366 Reg = 
AMDGPU::TBA; 2367 RegWidth = 2; 2368 return true; 2369 } 2370 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2371 Reg = AMDGPU::TMA; 2372 RegWidth = 2; 2373 return true; 2374 } 2375 Error(Loc, "register does not fit in the list"); 2376 return false; 2377 case IS_VGPR: 2378 case IS_SGPR: 2379 case IS_AGPR: 2380 case IS_TTMP: 2381 if (Reg1 != Reg + RegWidth) { 2382 Error(Loc, "registers in a list must have consecutive indices"); 2383 return false; 2384 } 2385 RegWidth++; 2386 return true; 2387 default: 2388 llvm_unreachable("unexpected register kind"); 2389 } 2390 } 2391 2392 struct RegInfo { 2393 StringLiteral Name; 2394 RegisterKind Kind; 2395 }; 2396 2397 static constexpr RegInfo RegularRegisters[] = { 2398 {{"v"}, IS_VGPR}, 2399 {{"s"}, IS_SGPR}, 2400 {{"ttmp"}, IS_TTMP}, 2401 {{"acc"}, IS_AGPR}, 2402 {{"a"}, IS_AGPR}, 2403 }; 2404 2405 static bool isRegularReg(RegisterKind Kind) { 2406 return Kind == IS_VGPR || 2407 Kind == IS_SGPR || 2408 Kind == IS_TTMP || 2409 Kind == IS_AGPR; 2410 } 2411 2412 static const RegInfo* getRegularRegInfo(StringRef Str) { 2413 for (const RegInfo &Reg : RegularRegisters) 2414 if (Str.startswith(Reg.Name)) 2415 return &Reg; 2416 return nullptr; 2417 } 2418 2419 static bool getRegNum(StringRef Str, unsigned& Num) { 2420 return !Str.getAsInteger(10, Num); 2421 } 2422 2423 bool 2424 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2425 const AsmToken &NextToken) const { 2426 2427 // A list of consecutive registers: [s0,s1,s2,s3] 2428 if (Token.is(AsmToken::LBrac)) 2429 return true; 2430 2431 if (!Token.is(AsmToken::Identifier)) 2432 return false; 2433 2434 // A single register like s0 or a range of registers like s[0:1] 2435 2436 StringRef Str = Token.getString(); 2437 const RegInfo *Reg = getRegularRegInfo(Str); 2438 if (Reg) { 2439 StringRef RegName = Reg->Name; 2440 StringRef RegSuffix = Str.substr(RegName.size()); 2441 if (!RegSuffix.empty()) { 2442 unsigned Num; 2443 // A single register with an index: rXX 2444 if (getRegNum(RegSuffix, Num)) 2445 return true; 2446 } else { 2447 // A range of registers: r[XX:YY]. 2448 if (NextToken.is(AsmToken::LBrac)) 2449 return true; 2450 } 2451 } 2452 2453 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2454 } 2455 2456 bool 2457 AMDGPUAsmParser::isRegister() 2458 { 2459 return isRegister(getToken(), peekToken()); 2460 } 2461 2462 unsigned 2463 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2464 unsigned RegNum, 2465 unsigned RegWidth, 2466 SMLoc Loc) { 2467 2468 assert(isRegularReg(RegKind)); 2469 2470 unsigned AlignSize = 1; 2471 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2472 // SGPR and TTMP registers must be aligned. 2473 // Max required alignment is 4 dwords. 
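    // Hypothetical examples: s[8:11] (width 4, starting index 8) satisfies the
    // check below, while s[9:12] is rejected because its starting index is not
    // a multiple of min(width, 4).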
2474 AlignSize = std::min(RegWidth, 4u); 2475 } 2476 2477 if (RegNum % AlignSize != 0) { 2478 Error(Loc, "invalid register alignment"); 2479 return AMDGPU::NoRegister; 2480 } 2481 2482 unsigned RegIdx = RegNum / AlignSize; 2483 int RCID = getRegClass(RegKind, RegWidth); 2484 if (RCID == -1) { 2485 Error(Loc, "invalid or unsupported register size"); 2486 return AMDGPU::NoRegister; 2487 } 2488 2489 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2490 const MCRegisterClass RC = TRI->getRegClass(RCID); 2491 if (RegIdx >= RC.getNumRegs()) { 2492 Error(Loc, "register index is out of range"); 2493 return AMDGPU::NoRegister; 2494 } 2495 2496 return RC.getRegister(RegIdx); 2497 } 2498 2499 bool 2500 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2501 int64_t RegLo, RegHi; 2502 if (!skipToken(AsmToken::LBrac, "missing register index")) 2503 return false; 2504 2505 SMLoc FirstIdxLoc = getLoc(); 2506 SMLoc SecondIdxLoc; 2507 2508 if (!parseExpr(RegLo)) 2509 return false; 2510 2511 if (trySkipToken(AsmToken::Colon)) { 2512 SecondIdxLoc = getLoc(); 2513 if (!parseExpr(RegHi)) 2514 return false; 2515 } else { 2516 RegHi = RegLo; 2517 } 2518 2519 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2520 return false; 2521 2522 if (!isUInt<32>(RegLo)) { 2523 Error(FirstIdxLoc, "invalid register index"); 2524 return false; 2525 } 2526 2527 if (!isUInt<32>(RegHi)) { 2528 Error(SecondIdxLoc, "invalid register index"); 2529 return false; 2530 } 2531 2532 if (RegLo > RegHi) { 2533 Error(FirstIdxLoc, "first register index should not exceed second index"); 2534 return false; 2535 } 2536 2537 Num = static_cast<unsigned>(RegLo); 2538 Width = (RegHi - RegLo) + 1; 2539 return true; 2540 } 2541 2542 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2543 unsigned &RegNum, unsigned &RegWidth, 2544 SmallVectorImpl<AsmToken> &Tokens) { 2545 assert(isToken(AsmToken::Identifier)); 2546 unsigned Reg = getSpecialRegForName(getTokenStr()); 2547 if (Reg) { 2548 RegNum = 0; 2549 RegWidth = 1; 2550 RegKind = IS_SPECIAL; 2551 Tokens.push_back(getToken()); 2552 lex(); // skip register name 2553 } 2554 return Reg; 2555 } 2556 2557 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2558 unsigned &RegNum, unsigned &RegWidth, 2559 SmallVectorImpl<AsmToken> &Tokens) { 2560 assert(isToken(AsmToken::Identifier)); 2561 StringRef RegName = getTokenStr(); 2562 auto Loc = getLoc(); 2563 2564 const RegInfo *RI = getRegularRegInfo(RegName); 2565 if (!RI) { 2566 Error(Loc, "invalid register name"); 2567 return AMDGPU::NoRegister; 2568 } 2569 2570 Tokens.push_back(getToken()); 2571 lex(); // skip register name 2572 2573 RegKind = RI->Kind; 2574 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2575 if (!RegSuffix.empty()) { 2576 // Single 32-bit register: vXX. 2577 if (!getRegNum(RegSuffix, RegNum)) { 2578 Error(Loc, "invalid register index"); 2579 return AMDGPU::NoRegister; 2580 } 2581 RegWidth = 1; 2582 } else { 2583 // Range of registers: v[XX:YY]. ":YY" is optional. 
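    // Hypothetical examples: "v[4:7]" yields RegNum = 4 and RegWidth = 4,
    // while "v[4]" yields RegNum = 4 and RegWidth = 1.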
2584 if (!ParseRegRange(RegNum, RegWidth)) 2585 return AMDGPU::NoRegister; 2586 } 2587 2588 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2589 } 2590 2591 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2592 unsigned &RegWidth, 2593 SmallVectorImpl<AsmToken> &Tokens) { 2594 unsigned Reg = AMDGPU::NoRegister; 2595 auto ListLoc = getLoc(); 2596 2597 if (!skipToken(AsmToken::LBrac, 2598 "expected a register or a list of registers")) { 2599 return AMDGPU::NoRegister; 2600 } 2601 2602 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2603 2604 auto Loc = getLoc(); 2605 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2606 return AMDGPU::NoRegister; 2607 if (RegWidth != 1) { 2608 Error(Loc, "expected a single 32-bit register"); 2609 return AMDGPU::NoRegister; 2610 } 2611 2612 for (; trySkipToken(AsmToken::Comma); ) { 2613 RegisterKind NextRegKind; 2614 unsigned NextReg, NextRegNum, NextRegWidth; 2615 Loc = getLoc(); 2616 2617 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2618 NextRegNum, NextRegWidth, 2619 Tokens)) { 2620 return AMDGPU::NoRegister; 2621 } 2622 if (NextRegWidth != 1) { 2623 Error(Loc, "expected a single 32-bit register"); 2624 return AMDGPU::NoRegister; 2625 } 2626 if (NextRegKind != RegKind) { 2627 Error(Loc, "registers in a list must be of the same kind"); 2628 return AMDGPU::NoRegister; 2629 } 2630 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2631 return AMDGPU::NoRegister; 2632 } 2633 2634 if (!skipToken(AsmToken::RBrac, 2635 "expected a comma or a closing square bracket")) { 2636 return AMDGPU::NoRegister; 2637 } 2638 2639 if (isRegularReg(RegKind)) 2640 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2641 2642 return Reg; 2643 } 2644 2645 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2646 unsigned &RegNum, unsigned &RegWidth, 2647 SmallVectorImpl<AsmToken> &Tokens) { 2648 auto Loc = getLoc(); 2649 Reg = AMDGPU::NoRegister; 2650 2651 if (isToken(AsmToken::Identifier)) { 2652 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2653 if (Reg == AMDGPU::NoRegister) 2654 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2655 } else { 2656 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2657 } 2658 2659 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2660 if (Reg == AMDGPU::NoRegister) { 2661 assert(Parser.hasPendingError()); 2662 return false; 2663 } 2664 2665 if (!subtargetHasRegister(*TRI, Reg)) { 2666 if (Reg == AMDGPU::SGPR_NULL) { 2667 Error(Loc, "'null' operand is not supported on this GPU"); 2668 } else { 2669 Error(Loc, "register not available on this GPU"); 2670 } 2671 return false; 2672 } 2673 2674 return true; 2675 } 2676 2677 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2678 unsigned &RegNum, unsigned &RegWidth, 2679 bool RestoreOnFailure /*=false*/) { 2680 Reg = AMDGPU::NoRegister; 2681 2682 SmallVector<AsmToken, 1> Tokens; 2683 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2684 if (RestoreOnFailure) { 2685 while (!Tokens.empty()) { 2686 getLexer().UnLex(Tokens.pop_back_val()); 2687 } 2688 } 2689 return true; 2690 } 2691 return false; 2692 } 2693 2694 Optional<StringRef> 2695 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2696 switch (RegKind) { 2697 case IS_VGPR: 2698 return StringRef(".amdgcn.next_free_vgpr"); 2699 case IS_SGPR: 2700 return StringRef(".amdgcn.next_free_sgpr"); 2701 default: 2702 return None; 2703 } 2704 } 2705 2706 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2707 auto SymbolName = getGprCountSymbolName(RegKind); 2708 assert(SymbolName && "initializing invalid register kind"); 2709 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2710 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2711 } 2712 2713 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2714 unsigned DwordRegIndex, 2715 unsigned RegWidth) { 2716 // Symbols are only defined for GCN targets 2717 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2718 return true; 2719 2720 auto SymbolName = getGprCountSymbolName(RegKind); 2721 if (!SymbolName) 2722 return true; 2723 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2724 2725 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2726 int64_t OldCount; 2727 2728 if (!Sym->isVariable()) 2729 return !Error(getLoc(), 2730 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2731 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2732 return !Error( 2733 getLoc(), 2734 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2735 2736 if (OldCount <= NewMax) 2737 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2738 2739 return true; 2740 } 2741 2742 std::unique_ptr<AMDGPUOperand> 2743 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2744 const auto &Tok = getToken(); 2745 SMLoc StartLoc = Tok.getLoc(); 2746 SMLoc EndLoc = Tok.getEndLoc(); 2747 RegisterKind RegKind; 2748 unsigned Reg, RegNum, RegWidth; 2749 2750 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2751 return nullptr; 2752 } 2753 if (isHsaAbiVersion3AndAbove(&getSTI())) { 2754 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2755 return nullptr; 2756 } else 2757 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2758 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2759 } 2760 2761 OperandMatchResultTy 2762 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2763 // TODO: add syntactic sugar for 1/(2*PI) 2764 2765 assert(!isRegister()); 2766 assert(!isModifier()); 2767 2768 const auto& Tok = getToken(); 2769 const auto& NextTok = peekToken(); 2770 bool IsReal = Tok.is(AsmToken::Real); 2771 SMLoc S = getLoc(); 2772 bool Negate = false; 2773 2774 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2775 lex(); 2776 IsReal = true; 2777 Negate = true; 2778 } 2779 2780 if (IsReal) { 2781 // Floating-point expressions are not supported. 2782 // Can only allow floating-point literals with an 2783 // optional sign. 2784 2785 StringRef Num = getTokenStr(); 2786 lex(); 2787 2788 APFloat RealVal(APFloat::IEEEdouble()); 2789 auto roundMode = APFloat::rmNearestTiesToEven; 2790 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2791 return MatchOperand_ParseFail; 2792 } 2793 if (Negate) 2794 RealVal.changeSign(); 2795 2796 Operands.push_back( 2797 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2798 AMDGPUOperand::ImmTyNone, true)); 2799 2800 return MatchOperand_Success; 2801 2802 } else { 2803 int64_t IntVal; 2804 const MCExpr *Expr; 2805 SMLoc S = getLoc(); 2806 2807 if (HasSP3AbsModifier) { 2808 // This is a workaround for handling expressions 2809 // as arguments of SP3 'abs' modifier, for example: 2810 // |1.0| 2811 // |-1| 2812 // |1+x| 2813 // This syntax is not compatible with syntax of standard 2814 // MC expressions (due to the trailing '|'). 
2815 SMLoc EndLoc; 2816 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2817 return MatchOperand_ParseFail; 2818 } else { 2819 if (Parser.parseExpression(Expr)) 2820 return MatchOperand_ParseFail; 2821 } 2822 2823 if (Expr->evaluateAsAbsolute(IntVal)) { 2824 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2825 } else { 2826 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2827 } 2828 2829 return MatchOperand_Success; 2830 } 2831 2832 return MatchOperand_NoMatch; 2833 } 2834 2835 OperandMatchResultTy 2836 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2837 if (!isRegister()) 2838 return MatchOperand_NoMatch; 2839 2840 if (auto R = parseRegister()) { 2841 assert(R->isReg()); 2842 Operands.push_back(std::move(R)); 2843 return MatchOperand_Success; 2844 } 2845 return MatchOperand_ParseFail; 2846 } 2847 2848 OperandMatchResultTy 2849 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2850 auto res = parseReg(Operands); 2851 if (res != MatchOperand_NoMatch) { 2852 return res; 2853 } else if (isModifier()) { 2854 return MatchOperand_NoMatch; 2855 } else { 2856 return parseImm(Operands, HasSP3AbsMod); 2857 } 2858 } 2859 2860 bool 2861 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2862 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2863 const auto &str = Token.getString(); 2864 return str == "abs" || str == "neg" || str == "sext"; 2865 } 2866 return false; 2867 } 2868 2869 bool 2870 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2871 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2872 } 2873 2874 bool 2875 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2876 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2877 } 2878 2879 bool 2880 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2881 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2882 } 2883 2884 // Check if this is an operand modifier or an opcode modifier 2885 // which may look like an expression but it is not. We should 2886 // avoid parsing these modifiers as expressions. Currently 2887 // recognized sequences are: 2888 // |...| 2889 // abs(...) 2890 // neg(...) 2891 // sext(...) 2892 // -reg 2893 // -|...| 2894 // -abs(...) 2895 // name:... 2896 // Note that simple opcode modifiers like 'gds' may be parsed as 2897 // expressions; this is a special case. See getExpressionAsToken. 2898 // 2899 bool 2900 AMDGPUAsmParser::isModifier() { 2901 2902 AsmToken Tok = getToken(); 2903 AsmToken NextToken[2]; 2904 peekTokens(NextToken); 2905 2906 return isOperandModifier(Tok, NextToken[0]) || 2907 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2908 isOpcodeModifierWithVal(Tok, NextToken[0]); 2909 } 2910 2911 // Check if the current token is an SP3 'neg' modifier. 2912 // Currently this modifier is allowed in the following context: 2913 // 2914 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2915 // 2. Before an 'abs' modifier: -abs(...) 2916 // 3. Before an SP3 'abs' modifier: -|...| 2917 // 2918 // In all other cases "-" is handled as a part 2919 // of an expression that follows the sign. 
//
// Note: When "-" is followed by an integer literal,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of a floating-point
// NEG modifier would have resulted in a different meaning
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//     v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//     v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise for uniformity.
//
bool
AMDGPUAsmParser::parseSP3NegModifier() {

  AsmToken NextToken[2];
  peekTokens(NextToken);

  if (isToken(AsmToken::Minus) &&
      (isRegister(NextToken[0], NextToken[1]) ||
       NextToken[0].is(AsmToken::Pipe) ||
       isId(NextToken[0], "abs"))) {
    lex();
    return true;
  }

  return false;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
    return MatchOperand_ParseFail;
  }

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId("neg");
  if (Neg && SP3Neg) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return MatchOperand_ParseFail;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return MatchOperand_ParseFail;

  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  if (Abs && SP3Abs) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, SP3Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2993 } 2994 2995 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2996 return MatchOperand_ParseFail; 2997 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2998 return MatchOperand_ParseFail; 2999 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3000 return MatchOperand_ParseFail; 3001 3002 AMDGPUOperand::Modifiers Mods; 3003 Mods.Abs = Abs || SP3Abs; 3004 Mods.Neg = Neg || SP3Neg; 3005 3006 if (Mods.hasFPModifiers()) { 3007 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3008 if (Op.isExpr()) { 3009 Error(Op.getStartLoc(), "expected an absolute expression"); 3010 return MatchOperand_ParseFail; 3011 } 3012 Op.setModifiers(Mods); 3013 } 3014 return MatchOperand_Success; 3015 } 3016 3017 OperandMatchResultTy 3018 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3019 bool AllowImm) { 3020 bool Sext = trySkipId("sext"); 3021 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3022 return MatchOperand_ParseFail; 3023 3024 OperandMatchResultTy Res; 3025 if (AllowImm) { 3026 Res = parseRegOrImm(Operands); 3027 } else { 3028 Res = parseReg(Operands); 3029 } 3030 if (Res != MatchOperand_Success) { 3031 return Sext? MatchOperand_ParseFail : Res; 3032 } 3033 3034 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3035 return MatchOperand_ParseFail; 3036 3037 AMDGPUOperand::Modifiers Mods; 3038 Mods.Sext = Sext; 3039 3040 if (Mods.hasIntModifiers()) { 3041 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3042 if (Op.isExpr()) { 3043 Error(Op.getStartLoc(), "expected an absolute expression"); 3044 return MatchOperand_ParseFail; 3045 } 3046 Op.setModifiers(Mods); 3047 } 3048 3049 return MatchOperand_Success; 3050 } 3051 3052 OperandMatchResultTy 3053 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3054 return parseRegOrImmWithFPInputMods(Operands, false); 3055 } 3056 3057 OperandMatchResultTy 3058 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3059 return parseRegOrImmWithIntInputMods(Operands, false); 3060 } 3061 3062 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3063 auto Loc = getLoc(); 3064 if (trySkipId("off")) { 3065 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3066 AMDGPUOperand::ImmTyOff, false)); 3067 return MatchOperand_Success; 3068 } 3069 3070 if (!isRegister()) 3071 return MatchOperand_NoMatch; 3072 3073 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3074 if (Reg) { 3075 Operands.push_back(std::move(Reg)); 3076 return MatchOperand_Success; 3077 } 3078 3079 return MatchOperand_ParseFail; 3080 3081 } 3082 3083 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3084 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3085 3086 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3087 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3088 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3089 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3090 return Match_InvalidOperand; 3091 3092 if ((TSFlags & SIInstrFlags::VOP3) && 3093 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3094 getForcedEncodingSize() != 64) 3095 return Match_PreferE32; 3096 3097 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3098 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3099 // v_mac_f32/16 allow only dst_sel == DWORD; 3100 auto OpNum = 3101 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3102 const auto &Op = Inst.getOperand(OpNum); 3103 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3104 return Match_InvalidOperand; 3105 } 3106 } 3107 3108 return Match_Success; 3109 } 3110 3111 static ArrayRef<unsigned> getAllVariants() { 3112 static const unsigned Variants[] = { 3113 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3114 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 3115 }; 3116 3117 return makeArrayRef(Variants); 3118 } 3119 3120 // What asm variants we should check 3121 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3122 if (getForcedEncodingSize() == 32) { 3123 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3124 return makeArrayRef(Variants); 3125 } 3126 3127 if (isForcedVOP3()) { 3128 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3129 return makeArrayRef(Variants); 3130 } 3131 3132 if (isForcedSDWA()) { 3133 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3134 AMDGPUAsmVariants::SDWA9}; 3135 return makeArrayRef(Variants); 3136 } 3137 3138 if (isForcedDPP()) { 3139 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3140 return makeArrayRef(Variants); 3141 } 3142 3143 return getAllVariants(); 3144 } 3145 3146 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3147 if (getForcedEncodingSize() == 32) 3148 return "e32"; 3149 3150 if (isForcedVOP3()) 3151 return "e64"; 3152 3153 if (isForcedSDWA()) 3154 return "sdwa"; 3155 3156 if (isForcedDPP()) 3157 return "dpp"; 3158 3159 return ""; 3160 } 3161 3162 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3163 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3164 const unsigned Num = Desc.getNumImplicitUses(); 3165 for (unsigned i = 0; i < Num; ++i) { 3166 unsigned Reg = Desc.ImplicitUses[i]; 3167 switch (Reg) { 3168 case AMDGPU::FLAT_SCR: 3169 case AMDGPU::VCC: 3170 case AMDGPU::VCC_LO: 3171 case AMDGPU::VCC_HI: 3172 case AMDGPU::M0: 3173 return Reg; 3174 default: 3175 break; 3176 } 3177 } 3178 return AMDGPU::NoRegister; 3179 } 3180 3181 // NB: This code is correct only when used to check constant 3182 // bus limitations because GFX7 support no f16 inline constants. 3183 // Note that there are no cases when a GFX7 opcode violates 3184 // constant bus limitations due to the use of an f16 constant. 
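// For example (illustrative, not from the original source), integer
// immediates in the range -16..64 are reported as inlinable here and
// therefore do not consume a constant bus slot.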
3185 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3186 unsigned OpIdx) const { 3187 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3188 3189 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3190 return false; 3191 } 3192 3193 const MCOperand &MO = Inst.getOperand(OpIdx); 3194 3195 int64_t Val = MO.getImm(); 3196 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3197 3198 switch (OpSize) { // expected operand size 3199 case 8: 3200 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3201 case 4: 3202 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3203 case 2: { 3204 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3205 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3206 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3207 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3208 return AMDGPU::isInlinableIntLiteral(Val); 3209 3210 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3211 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3212 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3213 return AMDGPU::isInlinableIntLiteralV216(Val); 3214 3215 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3216 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3217 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3218 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3219 3220 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3221 } 3222 default: 3223 llvm_unreachable("invalid operand size"); 3224 } 3225 } 3226 3227 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3228 if (!isGFX10Plus()) 3229 return 1; 3230 3231 switch (Opcode) { 3232 // 64-bit shift instructions can use only one scalar value input 3233 case AMDGPU::V_LSHLREV_B64_e64: 3234 case AMDGPU::V_LSHLREV_B64_gfx10: 3235 case AMDGPU::V_LSHRREV_B64_e64: 3236 case AMDGPU::V_LSHRREV_B64_gfx10: 3237 case AMDGPU::V_ASHRREV_I64_e64: 3238 case AMDGPU::V_ASHRREV_I64_gfx10: 3239 case AMDGPU::V_LSHL_B64_e64: 3240 case AMDGPU::V_LSHR_B64_e64: 3241 case AMDGPU::V_ASHR_I64_e64: 3242 return 1; 3243 default: 3244 return 2; 3245 } 3246 } 3247 3248 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3249 const MCOperand &MO = Inst.getOperand(OpIdx); 3250 if (MO.isImm()) { 3251 return !isInlineConstant(Inst, OpIdx); 3252 } else if (MO.isReg()) { 3253 auto Reg = MO.getReg(); 3254 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3255 auto PReg = mc2PseudoReg(Reg); 3256 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3257 } else { 3258 return true; 3259 } 3260 } 3261 3262 bool 3263 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3264 const OperandVector &Operands) { 3265 const unsigned Opcode = Inst.getOpcode(); 3266 const MCInstrDesc &Desc = MII.get(Opcode); 3267 unsigned LastSGPR = AMDGPU::NoRegister; 3268 unsigned ConstantBusUseCount = 0; 3269 unsigned NumLiterals = 0; 3270 unsigned LiteralSize; 3271 3272 if (Desc.TSFlags & 3273 (SIInstrFlags::VOPC | 3274 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3275 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3276 SIInstrFlags::SDWA)) { 3277 // Check special imm operands (used by madmk, etc) 3278 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3279 ++NumLiterals; 3280 LiteralSize = 4; 3281 } 3282 3283 SmallDenseSet<unsigned> SGPRsUsed; 3284 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3285 if (SGPRUsed != AMDGPU::NoRegister) { 3286 SGPRsUsed.insert(SGPRUsed); 3287 ++ConstantBusUseCount; 3288 } 3289 3290 
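    // Illustrative example (not from the original source): on targets where
    // getConstantBusLimit() returns 1, an instruction such as
    //   v_add_f32_e64 v0, s0, s1
    // reads two distinct SGPRs and is rejected below for using two constant
    // bus slots.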
    const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
    const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    for (int OpIdx : OpIndices) {
      if (OpIdx == -1) break;

      const MCOperand &MO = Inst.getOperand(OpIdx);
      if (usesConstantBus(Inst, OpIdx)) {
        if (MO.isReg()) {
          LastSGPR = mc2PseudoReg(MO.getReg());
          // Pairs of registers with partial intersections like these
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but they are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction
          if (!SGPRsUsed.count(LastSGPR)) {
            SGPRsUsed.insert(LastSGPR);
            ++ConstantBusUseCount;
          }
        } else { // Expression or a literal

          if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
            continue; // special operand like VINTERP attr_chan

          // An instruction may use only one literal.
          // This has been validated on the previous step.
          // See validateVOPLiteral.
          // This literal may be used as more than one operand.
          // If all these operands are of the same size,
          // this literal counts as one scalar value.
          // Otherwise it counts as 2 scalar values.
          // See "GFX10 Shader Programming", section 3.6.2.3.

          unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
          if (Size < 4) Size = 4;

          if (NumLiterals == 0) {
            NumLiterals = 1;
            LiteralSize = Size;
          } else if (LiteralSize != Size) {
            NumLiterals = 2;
          }
        }
      }
    }
  }
  ConstantBusUseCount += NumLiterals;

  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
    return true;

  SMLoc LitLoc = getLitLoc(Operands);
  SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
  SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ?
RegLoc : LitLoc; 3348 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3349 return false; 3350 } 3351 3352 bool 3353 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3354 const OperandVector &Operands) { 3355 const unsigned Opcode = Inst.getOpcode(); 3356 const MCInstrDesc &Desc = MII.get(Opcode); 3357 3358 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3359 if (DstIdx == -1 || 3360 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3361 return true; 3362 } 3363 3364 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3365 3366 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3367 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3368 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3369 3370 assert(DstIdx != -1); 3371 const MCOperand &Dst = Inst.getOperand(DstIdx); 3372 assert(Dst.isReg()); 3373 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 3374 3375 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3376 3377 for (int SrcIdx : SrcIndices) { 3378 if (SrcIdx == -1) break; 3379 const MCOperand &Src = Inst.getOperand(SrcIdx); 3380 if (Src.isReg()) { 3381 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3382 if (isRegIntersect(DstReg, SrcReg, TRI)) { 3383 Error(getRegLoc(SrcReg, Operands), 3384 "destination must be different than all sources"); 3385 return false; 3386 } 3387 } 3388 } 3389 3390 return true; 3391 } 3392 3393 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3394 3395 const unsigned Opc = Inst.getOpcode(); 3396 const MCInstrDesc &Desc = MII.get(Opc); 3397 3398 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3399 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3400 assert(ClampIdx != -1); 3401 return Inst.getOperand(ClampIdx).getImm() == 0; 3402 } 3403 3404 return true; 3405 } 3406 3407 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3408 3409 const unsigned Opc = Inst.getOpcode(); 3410 const MCInstrDesc &Desc = MII.get(Opc); 3411 3412 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3413 return true; 3414 3415 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3416 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3417 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3418 3419 assert(VDataIdx != -1); 3420 3421 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3422 return true; 3423 3424 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3425 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3426 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3427 if (DMask == 0) 3428 DMask = 1; 3429 3430 unsigned DataSize = 3431 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 3432 if (hasPackedD16()) { 3433 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3434 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3435 DataSize = (DataSize + 1) / 2; 3436 } 3437 3438 return (VDataSize / 4) == DataSize + TFESize; 3439 } 3440 3441 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3442 const unsigned Opc = Inst.getOpcode(); 3443 const MCInstrDesc &Desc = MII.get(Opc); 3444 3445 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3446 return true; 3447 3448 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3449 3450 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3451 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3452 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3453 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3454 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3455 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3456 3457 assert(VAddr0Idx != -1); 3458 assert(SrsrcIdx != -1); 3459 assert(SrsrcIdx > VAddr0Idx); 3460 3461 if (DimIdx == -1) 3462 return true; // intersect_ray 3463 3464 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3465 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3466 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3467 unsigned ActualAddrSize = 3468 IsNSA ? SrsrcIdx - VAddr0Idx 3469 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3470 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3471 3472 unsigned ExpectedAddrSize = 3473 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3474 3475 if (!IsNSA) { 3476 if (ExpectedAddrSize > 8) 3477 ExpectedAddrSize = 16; 3478 3479 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3480 // This provides backward compatibility for assembly created 3481 // before 160b/192b/224b types were directly supported. 3482 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3483 return true; 3484 } 3485 3486 return ActualAddrSize == ExpectedAddrSize; 3487 } 3488 3489 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3490 3491 const unsigned Opc = Inst.getOpcode(); 3492 const MCInstrDesc &Desc = MII.get(Opc); 3493 3494 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3495 return true; 3496 if (!Desc.mayLoad() || !Desc.mayStore()) 3497 return true; // Not atomic 3498 3499 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3500 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3501 3502 // This is an incomplete check because image_atomic_cmpswap 3503 // may only use 0x3 and 0xf while other atomic operations 3504 // may use 0x1 and 0x3. However these limitations are 3505 // verified when we check that dmask matches dst size. 3506 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3507 } 3508 3509 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3510 3511 const unsigned Opc = Inst.getOpcode(); 3512 const MCInstrDesc &Desc = MII.get(Opc); 3513 3514 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3515 return true; 3516 3517 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3518 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3519 3520 // GATHER4 instructions use dmask in a different fashion compared to 3521 // other MIMG instructions. The only useful DMASK values are 3522 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3523 // (red,red,red,red) etc.) 
The ISA document doesn't mention 3524 // this. 3525 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3526 } 3527 3528 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3529 const unsigned Opc = Inst.getOpcode(); 3530 const MCInstrDesc &Desc = MII.get(Opc); 3531 3532 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3533 return true; 3534 3535 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3536 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3537 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3538 3539 if (!BaseOpcode->MSAA) 3540 return true; 3541 3542 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3543 assert(DimIdx != -1); 3544 3545 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3546 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3547 3548 return DimInfo->MSAA; 3549 } 3550 3551 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3552 { 3553 switch (Opcode) { 3554 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3555 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3556 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3557 return true; 3558 default: 3559 return false; 3560 } 3561 } 3562 3563 // movrels* opcodes should only allow VGPRS as src0. 3564 // This is specified in .td description for vop1/vop3, 3565 // but sdwa is handled differently. See isSDWAOperand. 3566 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3567 const OperandVector &Operands) { 3568 3569 const unsigned Opc = Inst.getOpcode(); 3570 const MCInstrDesc &Desc = MII.get(Opc); 3571 3572 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3573 return true; 3574 3575 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3576 assert(Src0Idx != -1); 3577 3578 SMLoc ErrLoc; 3579 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3580 if (Src0.isReg()) { 3581 auto Reg = mc2PseudoReg(Src0.getReg()); 3582 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3583 if (!isSGPR(Reg, TRI)) 3584 return true; 3585 ErrLoc = getRegLoc(Reg, Operands); 3586 } else { 3587 ErrLoc = getConstLoc(Operands); 3588 } 3589 3590 Error(ErrLoc, "source operand must be a VGPR"); 3591 return false; 3592 } 3593 3594 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3595 const OperandVector &Operands) { 3596 3597 const unsigned Opc = Inst.getOpcode(); 3598 3599 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3600 return true; 3601 3602 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3603 assert(Src0Idx != -1); 3604 3605 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3606 if (!Src0.isReg()) 3607 return true; 3608 3609 auto Reg = mc2PseudoReg(Src0.getReg()); 3610 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3611 if (isSGPR(Reg, TRI)) { 3612 Error(getRegLoc(Reg, Operands), 3613 "source operand must be either a VGPR or an inline constant"); 3614 return false; 3615 } 3616 3617 return true; 3618 } 3619 3620 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 3621 const OperandVector &Operands) { 3622 const unsigned Opc = Inst.getOpcode(); 3623 const MCInstrDesc &Desc = MII.get(Opc); 3624 3625 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 3626 return true; 3627 3628 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 3629 if (Src2Idx == -1) 3630 return true; 3631 3632 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 3633 if (!Src2.isReg()) 3634 return true; 3635 3636 MCRegister Src2Reg = Src2.getReg(); 3637 MCRegister DstReg = Inst.getOperand(0).getReg(); 3638 if 
(Src2Reg == DstReg) 3639 return true; 3640 3641 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3642 if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128) 3643 return true; 3644 3645 if (isRegIntersect(Src2Reg, DstReg, TRI)) { 3646 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands), 3647 "source 2 operand must not partially overlap with dst"); 3648 return false; 3649 } 3650 3651 return true; 3652 } 3653 3654 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3655 switch (Inst.getOpcode()) { 3656 default: 3657 return true; 3658 case V_DIV_SCALE_F32_gfx6_gfx7: 3659 case V_DIV_SCALE_F32_vi: 3660 case V_DIV_SCALE_F32_gfx10: 3661 case V_DIV_SCALE_F64_gfx6_gfx7: 3662 case V_DIV_SCALE_F64_vi: 3663 case V_DIV_SCALE_F64_gfx10: 3664 break; 3665 } 3666 3667 // TODO: Check that src0 = src1 or src2. 3668 3669 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3670 AMDGPU::OpName::src1_modifiers, 3671 AMDGPU::OpName::src2_modifiers}) { 3672 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3673 .getImm() & 3674 SISrcMods::ABS) { 3675 return false; 3676 } 3677 } 3678 3679 return true; 3680 } 3681 3682 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3683 3684 const unsigned Opc = Inst.getOpcode(); 3685 const MCInstrDesc &Desc = MII.get(Opc); 3686 3687 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3688 return true; 3689 3690 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3691 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3692 if (isCI() || isSI()) 3693 return false; 3694 } 3695 3696 return true; 3697 } 3698 3699 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3700 const unsigned Opc = Inst.getOpcode(); 3701 const MCInstrDesc &Desc = MII.get(Opc); 3702 3703 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3704 return true; 3705 3706 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3707 if (DimIdx < 0) 3708 return true; 3709 3710 long Imm = Inst.getOperand(DimIdx).getImm(); 3711 if (Imm < 0 || Imm >= 8) 3712 return false; 3713 3714 return true; 3715 } 3716 3717 static bool IsRevOpcode(const unsigned Opcode) 3718 { 3719 switch (Opcode) { 3720 case AMDGPU::V_SUBREV_F32_e32: 3721 case AMDGPU::V_SUBREV_F32_e64: 3722 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3723 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3724 case AMDGPU::V_SUBREV_F32_e32_vi: 3725 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3726 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3727 case AMDGPU::V_SUBREV_F32_e64_vi: 3728 3729 case AMDGPU::V_SUBREV_CO_U32_e32: 3730 case AMDGPU::V_SUBREV_CO_U32_e64: 3731 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3732 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3733 3734 case AMDGPU::V_SUBBREV_U32_e32: 3735 case AMDGPU::V_SUBBREV_U32_e64: 3736 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3737 case AMDGPU::V_SUBBREV_U32_e32_vi: 3738 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3739 case AMDGPU::V_SUBBREV_U32_e64_vi: 3740 3741 case AMDGPU::V_SUBREV_U32_e32: 3742 case AMDGPU::V_SUBREV_U32_e64: 3743 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3744 case AMDGPU::V_SUBREV_U32_e32_vi: 3745 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3746 case AMDGPU::V_SUBREV_U32_e64_vi: 3747 3748 case AMDGPU::V_SUBREV_F16_e32: 3749 case AMDGPU::V_SUBREV_F16_e64: 3750 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3751 case AMDGPU::V_SUBREV_F16_e32_vi: 3752 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3753 case AMDGPU::V_SUBREV_F16_e64_vi: 3754 3755 case AMDGPU::V_SUBREV_U16_e32: 3756 case AMDGPU::V_SUBREV_U16_e64: 3757 case AMDGPU::V_SUBREV_U16_e32_vi: 3758 case
AMDGPU::V_SUBREV_U16_e64_vi: 3759 3760 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3761 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3762 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3763 3764 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3765 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3766 3767 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3768 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3769 3770 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3771 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3772 3773 case AMDGPU::V_LSHRREV_B32_e32: 3774 case AMDGPU::V_LSHRREV_B32_e64: 3775 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3776 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3777 case AMDGPU::V_LSHRREV_B32_e32_vi: 3778 case AMDGPU::V_LSHRREV_B32_e64_vi: 3779 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3780 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3781 3782 case AMDGPU::V_ASHRREV_I32_e32: 3783 case AMDGPU::V_ASHRREV_I32_e64: 3784 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3785 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3786 case AMDGPU::V_ASHRREV_I32_e32_vi: 3787 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3788 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3789 case AMDGPU::V_ASHRREV_I32_e64_vi: 3790 3791 case AMDGPU::V_LSHLREV_B32_e32: 3792 case AMDGPU::V_LSHLREV_B32_e64: 3793 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3794 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3795 case AMDGPU::V_LSHLREV_B32_e32_vi: 3796 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3797 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3798 case AMDGPU::V_LSHLREV_B32_e64_vi: 3799 3800 case AMDGPU::V_LSHLREV_B16_e32: 3801 case AMDGPU::V_LSHLREV_B16_e64: 3802 case AMDGPU::V_LSHLREV_B16_e32_vi: 3803 case AMDGPU::V_LSHLREV_B16_e64_vi: 3804 case AMDGPU::V_LSHLREV_B16_gfx10: 3805 3806 case AMDGPU::V_LSHRREV_B16_e32: 3807 case AMDGPU::V_LSHRREV_B16_e64: 3808 case AMDGPU::V_LSHRREV_B16_e32_vi: 3809 case AMDGPU::V_LSHRREV_B16_e64_vi: 3810 case AMDGPU::V_LSHRREV_B16_gfx10: 3811 3812 case AMDGPU::V_ASHRREV_I16_e32: 3813 case AMDGPU::V_ASHRREV_I16_e64: 3814 case AMDGPU::V_ASHRREV_I16_e32_vi: 3815 case AMDGPU::V_ASHRREV_I16_e64_vi: 3816 case AMDGPU::V_ASHRREV_I16_gfx10: 3817 3818 case AMDGPU::V_LSHLREV_B64_e64: 3819 case AMDGPU::V_LSHLREV_B64_gfx10: 3820 case AMDGPU::V_LSHLREV_B64_vi: 3821 3822 case AMDGPU::V_LSHRREV_B64_e64: 3823 case AMDGPU::V_LSHRREV_B64_gfx10: 3824 case AMDGPU::V_LSHRREV_B64_vi: 3825 3826 case AMDGPU::V_ASHRREV_I64_e64: 3827 case AMDGPU::V_ASHRREV_I64_gfx10: 3828 case AMDGPU::V_ASHRREV_I64_vi: 3829 3830 case AMDGPU::V_PK_LSHLREV_B16: 3831 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3832 case AMDGPU::V_PK_LSHLREV_B16_vi: 3833 3834 case AMDGPU::V_PK_LSHRREV_B16: 3835 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3836 case AMDGPU::V_PK_LSHRREV_B16_vi: 3837 case AMDGPU::V_PK_ASHRREV_I16: 3838 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3839 case AMDGPU::V_PK_ASHRREV_I16_vi: 3840 return true; 3841 default: 3842 return false; 3843 } 3844 } 3845 3846 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3847 3848 using namespace SIInstrFlags; 3849 const unsigned Opcode = Inst.getOpcode(); 3850 const MCInstrDesc &Desc = MII.get(Opcode); 3851 3852 // lds_direct register is defined so that it can be used 3853 // with 9-bit operands only. Ignore encodings which do not accept these. 
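  // For illustration only (assembly syntax assumed from this parser's
  // conventions): something like "v_mov_b32 v0, lds_direct" passes these
  // checks, whereas lds_direct used as src1/src2, in an SDWA or *rev form,
  // or on gfx90a is diagnosed below.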
3854 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 3855 if ((Desc.TSFlags & Enc) == 0) 3856 return None; 3857 3858 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 3859 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 3860 if (SrcIdx == -1) 3861 break; 3862 const auto &Src = Inst.getOperand(SrcIdx); 3863 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3864 3865 if (isGFX90A()) 3866 return StringRef("lds_direct is not supported on this GPU"); 3867 3868 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 3869 return StringRef("lds_direct cannot be used with this instruction"); 3870 3871 if (SrcName != OpName::src0) 3872 return StringRef("lds_direct may be used as src0 only"); 3873 } 3874 } 3875 3876 return None; 3877 } 3878 3879 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3880 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3881 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3882 if (Op.isFlatOffset()) 3883 return Op.getStartLoc(); 3884 } 3885 return getLoc(); 3886 } 3887 3888 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3889 const OperandVector &Operands) { 3890 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3891 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3892 return true; 3893 3894 auto Opcode = Inst.getOpcode(); 3895 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3896 assert(OpNum != -1); 3897 3898 const auto &Op = Inst.getOperand(OpNum); 3899 if (!hasFlatOffsets() && Op.getImm() != 0) { 3900 Error(getFlatOffsetLoc(Operands), 3901 "flat offset modifier is not supported on this GPU"); 3902 return false; 3903 } 3904 3905 // For FLAT segment the offset must be positive; 3906 // MSB is ignored and forced to zero. 3907 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 3908 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 3909 if (!isIntN(OffsetSize, Op.getImm())) { 3910 Error(getFlatOffsetLoc(Operands), 3911 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 3912 return false; 3913 } 3914 } else { 3915 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 3916 if (!isUIntN(OffsetSize, Op.getImm())) { 3917 Error(getFlatOffsetLoc(Operands), 3918 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 3919 return false; 3920 } 3921 } 3922 3923 return true; 3924 } 3925 3926 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3927 // Start with second operand because SMEM Offset cannot be dst or src0. 
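  // E.g. for "s_load_dword s5, s[2:3], 0x10" (an illustrative instruction,
  // not tied to any particular target), Operands[0] is the mnemonic token and
  // Operands[1] the destination, so the offset can only be found at index 2
  // or later.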
3928 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 3929 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3930 if (Op.isSMEMOffset()) 3931 return Op.getStartLoc(); 3932 } 3933 return getLoc(); 3934 } 3935 3936 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3937 const OperandVector &Operands) { 3938 if (isCI() || isSI()) 3939 return true; 3940 3941 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3942 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3943 return true; 3944 3945 auto Opcode = Inst.getOpcode(); 3946 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3947 if (OpNum == -1) 3948 return true; 3949 3950 const auto &Op = Inst.getOperand(OpNum); 3951 if (!Op.isImm()) 3952 return true; 3953 3954 uint64_t Offset = Op.getImm(); 3955 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3956 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3957 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3958 return true; 3959 3960 Error(getSMEMOffsetLoc(Operands), 3961 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 3962 "expected a 21-bit signed offset"); 3963 3964 return false; 3965 } 3966 3967 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3968 unsigned Opcode = Inst.getOpcode(); 3969 const MCInstrDesc &Desc = MII.get(Opcode); 3970 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3971 return true; 3972 3973 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3974 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3975 3976 const int OpIndices[] = { Src0Idx, Src1Idx }; 3977 3978 unsigned NumExprs = 0; 3979 unsigned NumLiterals = 0; 3980 uint32_t LiteralValue; 3981 3982 for (int OpIdx : OpIndices) { 3983 if (OpIdx == -1) break; 3984 3985 const MCOperand &MO = Inst.getOperand(OpIdx); 3986 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3987 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3988 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3989 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3990 if (NumLiterals == 0 || LiteralValue != Value) { 3991 LiteralValue = Value; 3992 ++NumLiterals; 3993 } 3994 } else if (MO.isExpr()) { 3995 ++NumExprs; 3996 } 3997 } 3998 } 3999 4000 return NumLiterals + NumExprs <= 1; 4001 } 4002 4003 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4004 const unsigned Opc = Inst.getOpcode(); 4005 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 4006 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 4007 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4008 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4009 4010 if (OpSel & ~3) 4011 return false; 4012 } 4013 return true; 4014 } 4015 4016 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4017 const OperandVector &Operands) { 4018 const unsigned Opc = Inst.getOpcode(); 4019 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4020 if (DppCtrlIdx < 0) 4021 return true; 4022 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm(); 4023 4024 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 4025 // DPP64 is supported for row_newbcast only. 
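    // E.g. row_newbcast:1 remains legal with a 64-bit src0, while controls
    // such as quad_perm or row_shl on a 64-bit source are rejected below
    // (control names given only to illustrate the general rule).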
4026 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4027 if (Src0Idx >= 0 && 4028 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 4029 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4030 Error(S, "64 bit dpp only supports row_newbcast"); 4031 return false; 4032 } 4033 } 4034 4035 return true; 4036 } 4037 4038 // Check if VCC register matches wavefront size 4039 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4040 auto FB = getFeatureBits(); 4041 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4042 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4043 } 4044 4045 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4046 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4047 const OperandVector &Operands) { 4048 unsigned Opcode = Inst.getOpcode(); 4049 const MCInstrDesc &Desc = MII.get(Opcode); 4050 const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); 4051 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4052 ImmIdx == -1) 4053 return true; 4054 4055 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4056 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4057 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 4058 4059 const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx}; 4060 4061 unsigned NumExprs = 0; 4062 unsigned NumLiterals = 0; 4063 uint32_t LiteralValue; 4064 4065 for (int OpIdx : OpIndices) { 4066 if (OpIdx == -1) 4067 continue; 4068 4069 const MCOperand &MO = Inst.getOperand(OpIdx); 4070 if (!MO.isImm() && !MO.isExpr()) 4071 continue; 4072 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 4073 continue; 4074 4075 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 4076 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 4077 Error(getConstLoc(Operands), 4078 "inline constants are not allowed for this operand"); 4079 return false; 4080 } 4081 4082 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4083 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4084 if (NumLiterals == 0 || LiteralValue != Value) { 4085 LiteralValue = Value; 4086 ++NumLiterals; 4087 } 4088 } else if (MO.isExpr()) { 4089 ++NumExprs; 4090 } 4091 } 4092 NumLiterals += NumExprs; 4093 4094 if (!NumLiterals) 4095 return true; 4096 4097 if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4098 Error(getLitLoc(Operands), "literal operands are not supported"); 4099 return false; 4100 } 4101 4102 if (NumLiterals > 1) { 4103 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4104 return false; 4105 } 4106 4107 return true; 4108 } 4109 4110 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4111 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4112 const MCRegisterInfo *MRI) { 4113 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4114 if (OpIdx < 0) 4115 return -1; 4116 4117 const MCOperand &Op = Inst.getOperand(OpIdx); 4118 if (!Op.isReg()) 4119 return -1; 4120 4121 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4122 auto Reg = Sub ? Sub : Op.getReg(); 4123 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4124 return AGPR32.contains(Reg) ? 
1 : 0; 4125 } 4126 4127 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4128 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4129 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4130 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4131 SIInstrFlags::DS)) == 0) 4132 return true; 4133 4134 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4135 : AMDGPU::OpName::vdata; 4136 4137 const MCRegisterInfo *MRI = getMRI(); 4138 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4139 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4140 4141 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4142 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4143 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4144 return false; 4145 } 4146 4147 auto FB = getFeatureBits(); 4148 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4149 if (DataAreg < 0 || DstAreg < 0) 4150 return true; 4151 return DstAreg == DataAreg; 4152 } 4153 4154 return DstAreg < 1 && DataAreg < 1; 4155 } 4156 4157 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4158 auto FB = getFeatureBits(); 4159 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4160 return true; 4161 4162 const MCRegisterInfo *MRI = getMRI(); 4163 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4164 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4165 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4166 const MCOperand &Op = Inst.getOperand(I); 4167 if (!Op.isReg()) 4168 continue; 4169 4170 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4171 if (!Sub) 4172 continue; 4173 4174 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4175 return false; 4176 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4177 return false; 4178 } 4179 4180 return true; 4181 } 4182 4183 // gfx90a has an undocumented limitation: 4184 // DS_GWS opcodes must use even aligned registers. 4185 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4186 const OperandVector &Operands) { 4187 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4188 return true; 4189 4190 int Opc = Inst.getOpcode(); 4191 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4192 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4193 return true; 4194 4195 const MCRegisterInfo *MRI = getMRI(); 4196 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4197 int Data0Pos = 4198 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4199 assert(Data0Pos != -1); 4200 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4201 auto RegIdx = Reg - (VGPR32.contains(Reg) ? 
AMDGPU::VGPR0 : AMDGPU::AGPR0); 4202 if (RegIdx & 1) { 4203 SMLoc RegLoc = getRegLoc(Reg, Operands); 4204 Error(RegLoc, "vgpr must be even aligned"); 4205 return false; 4206 } 4207 4208 return true; 4209 } 4210 4211 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4212 const OperandVector &Operands, 4213 const SMLoc &IDLoc) { 4214 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4215 AMDGPU::OpName::cpol); 4216 if (CPolPos == -1) 4217 return true; 4218 4219 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4220 4221 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4222 if ((TSFlags & (SIInstrFlags::SMRD)) && 4223 (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) { 4224 Error(IDLoc, "invalid cache policy for SMRD instruction"); 4225 return false; 4226 } 4227 4228 if (isGFX90A() && (CPol & CPol::SCC)) { 4229 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4230 StringRef CStr(S.getPointer()); 4231 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4232 Error(S, "scc is not supported on this GPU"); 4233 return false; 4234 } 4235 4236 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4237 return true; 4238 4239 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4240 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4241 Error(IDLoc, "instruction must use glc"); 4242 return false; 4243 } 4244 } else { 4245 if (CPol & CPol::GLC) { 4246 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4247 StringRef CStr(S.getPointer()); 4248 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]); 4249 Error(S, "instruction must not use glc"); 4250 return false; 4251 } 4252 } 4253 4254 return true; 4255 } 4256 4257 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4258 const SMLoc &IDLoc, 4259 const OperandVector &Operands) { 4260 if (auto ErrMsg = validateLdsDirect(Inst)) { 4261 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4262 return false; 4263 } 4264 if (!validateSOPLiteral(Inst)) { 4265 Error(getLitLoc(Operands), 4266 "only one literal operand is allowed"); 4267 return false; 4268 } 4269 if (!validateVOPLiteral(Inst, Operands)) { 4270 return false; 4271 } 4272 if (!validateConstantBusLimitations(Inst, Operands)) { 4273 return false; 4274 } 4275 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4276 return false; 4277 } 4278 if (!validateIntClampSupported(Inst)) { 4279 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4280 "integer clamping is not supported on this GPU"); 4281 return false; 4282 } 4283 if (!validateOpSel(Inst)) { 4284 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4285 "invalid op_sel operand"); 4286 return false; 4287 } 4288 if (!validateDPP(Inst, Operands)) { 4289 return false; 4290 } 4291 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
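  // E.g. an image load written with the "d16" modifier is diagnosed below
  // when targeting SI/CI, which do not accept that modifier (illustrative).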
4292 if (!validateMIMGD16(Inst)) { 4293 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4294 "d16 modifier is not supported on this GPU"); 4295 return false; 4296 } 4297 if (!validateMIMGDim(Inst)) { 4298 Error(IDLoc, "dim modifier is required on this GPU"); 4299 return false; 4300 } 4301 if (!validateMIMGMSAA(Inst)) { 4302 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4303 "invalid dim; must be MSAA type"); 4304 return false; 4305 } 4306 if (!validateMIMGDataSize(Inst)) { 4307 Error(IDLoc, 4308 "image data size does not match dmask and tfe"); 4309 return false; 4310 } 4311 if (!validateMIMGAddrSize(Inst)) { 4312 Error(IDLoc, 4313 "image address size does not match dim and a16"); 4314 return false; 4315 } 4316 if (!validateMIMGAtomicDMask(Inst)) { 4317 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4318 "invalid atomic image dmask"); 4319 return false; 4320 } 4321 if (!validateMIMGGatherDMask(Inst)) { 4322 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4323 "invalid image_gather dmask: only one bit must be set"); 4324 return false; 4325 } 4326 if (!validateMovrels(Inst, Operands)) { 4327 return false; 4328 } 4329 if (!validateFlatOffset(Inst, Operands)) { 4330 return false; 4331 } 4332 if (!validateSMEMOffset(Inst, Operands)) { 4333 return false; 4334 } 4335 if (!validateMAIAccWrite(Inst, Operands)) { 4336 return false; 4337 } 4338 if (!validateMFMA(Inst, Operands)) { 4339 return false; 4340 } 4341 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4342 return false; 4343 } 4344 4345 if (!validateAGPRLdSt(Inst)) { 4346 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4347 ? "invalid register class: data and dst should be all VGPR or AGPR" 4348 : "invalid register class: agpr loads and stores not supported on this GPU" 4349 ); 4350 return false; 4351 } 4352 if (!validateVGPRAlign(Inst)) { 4353 Error(IDLoc, 4354 "invalid register class: vgpr tuples must be 64 bit aligned"); 4355 return false; 4356 } 4357 if (!validateGWS(Inst, Operands)) { 4358 return false; 4359 } 4360 4361 if (!validateDivScale(Inst)) { 4362 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4363 return false; 4364 } 4365 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4366 return false; 4367 } 4368 4369 return true; 4370 } 4371 4372 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4373 const FeatureBitset &FBS, 4374 unsigned VariantID = 0); 4375 4376 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4377 const FeatureBitset &AvailableFeatures, 4378 unsigned VariantID); 4379 4380 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4381 const FeatureBitset &FBS) { 4382 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4383 } 4384 4385 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4386 const FeatureBitset &FBS, 4387 ArrayRef<unsigned> Variants) { 4388 for (auto Variant : Variants) { 4389 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4390 return true; 4391 } 4392 4393 return false; 4394 } 4395 4396 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4397 const SMLoc &IDLoc) { 4398 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4399 4400 // Check if requested instruction variant is supported. 4401 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4402 return false; 4403 4404 // This instruction is not supported. 4405 // Clear any other pending errors because they are no longer relevant. 4406 getParser().clearPendingErrors(); 4407 4408 // Requested instruction variant is not supported. 
4409 // Check if any other variants are supported. 4410 StringRef VariantName = getMatchedVariantName(); 4411 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4412 return Error(IDLoc, 4413 Twine(VariantName, 4414 " variant of this instruction is not supported")); 4415 } 4416 4417 // Finally check if this instruction is supported on any other GPU. 4418 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4419 return Error(IDLoc, "instruction not supported on this GPU"); 4420 } 4421 4422 // Instruction not supported on any GPU. Probably a typo. 4423 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4424 return Error(IDLoc, "invalid instruction" + Suggestion); 4425 } 4426 4427 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4428 OperandVector &Operands, 4429 MCStreamer &Out, 4430 uint64_t &ErrorInfo, 4431 bool MatchingInlineAsm) { 4432 MCInst Inst; 4433 unsigned Result = Match_Success; 4434 for (auto Variant : getMatchedVariants()) { 4435 uint64_t EI; 4436 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4437 Variant); 4438 // We order match statuses from least to most specific. We use most specific 4439 // status as resulting 4440 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4441 if ((R == Match_Success) || 4442 (R == Match_PreferE32) || 4443 (R == Match_MissingFeature && Result != Match_PreferE32) || 4444 (R == Match_InvalidOperand && Result != Match_MissingFeature 4445 && Result != Match_PreferE32) || 4446 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4447 && Result != Match_MissingFeature 4448 && Result != Match_PreferE32)) { 4449 Result = R; 4450 ErrorInfo = EI; 4451 } 4452 if (R == Match_Success) 4453 break; 4454 } 4455 4456 if (Result == Match_Success) { 4457 if (!validateInstruction(Inst, IDLoc, Operands)) { 4458 return true; 4459 } 4460 Inst.setLoc(IDLoc); 4461 Out.emitInstruction(Inst, getSTI()); 4462 return false; 4463 } 4464 4465 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4466 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4467 return true; 4468 } 4469 4470 switch (Result) { 4471 default: break; 4472 case Match_MissingFeature: 4473 // It has been verified that the specified instruction 4474 // mnemonic is valid. A match was found but it requires 4475 // features which are not supported on this GPU. 
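    // A hypothetical example: a form that exists only for wave32 reaches this
    // point when assembling for a wave64 configuration.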
4476 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4477 4478 case Match_InvalidOperand: { 4479 SMLoc ErrorLoc = IDLoc; 4480 if (ErrorInfo != ~0ULL) { 4481 if (ErrorInfo >= Operands.size()) { 4482 return Error(IDLoc, "too few operands for instruction"); 4483 } 4484 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4485 if (ErrorLoc == SMLoc()) 4486 ErrorLoc = IDLoc; 4487 } 4488 return Error(ErrorLoc, "invalid operand for instruction"); 4489 } 4490 4491 case Match_PreferE32: 4492 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4493 "should be encoded as e32"); 4494 case Match_MnemonicFail: 4495 llvm_unreachable("Invalid instructions should have been handled already"); 4496 } 4497 llvm_unreachable("Implement any new match types added!"); 4498 } 4499 4500 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4501 int64_t Tmp = -1; 4502 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4503 return true; 4504 } 4505 if (getParser().parseAbsoluteExpression(Tmp)) { 4506 return true; 4507 } 4508 Ret = static_cast<uint32_t>(Tmp); 4509 return false; 4510 } 4511 4512 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4513 uint32_t &Minor) { 4514 if (ParseAsAbsoluteExpression(Major)) 4515 return TokError("invalid major version"); 4516 4517 if (!trySkipToken(AsmToken::Comma)) 4518 return TokError("minor version number required, comma expected"); 4519 4520 if (ParseAsAbsoluteExpression(Minor)) 4521 return TokError("invalid minor version"); 4522 4523 return false; 4524 } 4525 4526 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4527 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4528 return TokError("directive only supported for amdgcn architecture"); 4529 4530 std::string TargetIDDirective; 4531 SMLoc TargetStart = getTok().getLoc(); 4532 if (getParser().parseEscapedString(TargetIDDirective)) 4533 return true; 4534 4535 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4536 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4537 return getParser().Error(TargetRange.Start, 4538 (Twine(".amdgcn_target directive's target id ") + 4539 Twine(TargetIDDirective) + 4540 Twine(" does not match the specified target id ") + 4541 Twine(getTargetStreamer().getTargetID()->toString())).str()); 4542 4543 return false; 4544 } 4545 4546 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4547 return Error(Range.Start, "value out of range", Range); 4548 } 4549 4550 bool AMDGPUAsmParser::calculateGPRBlocks( 4551 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4552 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4553 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4554 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4555 // TODO(scott.linder): These calculations are duplicated from 4556 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
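  // Rough worked example (granule size assumed, not taken from the ISA docs):
  // with a 4-VGPR allocation granule, NextFreeVGPR = 23 rounds up to 24 and
  // granulates to 24 / 4 - 1 = 5 blocks, assuming the zero-based encoding of
  // the granulated count fields; SGPR blocks are derived the same way after
  // the extra VCC / flat scratch / XNACK SGPRs are added in.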
4557 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4558 4559 unsigned NumVGPRs = NextFreeVGPR; 4560 unsigned NumSGPRs = NextFreeSGPR; 4561 4562 if (Version.Major >= 10) 4563 NumSGPRs = 0; 4564 else { 4565 unsigned MaxAddressableNumSGPRs = 4566 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4567 4568 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4569 NumSGPRs > MaxAddressableNumSGPRs) 4570 return OutOfRangeError(SGPRRange); 4571 4572 NumSGPRs += 4573 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4574 4575 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4576 NumSGPRs > MaxAddressableNumSGPRs) 4577 return OutOfRangeError(SGPRRange); 4578 4579 if (Features.test(FeatureSGPRInitBug)) 4580 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4581 } 4582 4583 VGPRBlocks = 4584 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4585 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4586 4587 return false; 4588 } 4589 4590 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4591 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4592 return TokError("directive only supported for amdgcn architecture"); 4593 4594 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4595 return TokError("directive only supported for amdhsa OS"); 4596 4597 StringRef KernelName; 4598 if (getParser().parseIdentifier(KernelName)) 4599 return true; 4600 4601 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4602 4603 StringSet<> Seen; 4604 4605 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4606 4607 SMRange VGPRRange; 4608 uint64_t NextFreeVGPR = 0; 4609 uint64_t AccumOffset = 0; 4610 SMRange SGPRRange; 4611 uint64_t NextFreeSGPR = 0; 4612 4613 // Count the number of user SGPRs implied from the enabled feature bits. 4614 unsigned ImpliedUserSGPRCount = 0; 4615 4616 // Track if the asm explicitly contains the directive for the user SGPR 4617 // count. 
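  // E.g. an explicit ".amdhsa_user_sgpr_count 6" takes precedence over the
  // implied count, subject to the consistency check after the directive loop.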
4618 Optional<unsigned> ExplicitUserSGPRCount; 4619 bool ReserveVCC = true; 4620 bool ReserveFlatScr = true; 4621 Optional<bool> EnableWavefrontSize32; 4622 4623 while (true) { 4624 while (trySkipToken(AsmToken::EndOfStatement)); 4625 4626 StringRef ID; 4627 SMRange IDRange = getTok().getLocRange(); 4628 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4629 return true; 4630 4631 if (ID == ".end_amdhsa_kernel") 4632 break; 4633 4634 if (Seen.find(ID) != Seen.end()) 4635 return TokError(".amdhsa_ directives cannot be repeated"); 4636 Seen.insert(ID); 4637 4638 SMLoc ValStart = getLoc(); 4639 int64_t IVal; 4640 if (getParser().parseAbsoluteExpression(IVal)) 4641 return true; 4642 SMLoc ValEnd = getLoc(); 4643 SMRange ValRange = SMRange(ValStart, ValEnd); 4644 4645 if (IVal < 0) 4646 return OutOfRangeError(ValRange); 4647 4648 uint64_t Val = IVal; 4649 4650 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4651 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4652 return OutOfRangeError(RANGE); \ 4653 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4654 4655 if (ID == ".amdhsa_group_segment_fixed_size") { 4656 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4657 return OutOfRangeError(ValRange); 4658 KD.group_segment_fixed_size = Val; 4659 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4660 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4661 return OutOfRangeError(ValRange); 4662 KD.private_segment_fixed_size = Val; 4663 } else if (ID == ".amdhsa_kernarg_size") { 4664 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4665 return OutOfRangeError(ValRange); 4666 KD.kernarg_size = Val; 4667 } else if (ID == ".amdhsa_user_sgpr_count") { 4668 ExplicitUserSGPRCount = Val; 4669 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4670 if (hasArchitectedFlatScratch()) 4671 return Error(IDRange.Start, 4672 "directive is not supported with architected flat scratch", 4673 IDRange); 4674 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4675 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4676 Val, ValRange); 4677 if (Val) 4678 ImpliedUserSGPRCount += 4; 4679 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4680 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4681 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4682 ValRange); 4683 if (Val) 4684 ImpliedUserSGPRCount += 2; 4685 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4686 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4687 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4688 ValRange); 4689 if (Val) 4690 ImpliedUserSGPRCount += 2; 4691 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4692 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4693 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4694 Val, ValRange); 4695 if (Val) 4696 ImpliedUserSGPRCount += 2; 4697 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4698 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4699 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4700 ValRange); 4701 if (Val) 4702 ImpliedUserSGPRCount += 2; 4703 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4704 if (hasArchitectedFlatScratch()) 4705 return Error(IDRange.Start, 4706 "directive is not supported with architected flat scratch", 4707 IDRange); 4708 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4709 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4710 ValRange); 4711 if (Val) 4712 ImpliedUserSGPRCount += 2; 4713 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4714 
PARSE_BITS_ENTRY(KD.kernel_code_properties, 4715 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4716 Val, ValRange); 4717 if (Val) 4718 ImpliedUserSGPRCount += 1; 4719 } else if (ID == ".amdhsa_wavefront_size32") { 4720 if (IVersion.Major < 10) 4721 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4722 EnableWavefrontSize32 = Val; 4723 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4724 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4725 Val, ValRange); 4726 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4727 if (hasArchitectedFlatScratch()) 4728 return Error(IDRange.Start, 4729 "directive is not supported with architected flat scratch", 4730 IDRange); 4731 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4732 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4733 } else if (ID == ".amdhsa_enable_private_segment") { 4734 if (!hasArchitectedFlatScratch()) 4735 return Error( 4736 IDRange.Start, 4737 "directive is not supported without architected flat scratch", 4738 IDRange); 4739 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4740 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4741 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4742 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4743 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4744 ValRange); 4745 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4746 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4747 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4748 ValRange); 4749 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4750 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4751 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4752 ValRange); 4753 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4754 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4755 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4756 ValRange); 4757 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4758 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4759 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4760 ValRange); 4761 } else if (ID == ".amdhsa_next_free_vgpr") { 4762 VGPRRange = ValRange; 4763 NextFreeVGPR = Val; 4764 } else if (ID == ".amdhsa_next_free_sgpr") { 4765 SGPRRange = ValRange; 4766 NextFreeSGPR = Val; 4767 } else if (ID == ".amdhsa_accum_offset") { 4768 if (!isGFX90A()) 4769 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4770 AccumOffset = Val; 4771 } else if (ID == ".amdhsa_reserve_vcc") { 4772 if (!isUInt<1>(Val)) 4773 return OutOfRangeError(ValRange); 4774 ReserveVCC = Val; 4775 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4776 if (IVersion.Major < 7) 4777 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 4778 if (hasArchitectedFlatScratch()) 4779 return Error(IDRange.Start, 4780 "directive is not supported with architected flat scratch", 4781 IDRange); 4782 if (!isUInt<1>(Val)) 4783 return OutOfRangeError(ValRange); 4784 ReserveFlatScr = Val; 4785 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4786 if (IVersion.Major < 8) 4787 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 4788 if (!isUInt<1>(Val)) 4789 return OutOfRangeError(ValRange); 4790 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 4791 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 4792 IDRange); 4793 } else if (ID == ".amdhsa_float_round_mode_32") { 4794 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4795 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4796 } else if (ID == ".amdhsa_float_round_mode_16_64") { 
4797 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4798 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4799 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4800 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4801 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4802 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4803 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4804 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4805 ValRange); 4806 } else if (ID == ".amdhsa_dx10_clamp") { 4807 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4808 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4809 } else if (ID == ".amdhsa_ieee_mode") { 4810 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4811 Val, ValRange); 4812 } else if (ID == ".amdhsa_fp16_overflow") { 4813 if (IVersion.Major < 9) 4814 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 4815 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4816 ValRange); 4817 } else if (ID == ".amdhsa_tg_split") { 4818 if (!isGFX90A()) 4819 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4820 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 4821 ValRange); 4822 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4823 if (IVersion.Major < 10) 4824 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4825 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4826 ValRange); 4827 } else if (ID == ".amdhsa_memory_ordered") { 4828 if (IVersion.Major < 10) 4829 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4830 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4831 ValRange); 4832 } else if (ID == ".amdhsa_forward_progress") { 4833 if (IVersion.Major < 10) 4834 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4835 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4836 ValRange); 4837 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4838 PARSE_BITS_ENTRY( 4839 KD.compute_pgm_rsrc2, 4840 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4841 ValRange); 4842 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4843 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4844 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4845 Val, ValRange); 4846 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4847 PARSE_BITS_ENTRY( 4848 KD.compute_pgm_rsrc2, 4849 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4850 ValRange); 4851 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4852 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4853 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4854 Val, ValRange); 4855 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4856 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4857 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4858 Val, ValRange); 4859 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4860 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4861 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4862 Val, ValRange); 4863 } else if (ID == ".amdhsa_exception_int_div_zero") { 4864 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4865 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4866 Val, ValRange); 4867 } else { 4868 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 4869 } 4870 4871 #undef PARSE_BITS_ENTRY 4872 } 4873 4874 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4875 return 
TokError(".amdhsa_next_free_vgpr directive is required"); 4876 4877 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4878 return TokError(".amdhsa_next_free_sgpr directive is required"); 4879 4880 unsigned VGPRBlocks; 4881 unsigned SGPRBlocks; 4882 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4883 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 4884 EnableWavefrontSize32, NextFreeVGPR, 4885 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4886 SGPRBlocks)) 4887 return true; 4888 4889 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4890 VGPRBlocks)) 4891 return OutOfRangeError(VGPRRange); 4892 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4893 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4894 4895 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4896 SGPRBlocks)) 4897 return OutOfRangeError(SGPRRange); 4898 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4899 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4900 SGPRBlocks); 4901 4902 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) 4903 return TokError(".amdhsa_user_sgpr_count smaller than implied by " 4904 "enabled user SGPRs"); 4905 4906 unsigned UserSGPRCount = 4907 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount; 4908 4909 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4910 return TokError("too many user SGPRs enabled"); 4911 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4912 UserSGPRCount); 4913 4914 if (isGFX90A()) { 4915 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 4916 return TokError(".amdhsa_accum_offset directive is required"); 4917 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 4918 return TokError("accum_offset should be in range [4..256] in " 4919 "increments of 4"); 4920 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 4921 return TokError("accum_offset exceeds total VGPR allocation"); 4922 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 4923 (AccumOffset / 4 - 1)); 4924 } 4925 4926 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4927 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4928 ReserveFlatScr); 4929 return false; 4930 } 4931 4932 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4933 uint32_t Major; 4934 uint32_t Minor; 4935 4936 if (ParseDirectiveMajorMinor(Major, Minor)) 4937 return true; 4938 4939 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4940 return false; 4941 } 4942 4943 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4944 uint32_t Major; 4945 uint32_t Minor; 4946 uint32_t Stepping; 4947 StringRef VendorName; 4948 StringRef ArchName; 4949 4950 // If this directive has no arguments, then use the ISA version for the 4951 // targeted GPU.
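  // E.g. a bare ".hsa_code_object_isa" while targeting gfx900 is emitted as
  // if .hsa_code_object_isa 9,0,0,"AMD","AMDGPU" had been written (target
  // chosen for illustration).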
4952 if (isToken(AsmToken::EndOfStatement)) { 4953 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4954 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 4955 ISA.Stepping, 4956 "AMD", "AMDGPU"); 4957 return false; 4958 } 4959 4960 if (ParseDirectiveMajorMinor(Major, Minor)) 4961 return true; 4962 4963 if (!trySkipToken(AsmToken::Comma)) 4964 return TokError("stepping version number required, comma expected"); 4965 4966 if (ParseAsAbsoluteExpression(Stepping)) 4967 return TokError("invalid stepping version"); 4968 4969 if (!trySkipToken(AsmToken::Comma)) 4970 return TokError("vendor name required, comma expected"); 4971 4972 if (!parseString(VendorName, "invalid vendor name")) 4973 return true; 4974 4975 if (!trySkipToken(AsmToken::Comma)) 4976 return TokError("arch name required, comma expected"); 4977 4978 if (!parseString(ArchName, "invalid arch name")) 4979 return true; 4980 4981 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 4982 VendorName, ArchName); 4983 return false; 4984 } 4985 4986 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4987 amd_kernel_code_t &Header) { 4988 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4989 // assembly for backwards compatibility. 4990 if (ID == "max_scratch_backing_memory_byte_size") { 4991 Parser.eatToEndOfStatement(); 4992 return false; 4993 } 4994 4995 SmallString<40> ErrStr; 4996 raw_svector_ostream Err(ErrStr); 4997 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4998 return TokError(Err.str()); 4999 } 5000 Lex(); 5001 5002 if (ID == "enable_wavefront_size32") { 5003 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 5004 if (!isGFX10Plus()) 5005 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 5006 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5007 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 5008 } else { 5009 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5010 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 5011 } 5012 } 5013 5014 if (ID == "wavefront_size") { 5015 if (Header.wavefront_size == 5) { 5016 if (!isGFX10Plus()) 5017 return TokError("wavefront_size=5 is only allowed on GFX10+"); 5018 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5019 return TokError("wavefront_size=5 requires +WavefrontSize32"); 5020 } else if (Header.wavefront_size == 6) { 5021 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5022 return TokError("wavefront_size=6 requires +WavefrontSize64"); 5023 } 5024 } 5025 5026 if (ID == "enable_wgp_mode") { 5027 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 5028 !isGFX10Plus()) 5029 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 5030 } 5031 5032 if (ID == "enable_mem_ordered") { 5033 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 5034 !isGFX10Plus()) 5035 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 5036 } 5037 5038 if (ID == "enable_fwd_progress") { 5039 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 5040 !isGFX10Plus()) 5041 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 5042 } 5043 5044 return false; 5045 } 5046 5047 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5048 amd_kernel_code_t Header; 5049 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 5050 5051 while (true) { 5052 // Lex EndOfStatement. 
This is in a while loop, because lexing a comment 5053 // will set the current token to EndOfStatement. 5054 while(trySkipToken(AsmToken::EndOfStatement)); 5055 5056 StringRef ID; 5057 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5058 return true; 5059 5060 if (ID == ".end_amd_kernel_code_t") 5061 break; 5062 5063 if (ParseAMDKernelCodeTValue(ID, Header)) 5064 return true; 5065 } 5066 5067 getTargetStreamer().EmitAMDKernelCodeT(Header); 5068 5069 return false; 5070 } 5071 5072 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5073 StringRef KernelName; 5074 if (!parseId(KernelName, "expected symbol name")) 5075 return true; 5076 5077 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5078 ELF::STT_AMDGPU_HSA_KERNEL); 5079 5080 KernelScope.initialize(getContext()); 5081 return false; 5082 } 5083 5084 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5085 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5086 return Error(getLoc(), 5087 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5088 "architectures"); 5089 } 5090 5091 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5092 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5093 return Error(getParser().getTok().getLoc(), "target id must match options"); 5094 5095 getTargetStreamer().EmitISAVersion(); 5096 Lex(); 5097 5098 return false; 5099 } 5100 5101 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5102 const char *AssemblerDirectiveBegin; 5103 const char *AssemblerDirectiveEnd; 5104 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5105 isHsaAbiVersion3AndAbove(&getSTI()) 5106 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 5107 HSAMD::V3::AssemblerDirectiveEnd) 5108 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 5109 HSAMD::AssemblerDirectiveEnd); 5110 5111 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5112 return Error(getLoc(), 5113 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5114 "not available on non-amdhsa OSes")).str()); 5115 } 5116 5117 std::string HSAMetadataString; 5118 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5119 HSAMetadataString)) 5120 return true; 5121 5122 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5123 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5124 return Error(getLoc(), "invalid HSA metadata"); 5125 } else { 5126 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5127 return Error(getLoc(), "invalid HSA metadata"); 5128 } 5129 5130 return false; 5131 } 5132 5133 /// Common code to parse out a block of text (typically YAML) between start and 5134 /// end directives. 
5135 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5136 const char *AssemblerDirectiveEnd, 5137 std::string &CollectString) { 5138 5139 raw_string_ostream CollectStream(CollectString); 5140 5141 getLexer().setSkipSpace(false); 5142 5143 bool FoundEnd = false; 5144 while (!isToken(AsmToken::Eof)) { 5145 while (isToken(AsmToken::Space)) { 5146 CollectStream << getTokenStr(); 5147 Lex(); 5148 } 5149 5150 if (trySkipId(AssemblerDirectiveEnd)) { 5151 FoundEnd = true; 5152 break; 5153 } 5154 5155 CollectStream << Parser.parseStringToEndOfStatement() 5156 << getContext().getAsmInfo()->getSeparatorString(); 5157 5158 Parser.eatToEndOfStatement(); 5159 } 5160 5161 getLexer().setSkipSpace(true); 5162 5163 if (isToken(AsmToken::Eof) && !FoundEnd) { 5164 return TokError(Twine("expected directive ") + 5165 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5166 } 5167 5168 CollectStream.flush(); 5169 return false; 5170 } 5171 5172 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5173 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5174 std::string String; 5175 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5176 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5177 return true; 5178 5179 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5180 if (!PALMetadata->setFromString(String)) 5181 return Error(getLoc(), "invalid PAL metadata"); 5182 return false; 5183 } 5184 5185 /// Parse the assembler directive for old linear-format PAL metadata. 5186 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5187 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5188 return Error(getLoc(), 5189 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5190 "not available on non-amdpal OSes")).str()); 5191 } 5192 5193 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5194 PALMetadata->setLegacy(); 5195 for (;;) { 5196 uint32_t Key, Value; 5197 if (ParseAsAbsoluteExpression(Key)) { 5198 return TokError(Twine("invalid value in ") + 5199 Twine(PALMD::AssemblerDirective)); 5200 } 5201 if (!trySkipToken(AsmToken::Comma)) { 5202 return TokError(Twine("expected an even number of values in ") + 5203 Twine(PALMD::AssemblerDirective)); 5204 } 5205 if (ParseAsAbsoluteExpression(Value)) { 5206 return TokError(Twine("invalid value in ") + 5207 Twine(PALMD::AssemblerDirective)); 5208 } 5209 PALMetadata->setRegister(Key, Value); 5210 if (!trySkipToken(AsmToken::Comma)) 5211 break; 5212 } 5213 return false; 5214 } 5215 5216 /// ParseDirectiveAMDGPULDS 5217 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5218 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5219 if (getParser().checkForValidSection()) 5220 return true; 5221 5222 StringRef Name; 5223 SMLoc NameLoc = getLoc(); 5224 if (getParser().parseIdentifier(Name)) 5225 return TokError("expected identifier in directive"); 5226 5227 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5228 if (parseToken(AsmToken::Comma, "expected ','")) 5229 return true; 5230 5231 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5232 5233 int64_t Size; 5234 SMLoc SizeLoc = getLoc(); 5235 if (getParser().parseAbsoluteExpression(Size)) 5236 return true; 5237 if (Size < 0) 5238 return Error(SizeLoc, "size must be non-negative"); 5239 if (Size > LocalMemorySize) 5240 return Error(SizeLoc, "size is too large"); 5241 5242 int64_t Alignment = 4; 5243 if (trySkipToken(AsmToken::Comma)) { 5244 SMLoc AlignLoc = getLoc(); 5245 if 
(getParser().parseAbsoluteExpression(Alignment)) 5246 return true; 5247 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5248 return Error(AlignLoc, "alignment must be a power of two"); 5249 5250 // Alignment larger than the size of LDS is possible in theory, as long 5251 // as the linker manages to place the symbol at address 0, but we do want 5252 // to make sure the alignment fits nicely into a 32-bit integer. 5253 if (Alignment >= 1u << 31) 5254 return Error(AlignLoc, "alignment is too large"); 5255 } 5256 5257 if (parseToken(AsmToken::EndOfStatement, 5258 "unexpected token in '.amdgpu_lds' directive")) 5259 return true; 5260 5261 Symbol->redefineIfPossible(); 5262 if (!Symbol->isUndefined()) 5263 return Error(NameLoc, "invalid symbol redefinition"); 5264 5265 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5266 return false; 5267 } 5268 5269 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5270 StringRef IDVal = DirectiveID.getString(); 5271 5272 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5273 if (IDVal == ".amdhsa_kernel") 5274 return ParseDirectiveAMDHSAKernel(); 5275 5276 // TODO: Restructure/combine with PAL metadata directive. 5277 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5278 return ParseDirectiveHSAMetadata(); 5279 } else { 5280 if (IDVal == ".hsa_code_object_version") 5281 return ParseDirectiveHSACodeObjectVersion(); 5282 5283 if (IDVal == ".hsa_code_object_isa") 5284 return ParseDirectiveHSACodeObjectISA(); 5285 5286 if (IDVal == ".amd_kernel_code_t") 5287 return ParseDirectiveAMDKernelCodeT(); 5288 5289 if (IDVal == ".amdgpu_hsa_kernel") 5290 return ParseDirectiveAMDGPUHsaKernel(); 5291 5292 if (IDVal == ".amd_amdgpu_isa") 5293 return ParseDirectiveISAVersion(); 5294 5295 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5296 return ParseDirectiveHSAMetadata(); 5297 } 5298 5299 if (IDVal == ".amdgcn_target") 5300 return ParseDirectiveAMDGCNTarget(); 5301 5302 if (IDVal == ".amdgpu_lds") 5303 return ParseDirectiveAMDGPULDS(); 5304 5305 if (IDVal == PALMD::AssemblerDirectiveBegin) 5306 return ParseDirectivePALMetadataBegin(); 5307 5308 if (IDVal == PALMD::AssemblerDirective) 5309 return ParseDirectivePALMetadata(); 5310 5311 return true; 5312 } 5313 5314 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5315 unsigned RegNo) { 5316 5317 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 5318 R.isValid(); ++R) { 5319 if (*R == RegNo) 5320 return isGFX9Plus(); 5321 } 5322 5323 // GFX10 has 2 more SGPRs 104 and 105. 5324 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 5325 R.isValid(); ++R) { 5326 if (*R == RegNo) 5327 return hasSGPR104_SGPR105(); 5328 } 5329 5330 switch (RegNo) { 5331 case AMDGPU::SRC_SHARED_BASE: 5332 case AMDGPU::SRC_SHARED_LIMIT: 5333 case AMDGPU::SRC_PRIVATE_BASE: 5334 case AMDGPU::SRC_PRIVATE_LIMIT: 5335 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5336 return isGFX9Plus(); 5337 case AMDGPU::TBA: 5338 case AMDGPU::TBA_LO: 5339 case AMDGPU::TBA_HI: 5340 case AMDGPU::TMA: 5341 case AMDGPU::TMA_LO: 5342 case AMDGPU::TMA_HI: 5343 return !isGFX9Plus(); 5344 case AMDGPU::XNACK_MASK: 5345 case AMDGPU::XNACK_MASK_LO: 5346 case AMDGPU::XNACK_MASK_HI: 5347 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5348 case AMDGPU::SGPR_NULL: 5349 return isGFX10Plus(); 5350 default: 5351 break; 5352 } 5353 5354 if (isCI()) 5355 return true; 5356 5357 if (isSI() || isGFX10Plus()) { 5358 // No flat_scr on SI.
5359 // On GFX10 flat scratch is not a valid register operand and can only be 5360 // accessed with s_setreg/s_getreg. 5361 switch (RegNo) { 5362 case AMDGPU::FLAT_SCR: 5363 case AMDGPU::FLAT_SCR_LO: 5364 case AMDGPU::FLAT_SCR_HI: 5365 return false; 5366 default: 5367 return true; 5368 } 5369 } 5370 5371 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5372 // SI/CI have. 5373 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 5374 R.isValid(); ++R) { 5375 if (*R == RegNo) 5376 return hasSGPR102_SGPR103(); 5377 } 5378 5379 return true; 5380 } 5381 5382 OperandMatchResultTy 5383 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 5384 OperandMode Mode) { 5385 // Try to parse with a custom parser 5386 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 5387 5388 // If we successfully parsed the operand or if there as an error parsing, 5389 // we are done. 5390 // 5391 // If we are parsing after we reach EndOfStatement then this means we 5392 // are appending default values to the Operands list. This is only done 5393 // by custom parser, so we shouldn't continue on to the generic parsing. 5394 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5395 isToken(AsmToken::EndOfStatement)) 5396 return ResTy; 5397 5398 SMLoc RBraceLoc; 5399 SMLoc LBraceLoc = getLoc(); 5400 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 5401 unsigned Prefix = Operands.size(); 5402 5403 for (;;) { 5404 auto Loc = getLoc(); 5405 ResTy = parseReg(Operands); 5406 if (ResTy == MatchOperand_NoMatch) 5407 Error(Loc, "expected a register"); 5408 if (ResTy != MatchOperand_Success) 5409 return MatchOperand_ParseFail; 5410 5411 RBraceLoc = getLoc(); 5412 if (trySkipToken(AsmToken::RBrac)) 5413 break; 5414 5415 if (!skipToken(AsmToken::Comma, 5416 "expected a comma or a closing square bracket")) { 5417 return MatchOperand_ParseFail; 5418 } 5419 } 5420 5421 if (Operands.size() - Prefix > 1) { 5422 Operands.insert(Operands.begin() + Prefix, 5423 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 5424 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 5425 } 5426 5427 return MatchOperand_Success; 5428 } 5429 5430 return parseRegOrImm(Operands); 5431 } 5432 5433 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 5434 // Clear any forced encodings from the previous instruction. 
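  // A suffix on the current mnemonic, if present, re-establishes one below.
  // Illustrative examples (not exhaustive): 'v_add_f32_e64' forces the 64-bit
  // (VOP3) encoding, '_e32' forces the 32-bit encoding, and '_dpp'/'_sdwa'
  // force the DPP and SDWA variants, respectively.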
5435 setForcedEncodingSize(0); 5436 setForcedDPP(false); 5437 setForcedSDWA(false); 5438 5439 if (Name.endswith("_e64")) { 5440 setForcedEncodingSize(64); 5441 return Name.substr(0, Name.size() - 4); 5442 } else if (Name.endswith("_e32")) { 5443 setForcedEncodingSize(32); 5444 return Name.substr(0, Name.size() - 4); 5445 } else if (Name.endswith("_dpp")) { 5446 setForcedDPP(true); 5447 return Name.substr(0, Name.size() - 4); 5448 } else if (Name.endswith("_sdwa")) { 5449 setForcedSDWA(true); 5450 return Name.substr(0, Name.size() - 5); 5451 } 5452 return Name; 5453 } 5454 5455 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5456 StringRef Name, 5457 SMLoc NameLoc, OperandVector &Operands) { 5458 // Add the instruction mnemonic 5459 Name = parseMnemonicSuffix(Name); 5460 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5461 5462 bool IsMIMG = Name.startswith("image_"); 5463 5464 while (!trySkipToken(AsmToken::EndOfStatement)) { 5465 OperandMode Mode = OperandMode_Default; 5466 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5467 Mode = OperandMode_NSA; 5468 CPolSeen = 0; 5469 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5470 5471 if (Res != MatchOperand_Success) { 5472 checkUnsupportedInstruction(Name, NameLoc); 5473 if (!Parser.hasPendingError()) { 5474 // FIXME: use real operand location rather than the current location. 5475 StringRef Msg = 5476 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5477 "not a valid operand."; 5478 Error(getLoc(), Msg); 5479 } 5480 while (!trySkipToken(AsmToken::EndOfStatement)) { 5481 lex(); 5482 } 5483 return true; 5484 } 5485 5486 // Eat the comma or space if there is one. 5487 trySkipToken(AsmToken::Comma); 5488 } 5489 5490 return false; 5491 } 5492 5493 //===----------------------------------------------------------------------===// 5494 // Utility functions 5495 //===----------------------------------------------------------------------===// 5496 5497 OperandMatchResultTy 5498 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5499 5500 if (!trySkipId(Prefix, AsmToken::Colon)) 5501 return MatchOperand_NoMatch; 5502 5503 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5504 } 5505 5506 OperandMatchResultTy 5507 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5508 AMDGPUOperand::ImmTy ImmTy, 5509 bool (*ConvertResult)(int64_t&)) { 5510 SMLoc S = getLoc(); 5511 int64_t Value = 0; 5512 5513 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5514 if (Res != MatchOperand_Success) 5515 return Res; 5516 5517 if (ConvertResult && !ConvertResult(Value)) { 5518 Error(S, "invalid " + StringRef(Prefix) + " value."); 5519 } 5520 5521 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5522 return MatchOperand_Success; 5523 } 5524 5525 OperandMatchResultTy 5526 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5527 OperandVector &Operands, 5528 AMDGPUOperand::ImmTy ImmTy, 5529 bool (*ConvertResult)(int64_t&)) { 5530 SMLoc S = getLoc(); 5531 if (!trySkipId(Prefix, AsmToken::Colon)) 5532 return MatchOperand_NoMatch; 5533 5534 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5535 return MatchOperand_ParseFail; 5536 5537 unsigned Val = 0; 5538 const unsigned MaxSize = 4; 5539 5540 // FIXME: How to verify the number of elements matches the number of src 5541 // operands? 
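  // Illustrative example: 'op_sel:[0,1,1,0]' is parsed element by element in
  // the loop below, setting bit I of Val for each element equal to 1, which
  // yields Val = 0b0110 for this input.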
5542 for (int I = 0; ; ++I) { 5543 int64_t Op; 5544 SMLoc Loc = getLoc(); 5545 if (!parseExpr(Op)) 5546 return MatchOperand_ParseFail; 5547 5548 if (Op != 0 && Op != 1) { 5549 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5550 return MatchOperand_ParseFail; 5551 } 5552 5553 Val |= (Op << I); 5554 5555 if (trySkipToken(AsmToken::RBrac)) 5556 break; 5557 5558 if (I + 1 == MaxSize) { 5559 Error(getLoc(), "expected a closing square bracket"); 5560 return MatchOperand_ParseFail; 5561 } 5562 5563 if (!skipToken(AsmToken::Comma, "expected a comma")) 5564 return MatchOperand_ParseFail; 5565 } 5566 5567 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5568 return MatchOperand_Success; 5569 } 5570 5571 OperandMatchResultTy 5572 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5573 AMDGPUOperand::ImmTy ImmTy) { 5574 int64_t Bit; 5575 SMLoc S = getLoc(); 5576 5577 if (trySkipId(Name)) { 5578 Bit = 1; 5579 } else if (trySkipId("no", Name)) { 5580 Bit = 0; 5581 } else { 5582 return MatchOperand_NoMatch; 5583 } 5584 5585 if (Name == "r128" && !hasMIMG_R128()) { 5586 Error(S, "r128 modifier is not supported on this GPU"); 5587 return MatchOperand_ParseFail; 5588 } 5589 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5590 Error(S, "a16 modifier is not supported on this GPU"); 5591 return MatchOperand_ParseFail; 5592 } 5593 5594 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5595 ImmTy = AMDGPUOperand::ImmTyR128A16; 5596 5597 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5598 return MatchOperand_Success; 5599 } 5600 5601 OperandMatchResultTy 5602 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5603 unsigned CPolOn = 0; 5604 unsigned CPolOff = 0; 5605 SMLoc S = getLoc(); 5606 5607 if (trySkipId("glc")) 5608 CPolOn = AMDGPU::CPol::GLC; 5609 else if (trySkipId("noglc")) 5610 CPolOff = AMDGPU::CPol::GLC; 5611 else if (trySkipId("slc")) 5612 CPolOn = AMDGPU::CPol::SLC; 5613 else if (trySkipId("noslc")) 5614 CPolOff = AMDGPU::CPol::SLC; 5615 else if (trySkipId("dlc")) 5616 CPolOn = AMDGPU::CPol::DLC; 5617 else if (trySkipId("nodlc")) 5618 CPolOff = AMDGPU::CPol::DLC; 5619 else if (trySkipId("scc")) 5620 CPolOn = AMDGPU::CPol::SCC; 5621 else if (trySkipId("noscc")) 5622 CPolOff = AMDGPU::CPol::SCC; 5623 else 5624 return MatchOperand_NoMatch; 5625 5626 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5627 Error(S, "dlc modifier is not supported on this GPU"); 5628 return MatchOperand_ParseFail; 5629 } 5630 5631 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5632 Error(S, "scc modifier is not supported on this GPU"); 5633 return MatchOperand_ParseFail; 5634 } 5635 5636 if (CPolSeen & (CPolOn | CPolOff)) { 5637 Error(S, "duplicate cache policy modifier"); 5638 return MatchOperand_ParseFail; 5639 } 5640 5641 CPolSeen |= (CPolOn | CPolOff); 5642 5643 for (unsigned I = 1; I != Operands.size(); ++I) { 5644 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5645 if (Op.isCPol()) { 5646 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5647 return MatchOperand_Success; 5648 } 5649 } 5650 5651 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5652 AMDGPUOperand::ImmTyCPol)); 5653 5654 return MatchOperand_Success; 5655 } 5656 5657 static void addOptionalImmOperand( 5658 MCInst& Inst, const OperandVector& Operands, 5659 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5660 AMDGPUOperand::ImmTy ImmT, 5661 int64_t Default = 0) { 5662 auto i = OptionalIdx.find(ImmT); 5663 if (i != OptionalIdx.end()) { 
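    // The optional operand was supplied explicitly, so add its parsed
    // immediate to the MCInst; otherwise the default value is used below.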
5664 unsigned Idx = i->second; 5665 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5666 } else { 5667 Inst.addOperand(MCOperand::createImm(Default)); 5668 } 5669 } 5670 5671 OperandMatchResultTy 5672 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5673 StringRef &Value, 5674 SMLoc &StringLoc) { 5675 if (!trySkipId(Prefix, AsmToken::Colon)) 5676 return MatchOperand_NoMatch; 5677 5678 StringLoc = getLoc(); 5679 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5680 : MatchOperand_ParseFail; 5681 } 5682 5683 //===----------------------------------------------------------------------===// 5684 // MTBUF format 5685 //===----------------------------------------------------------------------===// 5686 5687 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5688 int64_t MaxVal, 5689 int64_t &Fmt) { 5690 int64_t Val; 5691 SMLoc Loc = getLoc(); 5692 5693 auto Res = parseIntWithPrefix(Pref, Val); 5694 if (Res == MatchOperand_ParseFail) 5695 return false; 5696 if (Res == MatchOperand_NoMatch) 5697 return true; 5698 5699 if (Val < 0 || Val > MaxVal) { 5700 Error(Loc, Twine("out of range ", StringRef(Pref))); 5701 return false; 5702 } 5703 5704 Fmt = Val; 5705 return true; 5706 } 5707 5708 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5709 // values to live in a joint format operand in the MCInst encoding. 5710 OperandMatchResultTy 5711 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5712 using namespace llvm::AMDGPU::MTBUFFormat; 5713 5714 int64_t Dfmt = DFMT_UNDEF; 5715 int64_t Nfmt = NFMT_UNDEF; 5716 5717 // dfmt and nfmt can appear in either order, and each is optional. 5718 for (int I = 0; I < 2; ++I) { 5719 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5720 return MatchOperand_ParseFail; 5721 5722 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5723 return MatchOperand_ParseFail; 5724 } 5725 // Skip optional comma between dfmt/nfmt 5726 // but guard against 2 commas following each other. 5727 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5728 !peekToken().is(AsmToken::Comma)) { 5729 trySkipToken(AsmToken::Comma); 5730 } 5731 } 5732 5733 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5734 return MatchOperand_NoMatch; 5735 5736 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5737 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5738 5739 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5740 return MatchOperand_Success; 5741 } 5742 5743 OperandMatchResultTy 5744 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5745 using namespace llvm::AMDGPU::MTBUFFormat; 5746 5747 int64_t Fmt = UFMT_UNDEF; 5748 5749 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5750 return MatchOperand_ParseFail; 5751 5752 if (Fmt == UFMT_UNDEF) 5753 return MatchOperand_NoMatch; 5754 5755 Format = Fmt; 5756 return MatchOperand_Success; 5757 } 5758 5759 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5760 int64_t &Nfmt, 5761 StringRef FormatStr, 5762 SMLoc Loc) { 5763 using namespace llvm::AMDGPU::MTBUFFormat; 5764 int64_t Format; 5765 5766 Format = getDfmt(FormatStr); 5767 if (Format != DFMT_UNDEF) { 5768 Dfmt = Format; 5769 return true; 5770 } 5771 5772 Format = getNfmt(FormatStr, getSTI()); 5773 if (Format != NFMT_UNDEF) { 5774 Nfmt = Format; 5775 return true; 5776 } 5777 5778 Error(Loc, "unsupported format"); 5779 return false; 5780 } 5781 5782 OperandMatchResultTy 5783 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5784 SMLoc FormatLoc, 5785 int64_t &Format) { 5786 using namespace llvm::AMDGPU::MTBUFFormat; 5787 5788 int64_t Dfmt = DFMT_UNDEF; 5789 int64_t Nfmt = NFMT_UNDEF; 5790 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5791 return MatchOperand_ParseFail; 5792 5793 if (trySkipToken(AsmToken::Comma)) { 5794 StringRef Str; 5795 SMLoc Loc = getLoc(); 5796 if (!parseId(Str, "expected a format string") || 5797 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5798 return MatchOperand_ParseFail; 5799 } 5800 if (Dfmt == DFMT_UNDEF) { 5801 Error(Loc, "duplicate numeric format"); 5802 return MatchOperand_ParseFail; 5803 } else if (Nfmt == NFMT_UNDEF) { 5804 Error(Loc, "duplicate data format"); 5805 return MatchOperand_ParseFail; 5806 } 5807 } 5808 5809 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5810 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5811 5812 if (isGFX10Plus()) { 5813 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5814 if (Ufmt == UFMT_UNDEF) { 5815 Error(FormatLoc, "unsupported format"); 5816 return MatchOperand_ParseFail; 5817 } 5818 Format = Ufmt; 5819 } else { 5820 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5821 } 5822 5823 return MatchOperand_Success; 5824 } 5825 5826 OperandMatchResultTy 5827 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5828 SMLoc Loc, 5829 int64_t &Format) { 5830 using namespace llvm::AMDGPU::MTBUFFormat; 5831 5832 auto Id = getUnifiedFormat(FormatStr); 5833 if (Id == UFMT_UNDEF) 5834 return MatchOperand_NoMatch; 5835 5836 if (!isGFX10Plus()) { 5837 Error(Loc, "unified format is not supported on this GPU"); 5838 return MatchOperand_ParseFail; 5839 } 5840 5841 Format = Id; 5842 return MatchOperand_Success; 5843 } 5844 5845 OperandMatchResultTy 5846 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5847 using namespace llvm::AMDGPU::MTBUFFormat; 5848 SMLoc Loc = getLoc(); 5849 5850 if (!parseExpr(Format)) 5851 return MatchOperand_ParseFail; 5852 if (!isValidFormatEncoding(Format, getSTI())) { 5853 Error(Loc, "out of range format"); 5854 return MatchOperand_ParseFail; 5855 } 5856 5857 return MatchOperand_Success; 5858 } 5859 5860 OperandMatchResultTy 5861 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 5862 using namespace llvm::AMDGPU::MTBUFFormat; 5863 5864 if (!trySkipId("format", AsmToken::Colon)) 5865 return MatchOperand_NoMatch; 5866 5867 if (trySkipToken(AsmToken::LBrac)) { 5868 StringRef FormatStr; 5869 SMLoc Loc = getLoc(); 5870 if (!parseId(FormatStr, "expected a format string")) 5871 return MatchOperand_ParseFail; 5872 5873 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5874 if (Res == MatchOperand_NoMatch) 5875 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5876 if (Res != MatchOperand_Success) 5877 return Res; 5878 5879 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 5880 return MatchOperand_ParseFail; 5881 5882 return MatchOperand_Success; 5883 } 5884 5885 return parseNumericFormat(Format); 5886 } 5887 5888 OperandMatchResultTy 5889 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5890 using namespace llvm::AMDGPU::MTBUFFormat; 5891 5892 int64_t Format = getDefaultFormatEncoding(getSTI()); 5893 OperandMatchResultTy Res; 5894 SMLoc Loc = getLoc(); 5895 5896 // Parse legacy format syntax. 5897 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5898 if (Res == MatchOperand_ParseFail) 5899 return Res; 5900 5901 bool FormatFound = (Res == MatchOperand_Success); 5902 5903 Operands.push_back( 5904 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5905 5906 if (FormatFound) 5907 trySkipToken(AsmToken::Comma); 5908 5909 if (isToken(AsmToken::EndOfStatement)) { 5910 // We are expecting an soffset operand, 5911 // but let matcher handle the error. 5912 return MatchOperand_Success; 5913 } 5914 5915 // Parse soffset. 
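  // soffset may be given either as a register or as an immediate, e.g.
  // (illustrative) 's4' or '0'; parseRegOrImm accepts both forms.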
5916 Res = parseRegOrImm(Operands); 5917 if (Res != MatchOperand_Success) 5918 return Res; 5919 5920 trySkipToken(AsmToken::Comma); 5921 5922 if (!FormatFound) { 5923 Res = parseSymbolicOrNumericFormat(Format); 5924 if (Res == MatchOperand_ParseFail) 5925 return Res; 5926 if (Res == MatchOperand_Success) { 5927 auto Size = Operands.size(); 5928 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 5929 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 5930 Op.setImm(Format); 5931 } 5932 return MatchOperand_Success; 5933 } 5934 5935 if (isId("format") && peekToken().is(AsmToken::Colon)) { 5936 Error(getLoc(), "duplicate format"); 5937 return MatchOperand_ParseFail; 5938 } 5939 return MatchOperand_Success; 5940 } 5941 5942 //===----------------------------------------------------------------------===// 5943 // ds 5944 //===----------------------------------------------------------------------===// 5945 5946 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 5947 const OperandVector &Operands) { 5948 OptionalImmIndexMap OptionalIdx; 5949 5950 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5951 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5952 5953 // Add the register arguments 5954 if (Op.isReg()) { 5955 Op.addRegOperands(Inst, 1); 5956 continue; 5957 } 5958 5959 // Handle optional arguments 5960 OptionalIdx[Op.getImmTy()] = i; 5961 } 5962 5963 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 5964 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 5965 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5966 5967 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5968 } 5969 5970 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 5971 bool IsGdsHardcoded) { 5972 OptionalImmIndexMap OptionalIdx; 5973 5974 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5975 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5976 5977 // Add the register arguments 5978 if (Op.isReg()) { 5979 Op.addRegOperands(Inst, 1); 5980 continue; 5981 } 5982 5983 if (Op.isToken() && Op.getToken() == "gds") { 5984 IsGdsHardcoded = true; 5985 continue; 5986 } 5987 5988 // Handle optional arguments 5989 OptionalIdx[Op.getImmTy()] = i; 5990 } 5991 5992 AMDGPUOperand::ImmTy OffsetType = 5993 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 5994 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 5995 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 5996 AMDGPUOperand::ImmTyOffset; 5997 5998 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 5999 6000 if (!IsGdsHardcoded) { 6001 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6002 } 6003 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6004 } 6005 6006 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 6007 OptionalImmIndexMap OptionalIdx; 6008 6009 unsigned OperandIdx[4]; 6010 unsigned EnMask = 0; 6011 int SrcIdx = 0; 6012 6013 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6014 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6015 6016 // Add the register arguments 6017 if (Op.isReg()) { 6018 assert(SrcIdx < 4); 6019 OperandIdx[SrcIdx] = Inst.size(); 6020 Op.addRegOperands(Inst, 1); 6021 ++SrcIdx; 6022 continue; 6023 } 6024 6025 if (Op.isOff()) { 6026 assert(SrcIdx < 4); 6027 OperandIdx[SrcIdx] = Inst.size(); 6028 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 6029 ++SrcIdx; 6030 continue; 6031 } 6032 6033 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 6034 Op.addImmOperands(Inst, 1); 6035 continue; 6036 } 6037 6038 if (Op.isToken() && Op.getToken() == "done") 6039 continue; 6040 6041 // Handle optional arguments 6042 OptionalIdx[Op.getImmTy()] = i; 6043 } 6044 6045 assert(SrcIdx == 4); 6046 6047 bool Compr = false; 6048 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 6049 Compr = true; 6050 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 6051 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 6052 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 6053 } 6054 6055 for (auto i = 0; i < SrcIdx; ++i) { 6056 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 6057 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 6058 } 6059 } 6060 6061 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6062 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6063 6064 Inst.addOperand(MCOperand::createImm(EnMask)); 6065 } 6066 6067 //===----------------------------------------------------------------------===// 6068 // s_waitcnt 6069 //===----------------------------------------------------------------------===// 6070 6071 static bool 6072 encodeCnt( 6073 const AMDGPU::IsaVersion ISA, 6074 int64_t &IntVal, 6075 int64_t CntVal, 6076 bool Saturate, 6077 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6078 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6079 { 6080 bool Failed = false; 6081 6082 IntVal = encode(ISA, IntVal, CntVal); 6083 if (CntVal != decode(ISA, IntVal)) { 6084 if (Saturate) { 6085 IntVal = encode(ISA, IntVal, -1); 6086 } else { 6087 Failed = true; 6088 } 6089 } 6090 return Failed; 6091 } 6092 6093 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6094 6095 SMLoc CntLoc = getLoc(); 6096 StringRef CntName = getTokenStr(); 6097 6098 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6099 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6100 return false; 6101 6102 int64_t CntVal; 6103 SMLoc ValLoc = getLoc(); 6104 if (!parseExpr(CntVal)) 6105 return false; 6106 6107 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6108 6109 bool Failed = true; 6110 bool Sat = CntName.endswith("_sat"); 6111 6112 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6113 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6114 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6115 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6116 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6117 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6118 } else { 6119 Error(CntLoc, "invalid counter name " + CntName); 6120 return false; 6121 } 6122 6123 if (Failed) { 6124 Error(ValLoc, "too large value for " + CntName); 6125 return false; 6126 } 6127 6128 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6129 return false; 6130 6131 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6132 if (isToken(AsmToken::EndOfStatement)) { 6133 Error(getLoc(), "expected a counter name"); 6134 return false; 6135 } 6136 } 6137 6138 return true; 6139 } 6140 6141 OperandMatchResultTy 6142 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 6143 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6144 int64_t Waitcnt = getWaitcntBitMask(ISA); 6145 SMLoc S = getLoc(); 6146 6147 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6148 while (!isToken(AsmToken::EndOfStatement)) { 6149 if (!parseCnt(Waitcnt)) 6150 return MatchOperand_ParseFail; 6151 } 6152 } else { 6153 if (!parseExpr(Waitcnt)) 6154 return MatchOperand_ParseFail; 6155 } 6156 6157 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6158 return MatchOperand_Success; 6159 } 6160 6161 bool 6162 AMDGPUOperand::isSWaitCnt() const { 6163 return isImm(); 6164 } 6165 6166 //===----------------------------------------------------------------------===// 6167 // hwreg 6168 //===----------------------------------------------------------------------===// 6169 6170 bool 6171 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6172 OperandInfoTy &Offset, 6173 
OperandInfoTy &Width) { 6174 using namespace llvm::AMDGPU::Hwreg; 6175 6176 // The register may be specified by name or using a numeric code 6177 HwReg.Loc = getLoc(); 6178 if (isToken(AsmToken::Identifier) && 6179 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 6180 HwReg.IsSymbolic = true; 6181 lex(); // skip register name 6182 } else if (!parseExpr(HwReg.Id, "a register name")) { 6183 return false; 6184 } 6185 6186 if (trySkipToken(AsmToken::RParen)) 6187 return true; 6188 6189 // parse optional params 6190 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6191 return false; 6192 6193 Offset.Loc = getLoc(); 6194 if (!parseExpr(Offset.Id)) 6195 return false; 6196 6197 if (!skipToken(AsmToken::Comma, "expected a comma")) 6198 return false; 6199 6200 Width.Loc = getLoc(); 6201 return parseExpr(Width.Id) && 6202 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6203 } 6204 6205 bool 6206 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6207 const OperandInfoTy &Offset, 6208 const OperandInfoTy &Width) { 6209 6210 using namespace llvm::AMDGPU::Hwreg; 6211 6212 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 6213 Error(HwReg.Loc, 6214 "specified hardware register is not supported on this GPU"); 6215 return false; 6216 } 6217 if (!isValidHwreg(HwReg.Id)) { 6218 Error(HwReg.Loc, 6219 "invalid code of hardware register: only 6-bit values are legal"); 6220 return false; 6221 } 6222 if (!isValidHwregOffset(Offset.Id)) { 6223 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6224 return false; 6225 } 6226 if (!isValidHwregWidth(Width.Id)) { 6227 Error(Width.Loc, 6228 "invalid bitfield width: only values from 1 to 32 are legal"); 6229 return false; 6230 } 6231 return true; 6232 } 6233 6234 OperandMatchResultTy 6235 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6236 using namespace llvm::AMDGPU::Hwreg; 6237 6238 int64_t ImmVal = 0; 6239 SMLoc Loc = getLoc(); 6240 6241 if (trySkipId("hwreg", AsmToken::LParen)) { 6242 OperandInfoTy HwReg(ID_UNKNOWN_); 6243 OperandInfoTy Offset(OFFSET_DEFAULT_); 6244 OperandInfoTy Width(WIDTH_DEFAULT_); 6245 if (parseHwregBody(HwReg, Offset, Width) && 6246 validateHwreg(HwReg, Offset, Width)) { 6247 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6248 } else { 6249 return MatchOperand_ParseFail; 6250 } 6251 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6252 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6253 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6254 return MatchOperand_ParseFail; 6255 } 6256 } else { 6257 return MatchOperand_ParseFail; 6258 } 6259 6260 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6261 return MatchOperand_Success; 6262 } 6263 6264 bool AMDGPUOperand::isHwreg() const { 6265 return isImmTy(ImmTyHwreg); 6266 } 6267 6268 //===----------------------------------------------------------------------===// 6269 // sendmsg 6270 //===----------------------------------------------------------------------===// 6271 6272 bool 6273 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6274 OperandInfoTy &Op, 6275 OperandInfoTy &Stream) { 6276 using namespace llvm::AMDGPU::SendMsg; 6277 6278 Msg.Loc = getLoc(); 6279 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 6280 Msg.IsSymbolic = true; 6281 lex(); // skip message name 6282 } else if (!parseExpr(Msg.Id, "a message name")) { 6283 return false; 6284 } 6285 6286 if (trySkipToken(AsmToken::Comma)) { 6287 Op.IsDefined = true; 
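    // An operation name follows the first comma, e.g. (illustrative)
    // 'sendmsg(MSG_GS, GS_OP_EMIT, 0)'; a numeric expression is also accepted.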
6288 Op.Loc = getLoc(); 6289 if (isToken(AsmToken::Identifier) && 6290 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6291 lex(); // skip operation name 6292 } else if (!parseExpr(Op.Id, "an operation name")) { 6293 return false; 6294 } 6295 6296 if (trySkipToken(AsmToken::Comma)) { 6297 Stream.IsDefined = true; 6298 Stream.Loc = getLoc(); 6299 if (!parseExpr(Stream.Id)) 6300 return false; 6301 } 6302 } 6303 6304 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6305 } 6306 6307 bool 6308 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6309 const OperandInfoTy &Op, 6310 const OperandInfoTy &Stream) { 6311 using namespace llvm::AMDGPU::SendMsg; 6312 6313 // Validation strictness depends on whether message is specified 6314 // in a symbolc or in a numeric form. In the latter case 6315 // only encoding possibility is checked. 6316 bool Strict = Msg.IsSymbolic; 6317 6318 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 6319 Error(Msg.Loc, "invalid message id"); 6320 return false; 6321 } 6322 if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 6323 if (Op.IsDefined) { 6324 Error(Op.Loc, "message does not support operations"); 6325 } else { 6326 Error(Msg.Loc, "missing message operation"); 6327 } 6328 return false; 6329 } 6330 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6331 Error(Op.Loc, "invalid operation id"); 6332 return false; 6333 } 6334 if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 6335 Error(Stream.Loc, "message operation does not support streams"); 6336 return false; 6337 } 6338 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6339 Error(Stream.Loc, "invalid message stream id"); 6340 return false; 6341 } 6342 return true; 6343 } 6344 6345 OperandMatchResultTy 6346 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6347 using namespace llvm::AMDGPU::SendMsg; 6348 6349 int64_t ImmVal = 0; 6350 SMLoc Loc = getLoc(); 6351 6352 if (trySkipId("sendmsg", AsmToken::LParen)) { 6353 OperandInfoTy Msg(ID_UNKNOWN_); 6354 OperandInfoTy Op(OP_NONE_); 6355 OperandInfoTy Stream(STREAM_ID_NONE_); 6356 if (parseSendMsgBody(Msg, Op, Stream) && 6357 validateSendMsg(Msg, Op, Stream)) { 6358 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6359 } else { 6360 return MatchOperand_ParseFail; 6361 } 6362 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6363 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6364 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6365 return MatchOperand_ParseFail; 6366 } 6367 } else { 6368 return MatchOperand_ParseFail; 6369 } 6370 6371 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6372 return MatchOperand_Success; 6373 } 6374 6375 bool AMDGPUOperand::isSendMsg() const { 6376 return isImmTy(ImmTySendMsg); 6377 } 6378 6379 //===----------------------------------------------------------------------===// 6380 // v_interp 6381 //===----------------------------------------------------------------------===// 6382 6383 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6384 StringRef Str; 6385 SMLoc S = getLoc(); 6386 6387 if (!parseId(Str)) 6388 return MatchOperand_NoMatch; 6389 6390 int Slot = StringSwitch<int>(Str) 6391 .Case("p10", 0) 6392 .Case("p20", 1) 6393 .Case("p0", 2) 6394 .Default(-1); 6395 6396 if (Slot == -1) { 6397 Error(S, "invalid interpolation slot"); 6398 return MatchOperand_ParseFail; 6399 } 6400 6401 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6402 
AMDGPUOperand::ImmTyInterpSlot)); 6403 return MatchOperand_Success; 6404 } 6405 6406 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6407 StringRef Str; 6408 SMLoc S = getLoc(); 6409 6410 if (!parseId(Str)) 6411 return MatchOperand_NoMatch; 6412 6413 if (!Str.startswith("attr")) { 6414 Error(S, "invalid interpolation attribute"); 6415 return MatchOperand_ParseFail; 6416 } 6417 6418 StringRef Chan = Str.take_back(2); 6419 int AttrChan = StringSwitch<int>(Chan) 6420 .Case(".x", 0) 6421 .Case(".y", 1) 6422 .Case(".z", 2) 6423 .Case(".w", 3) 6424 .Default(-1); 6425 if (AttrChan == -1) { 6426 Error(S, "invalid or missing interpolation attribute channel"); 6427 return MatchOperand_ParseFail; 6428 } 6429 6430 Str = Str.drop_back(2).drop_front(4); 6431 6432 uint8_t Attr; 6433 if (Str.getAsInteger(10, Attr)) { 6434 Error(S, "invalid or missing interpolation attribute number"); 6435 return MatchOperand_ParseFail; 6436 } 6437 6438 if (Attr > 63) { 6439 Error(S, "out of bounds interpolation attribute number"); 6440 return MatchOperand_ParseFail; 6441 } 6442 6443 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6444 6445 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6446 AMDGPUOperand::ImmTyInterpAttr)); 6447 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6448 AMDGPUOperand::ImmTyAttrChan)); 6449 return MatchOperand_Success; 6450 } 6451 6452 //===----------------------------------------------------------------------===// 6453 // exp 6454 //===----------------------------------------------------------------------===// 6455 6456 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6457 using namespace llvm::AMDGPU::Exp; 6458 6459 StringRef Str; 6460 SMLoc S = getLoc(); 6461 6462 if (!parseId(Str)) 6463 return MatchOperand_NoMatch; 6464 6465 unsigned Id = getTgtId(Str); 6466 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6467 Error(S, (Id == ET_INVALID) ? 
6468 "invalid exp target" : 6469 "exp target is not supported on this GPU"); 6470 return MatchOperand_ParseFail; 6471 } 6472 6473 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6474 AMDGPUOperand::ImmTyExpTgt)); 6475 return MatchOperand_Success; 6476 } 6477 6478 //===----------------------------------------------------------------------===// 6479 // parser helpers 6480 //===----------------------------------------------------------------------===// 6481 6482 bool 6483 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6484 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6485 } 6486 6487 bool 6488 AMDGPUAsmParser::isId(const StringRef Id) const { 6489 return isId(getToken(), Id); 6490 } 6491 6492 bool 6493 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6494 return getTokenKind() == Kind; 6495 } 6496 6497 bool 6498 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6499 if (isId(Id)) { 6500 lex(); 6501 return true; 6502 } 6503 return false; 6504 } 6505 6506 bool 6507 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 6508 if (isToken(AsmToken::Identifier)) { 6509 StringRef Tok = getTokenStr(); 6510 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 6511 lex(); 6512 return true; 6513 } 6514 } 6515 return false; 6516 } 6517 6518 bool 6519 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6520 if (isId(Id) && peekToken().is(Kind)) { 6521 lex(); 6522 lex(); 6523 return true; 6524 } 6525 return false; 6526 } 6527 6528 bool 6529 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6530 if (isToken(Kind)) { 6531 lex(); 6532 return true; 6533 } 6534 return false; 6535 } 6536 6537 bool 6538 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6539 const StringRef ErrMsg) { 6540 if (!trySkipToken(Kind)) { 6541 Error(getLoc(), ErrMsg); 6542 return false; 6543 } 6544 return true; 6545 } 6546 6547 bool 6548 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6549 SMLoc S = getLoc(); 6550 6551 const MCExpr *Expr; 6552 if (Parser.parseExpression(Expr)) 6553 return false; 6554 6555 if (Expr->evaluateAsAbsolute(Imm)) 6556 return true; 6557 6558 if (Expected.empty()) { 6559 Error(S, "expected absolute expression"); 6560 } else { 6561 Error(S, Twine("expected ", Expected) + 6562 Twine(" or an absolute expression")); 6563 } 6564 return false; 6565 } 6566 6567 bool 6568 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 6569 SMLoc S = getLoc(); 6570 6571 const MCExpr *Expr; 6572 if (Parser.parseExpression(Expr)) 6573 return false; 6574 6575 int64_t IntVal; 6576 if (Expr->evaluateAsAbsolute(IntVal)) { 6577 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 6578 } else { 6579 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 6580 } 6581 return true; 6582 } 6583 6584 bool 6585 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 6586 if (isToken(AsmToken::String)) { 6587 Val = getToken().getStringContents(); 6588 lex(); 6589 return true; 6590 } else { 6591 Error(getLoc(), ErrMsg); 6592 return false; 6593 } 6594 } 6595 6596 bool 6597 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 6598 if (isToken(AsmToken::Identifier)) { 6599 Val = getTokenStr(); 6600 lex(); 6601 return true; 6602 } else { 6603 if (!ErrMsg.empty()) 6604 Error(getLoc(), ErrMsg); 6605 return false; 6606 } 6607 } 6608 6609 AsmToken 6610 AMDGPUAsmParser::getToken() const { 6611 return Parser.getTok(); 6612 } 6613 6614 AsmToken 6615 
AMDGPUAsmParser::peekToken() { 6616 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 6617 } 6618 6619 void 6620 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 6621 auto TokCount = getLexer().peekTokens(Tokens); 6622 6623 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 6624 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 6625 } 6626 6627 AsmToken::TokenKind 6628 AMDGPUAsmParser::getTokenKind() const { 6629 return getLexer().getKind(); 6630 } 6631 6632 SMLoc 6633 AMDGPUAsmParser::getLoc() const { 6634 return getToken().getLoc(); 6635 } 6636 6637 StringRef 6638 AMDGPUAsmParser::getTokenStr() const { 6639 return getToken().getString(); 6640 } 6641 6642 void 6643 AMDGPUAsmParser::lex() { 6644 Parser.Lex(); 6645 } 6646 6647 SMLoc 6648 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 6649 const OperandVector &Operands) const { 6650 for (unsigned i = Operands.size() - 1; i > 0; --i) { 6651 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6652 if (Test(Op)) 6653 return Op.getStartLoc(); 6654 } 6655 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 6656 } 6657 6658 SMLoc 6659 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 6660 const OperandVector &Operands) const { 6661 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 6662 return getOperandLoc(Test, Operands); 6663 } 6664 6665 SMLoc 6666 AMDGPUAsmParser::getRegLoc(unsigned Reg, 6667 const OperandVector &Operands) const { 6668 auto Test = [=](const AMDGPUOperand& Op) { 6669 return Op.isRegKind() && Op.getReg() == Reg; 6670 }; 6671 return getOperandLoc(Test, Operands); 6672 } 6673 6674 SMLoc 6675 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 6676 auto Test = [](const AMDGPUOperand& Op) { 6677 return Op.IsImmKindLiteral() || Op.isExpr(); 6678 }; 6679 return getOperandLoc(Test, Operands); 6680 } 6681 6682 SMLoc 6683 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 6684 auto Test = [](const AMDGPUOperand& Op) { 6685 return Op.isImmKindConst(); 6686 }; 6687 return getOperandLoc(Test, Operands); 6688 } 6689 6690 //===----------------------------------------------------------------------===// 6691 // swizzle 6692 //===----------------------------------------------------------------------===// 6693 6694 LLVM_READNONE 6695 static unsigned 6696 encodeBitmaskPerm(const unsigned AndMask, 6697 const unsigned OrMask, 6698 const unsigned XorMask) { 6699 using namespace llvm::AMDGPU::Swizzle; 6700 6701 return BITMASK_PERM_ENC | 6702 (AndMask << BITMASK_AND_SHIFT) | 6703 (OrMask << BITMASK_OR_SHIFT) | 6704 (XorMask << BITMASK_XOR_SHIFT); 6705 } 6706 6707 bool 6708 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 6709 const unsigned MinVal, 6710 const unsigned MaxVal, 6711 const StringRef ErrMsg, 6712 SMLoc &Loc) { 6713 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6714 return false; 6715 } 6716 Loc = getLoc(); 6717 if (!parseExpr(Op)) { 6718 return false; 6719 } 6720 if (Op < MinVal || Op > MaxVal) { 6721 Error(Loc, ErrMsg); 6722 return false; 6723 } 6724 6725 return true; 6726 } 6727 6728 bool 6729 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 6730 const unsigned MinVal, 6731 const unsigned MaxVal, 6732 const StringRef ErrMsg) { 6733 SMLoc Loc; 6734 for (unsigned i = 0; i < OpNum; ++i) { 6735 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 6736 return false; 6737 } 6738 6739 return true; 6740 } 6741 6742 bool 6743 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t 
&Imm) { 6744 using namespace llvm::AMDGPU::Swizzle; 6745 6746 int64_t Lane[LANE_NUM]; 6747 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 6748 "expected a 2-bit lane id")) { 6749 Imm = QUAD_PERM_ENC; 6750 for (unsigned I = 0; I < LANE_NUM; ++I) { 6751 Imm |= Lane[I] << (LANE_SHIFT * I); 6752 } 6753 return true; 6754 } 6755 return false; 6756 } 6757 6758 bool 6759 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 6760 using namespace llvm::AMDGPU::Swizzle; 6761 6762 SMLoc Loc; 6763 int64_t GroupSize; 6764 int64_t LaneIdx; 6765 6766 if (!parseSwizzleOperand(GroupSize, 6767 2, 32, 6768 "group size must be in the interval [2,32]", 6769 Loc)) { 6770 return false; 6771 } 6772 if (!isPowerOf2_64(GroupSize)) { 6773 Error(Loc, "group size must be a power of two"); 6774 return false; 6775 } 6776 if (parseSwizzleOperand(LaneIdx, 6777 0, GroupSize - 1, 6778 "lane id must be in the interval [0,group size - 1]", 6779 Loc)) { 6780 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 6781 return true; 6782 } 6783 return false; 6784 } 6785 6786 bool 6787 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 6788 using namespace llvm::AMDGPU::Swizzle; 6789 6790 SMLoc Loc; 6791 int64_t GroupSize; 6792 6793 if (!parseSwizzleOperand(GroupSize, 6794 2, 32, 6795 "group size must be in the interval [2,32]", 6796 Loc)) { 6797 return false; 6798 } 6799 if (!isPowerOf2_64(GroupSize)) { 6800 Error(Loc, "group size must be a power of two"); 6801 return false; 6802 } 6803 6804 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 6805 return true; 6806 } 6807 6808 bool 6809 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 6810 using namespace llvm::AMDGPU::Swizzle; 6811 6812 SMLoc Loc; 6813 int64_t GroupSize; 6814 6815 if (!parseSwizzleOperand(GroupSize, 6816 1, 16, 6817 "group size must be in the interval [1,16]", 6818 Loc)) { 6819 return false; 6820 } 6821 if (!isPowerOf2_64(GroupSize)) { 6822 Error(Loc, "group size must be a power of two"); 6823 return false; 6824 } 6825 6826 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 6827 return true; 6828 } 6829 6830 bool 6831 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 6832 using namespace llvm::AMDGPU::Swizzle; 6833 6834 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6835 return false; 6836 } 6837 6838 StringRef Ctl; 6839 SMLoc StrLoc = getLoc(); 6840 if (!parseString(Ctl)) { 6841 return false; 6842 } 6843 if (Ctl.size() != BITMASK_WIDTH) { 6844 Error(StrLoc, "expected a 5-character mask"); 6845 return false; 6846 } 6847 6848 unsigned AndMask = 0; 6849 unsigned OrMask = 0; 6850 unsigned XorMask = 0; 6851 6852 for (size_t i = 0; i < Ctl.size(); ++i) { 6853 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 6854 switch(Ctl[i]) { 6855 default: 6856 Error(StrLoc, "invalid mask"); 6857 return false; 6858 case '0': 6859 break; 6860 case '1': 6861 OrMask |= Mask; 6862 break; 6863 case 'p': 6864 AndMask |= Mask; 6865 break; 6866 case 'i': 6867 AndMask |= Mask; 6868 XorMask |= Mask; 6869 break; 6870 } 6871 } 6872 6873 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 6874 return true; 6875 } 6876 6877 bool 6878 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 6879 6880 SMLoc OffsetLoc = getLoc(); 6881 6882 if (!parseExpr(Imm, "a swizzle macro")) { 6883 return false; 6884 } 6885 if (!isUInt<16>(Imm)) { 6886 Error(OffsetLoc, "expected a 16-bit offset"); 6887 return false; 6888 } 6889 return true; 6890 } 6891 6892 bool 6893 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 6894 using namespace llvm::AMDGPU::Swizzle; 6895 6896 if 
(skipToken(AsmToken::LParen, "expected a left parentheses")) { 6897 6898 SMLoc ModeLoc = getLoc(); 6899 bool Ok = false; 6900 6901 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 6902 Ok = parseSwizzleQuadPerm(Imm); 6903 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 6904 Ok = parseSwizzleBitmaskPerm(Imm); 6905 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 6906 Ok = parseSwizzleBroadcast(Imm); 6907 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 6908 Ok = parseSwizzleSwap(Imm); 6909 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 6910 Ok = parseSwizzleReverse(Imm); 6911 } else { 6912 Error(ModeLoc, "expected a swizzle mode"); 6913 } 6914 6915 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 6916 } 6917 6918 return false; 6919 } 6920 6921 OperandMatchResultTy 6922 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 6923 SMLoc S = getLoc(); 6924 int64_t Imm = 0; 6925 6926 if (trySkipId("offset")) { 6927 6928 bool Ok = false; 6929 if (skipToken(AsmToken::Colon, "expected a colon")) { 6930 if (trySkipId("swizzle")) { 6931 Ok = parseSwizzleMacro(Imm); 6932 } else { 6933 Ok = parseSwizzleOffset(Imm); 6934 } 6935 } 6936 6937 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 6938 6939 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 6940 } else { 6941 // Swizzle "offset" operand is optional. 6942 // If it is omitted, try parsing other optional operands. 6943 return parseOptionalOpr(Operands); 6944 } 6945 } 6946 6947 bool 6948 AMDGPUOperand::isSwizzle() const { 6949 return isImmTy(ImmTySwizzle); 6950 } 6951 6952 //===----------------------------------------------------------------------===// 6953 // VGPR Index Mode 6954 //===----------------------------------------------------------------------===// 6955 6956 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 6957 6958 using namespace llvm::AMDGPU::VGPRIndexMode; 6959 6960 if (trySkipToken(AsmToken::RParen)) { 6961 return OFF; 6962 } 6963 6964 int64_t Imm = 0; 6965 6966 while (true) { 6967 unsigned Mode = 0; 6968 SMLoc S = getLoc(); 6969 6970 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 6971 if (trySkipId(IdSymbolic[ModeId])) { 6972 Mode = 1 << ModeId; 6973 break; 6974 } 6975 } 6976 6977 if (Mode == 0) { 6978 Error(S, (Imm == 0)? 
6979 "expected a VGPR index mode or a closing parenthesis" : 6980 "expected a VGPR index mode"); 6981 return UNDEF; 6982 } 6983 6984 if (Imm & Mode) { 6985 Error(S, "duplicate VGPR index mode"); 6986 return UNDEF; 6987 } 6988 Imm |= Mode; 6989 6990 if (trySkipToken(AsmToken::RParen)) 6991 break; 6992 if (!skipToken(AsmToken::Comma, 6993 "expected a comma or a closing parenthesis")) 6994 return UNDEF; 6995 } 6996 6997 return Imm; 6998 } 6999 7000 OperandMatchResultTy 7001 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 7002 7003 using namespace llvm::AMDGPU::VGPRIndexMode; 7004 7005 int64_t Imm = 0; 7006 SMLoc S = getLoc(); 7007 7008 if (trySkipId("gpr_idx", AsmToken::LParen)) { 7009 Imm = parseGPRIdxMacro(); 7010 if (Imm == UNDEF) 7011 return MatchOperand_ParseFail; 7012 } else { 7013 if (getParser().parseAbsoluteExpression(Imm)) 7014 return MatchOperand_ParseFail; 7015 if (Imm < 0 || !isUInt<4>(Imm)) { 7016 Error(S, "invalid immediate: only 4-bit values are legal"); 7017 return MatchOperand_ParseFail; 7018 } 7019 } 7020 7021 Operands.push_back( 7022 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 7023 return MatchOperand_Success; 7024 } 7025 7026 bool AMDGPUOperand::isGPRIdxMode() const { 7027 return isImmTy(ImmTyGprIdxMode); 7028 } 7029 7030 //===----------------------------------------------------------------------===// 7031 // sopp branch targets 7032 //===----------------------------------------------------------------------===// 7033 7034 OperandMatchResultTy 7035 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 7036 7037 // Make sure we are not parsing something 7038 // that looks like a label or an expression but is not. 7039 // This will improve error messages. 7040 if (isRegister() || isModifier()) 7041 return MatchOperand_NoMatch; 7042 7043 if (!parseExpr(Operands)) 7044 return MatchOperand_ParseFail; 7045 7046 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 7047 assert(Opr.isImm() || Opr.isExpr()); 7048 SMLoc Loc = Opr.getStartLoc(); 7049 7050 // Currently we do not support arbitrary expressions as branch targets. 7051 // Only labels and absolute expressions are accepted. 
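  // For example (illustrative), 's_branch target_label' and 's_branch 4' are
  // accepted, while a composite expression such as 'target_label + 4' that
  // does not fold to an absolute value is rejected here.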
7052 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 7053 Error(Loc, "expected an absolute expression or a label"); 7054 } else if (Opr.isImm() && !Opr.isS16Imm()) { 7055 Error(Loc, "expected a 16-bit signed jump offset"); 7056 } 7057 7058 return MatchOperand_Success; 7059 } 7060 7061 //===----------------------------------------------------------------------===// 7062 // Boolean holding registers 7063 //===----------------------------------------------------------------------===// 7064 7065 OperandMatchResultTy 7066 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 7067 return parseReg(Operands); 7068 } 7069 7070 //===----------------------------------------------------------------------===// 7071 // mubuf 7072 //===----------------------------------------------------------------------===// 7073 7074 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 7075 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 7076 } 7077 7078 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 7079 const OperandVector &Operands, 7080 bool IsAtomic, 7081 bool IsLds) { 7082 bool IsLdsOpcode = IsLds; 7083 bool HasLdsModifier = false; 7084 OptionalImmIndexMap OptionalIdx; 7085 unsigned FirstOperandIdx = 1; 7086 bool IsAtomicReturn = false; 7087 7088 if (IsAtomic) { 7089 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7090 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7091 if (!Op.isCPol()) 7092 continue; 7093 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7094 break; 7095 } 7096 7097 if (!IsAtomicReturn) { 7098 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7099 if (NewOpc != -1) 7100 Inst.setOpcode(NewOpc); 7101 } 7102 7103 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7104 SIInstrFlags::IsAtomicRet; 7105 } 7106 7107 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7108 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7109 7110 // Add the register arguments 7111 if (Op.isReg()) { 7112 Op.addRegOperands(Inst, 1); 7113 // Insert a tied src for atomic return dst. 7114 // This cannot be postponed as subsequent calls to 7115 // addImmOperands rely on correct number of MC operands. 7116 if (IsAtomicReturn && i == FirstOperandIdx) 7117 Op.addRegOperands(Inst, 1); 7118 continue; 7119 } 7120 7121 // Handle the case where soffset is an immediate 7122 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7123 Op.addImmOperands(Inst, 1); 7124 continue; 7125 } 7126 7127 HasLdsModifier |= Op.isLDS(); 7128 7129 // Handle tokens like 'offen' which are sometimes hard-coded into the 7130 // asm string. There are no MCInst operands for these. 7131 if (Op.isToken()) { 7132 continue; 7133 } 7134 assert(Op.isImm()); 7135 7136 // Handle optional arguments 7137 OptionalIdx[Op.getImmTy()] = i; 7138 } 7139 7140 // This is a workaround for an llvm quirk which may result in an 7141 // incorrect instruction selection. Lds and non-lds versions of 7142 // MUBUF instructions are identical except that lds versions 7143 // have mandatory 'lds' modifier. However this modifier follows 7144 // optional modifiers and llvm asm matcher regards this 'lds' 7145 // modifier as an optional one. As a result, an lds version 7146 // of opcode may be selected even if it has no 'lds' modifier. 7147 if (IsLdsOpcode && !HasLdsModifier) { 7148 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 7149 if (NoLdsOpcode != -1) { // Got lds version - correct it. 
7150 Inst.setOpcode(NoLdsOpcode); 7151 IsLdsOpcode = false; 7152 } 7153 } 7154 7155 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7156 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7157 7158 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 7159 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7160 } 7161 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7162 } 7163 7164 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7165 OptionalImmIndexMap OptionalIdx; 7166 7167 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7168 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7169 7170 // Add the register arguments 7171 if (Op.isReg()) { 7172 Op.addRegOperands(Inst, 1); 7173 continue; 7174 } 7175 7176 // Handle the case where soffset is an immediate 7177 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7178 Op.addImmOperands(Inst, 1); 7179 continue; 7180 } 7181 7182 // Handle tokens like 'offen' which are sometimes hard-coded into the 7183 // asm string. There are no MCInst operands for these. 7184 if (Op.isToken()) { 7185 continue; 7186 } 7187 assert(Op.isImm()); 7188 7189 // Handle optional arguments 7190 OptionalIdx[Op.getImmTy()] = i; 7191 } 7192 7193 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7194 AMDGPUOperand::ImmTyOffset); 7195 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7196 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7197 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7198 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7199 } 7200 7201 //===----------------------------------------------------------------------===// 7202 // mimg 7203 //===----------------------------------------------------------------------===// 7204 7205 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7206 bool IsAtomic) { 7207 unsigned I = 1; 7208 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7209 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7210 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7211 } 7212 7213 if (IsAtomic) { 7214 // Add src, same as dst 7215 assert(Desc.getNumDefs() == 1); 7216 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7217 } 7218 7219 OptionalImmIndexMap OptionalIdx; 7220 7221 for (unsigned E = Operands.size(); I != E; ++I) { 7222 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7223 7224 // Add the register arguments 7225 if (Op.isReg()) { 7226 Op.addRegOperands(Inst, 1); 7227 } else if (Op.isImmModifier()) { 7228 OptionalIdx[Op.getImmTy()] = I; 7229 } else if (!Op.isToken()) { 7230 llvm_unreachable("unexpected operand type"); 7231 } 7232 } 7233 7234 bool IsGFX10Plus = isGFX10Plus(); 7235 7236 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7237 if (IsGFX10Plus) 7238 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7239 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7240 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7241 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7242 if (IsGFX10Plus) 7243 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7244 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 
7245 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7246 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7247 if (!IsGFX10Plus) 7248 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7249 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7250 } 7251 7252 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7253 cvtMIMG(Inst, Operands, true); 7254 } 7255 7256 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7257 OptionalImmIndexMap OptionalIdx; 7258 bool IsAtomicReturn = false; 7259 7260 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7261 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7262 if (!Op.isCPol()) 7263 continue; 7264 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7265 break; 7266 } 7267 7268 if (!IsAtomicReturn) { 7269 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7270 if (NewOpc != -1) 7271 Inst.setOpcode(NewOpc); 7272 } 7273 7274 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7275 SIInstrFlags::IsAtomicRet; 7276 7277 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7278 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7279 7280 // Add the register arguments 7281 if (Op.isReg()) { 7282 Op.addRegOperands(Inst, 1); 7283 if (IsAtomicReturn && i == 1) 7284 Op.addRegOperands(Inst, 1); 7285 continue; 7286 } 7287 7288 // Handle the case where soffset is an immediate 7289 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7290 Op.addImmOperands(Inst, 1); 7291 continue; 7292 } 7293 7294 // Handle tokens like 'offen' which are sometimes hard-coded into the 7295 // asm string. There are no MCInst operands for these. 7296 if (Op.isToken()) { 7297 continue; 7298 } 7299 assert(Op.isImm()); 7300 7301 // Handle optional arguments 7302 OptionalIdx[Op.getImmTy()] = i; 7303 } 7304 7305 if ((int)Inst.getNumOperands() <= 7306 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7307 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7308 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7309 } 7310 7311 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7312 const OperandVector &Operands) { 7313 for (unsigned I = 1; I < Operands.size(); ++I) { 7314 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7315 if (Operand.isReg()) 7316 Operand.addRegOperands(Inst, 1); 7317 } 7318 7319 Inst.addOperand(MCOperand::createImm(1)); // a16 7320 } 7321 7322 //===----------------------------------------------------------------------===// 7323 // smrd 7324 //===----------------------------------------------------------------------===// 7325 7326 bool AMDGPUOperand::isSMRDOffset8() const { 7327 return isImm() && isUInt<8>(getImm()); 7328 } 7329 7330 bool AMDGPUOperand::isSMEMOffset() const { 7331 return isImm(); // Offset range is checked later by validator. 7332 } 7333 7334 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7335 // 32-bit literals are only supported on CI and we only want to use them 7336 // when the offset is > 8-bits. 
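  // For example (illustrative), an offset of 0x1234 does not fit in 8 bits and
  // therefore matches this predicate, while an offset of 0x12 does not.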
7337 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7338 } 7339 7340 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7341 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7342 } 7343 7344 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7345 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7346 } 7347 7348 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7349 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7350 } 7351 7352 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7353 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7354 } 7355 7356 //===----------------------------------------------------------------------===// 7357 // vop3 7358 //===----------------------------------------------------------------------===// 7359 7360 static bool ConvertOmodMul(int64_t &Mul) { 7361 if (Mul != 1 && Mul != 2 && Mul != 4) 7362 return false; 7363 7364 Mul >>= 1; 7365 return true; 7366 } 7367 7368 static bool ConvertOmodDiv(int64_t &Div) { 7369 if (Div == 1) { 7370 Div = 0; 7371 return true; 7372 } 7373 7374 if (Div == 2) { 7375 Div = 3; 7376 return true; 7377 } 7378 7379 return false; 7380 } 7381 7382 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7383 // This is intentional and ensures compatibility with sp3. 7384 // See bug 35397 for details. 7385 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7386 if (BoundCtrl == 0 || BoundCtrl == 1) { 7387 BoundCtrl = 1; 7388 return true; 7389 } 7390 return false; 7391 } 7392 7393 // Note: the order in this table matches the order of operands in AsmString. 7394 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7395 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7396 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7397 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7398 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7399 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7400 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7401 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7402 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7403 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7404 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7405 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7406 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7407 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7408 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7409 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7410 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7411 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7412 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7413 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7414 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7415 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7416 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7417 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7418 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7419 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7420 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7421 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7422 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7423 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7424 {"src0_sel", 
AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 7425 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 7426 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 7427 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 7428 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 7429 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 7430 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 7431 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 7432 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 7433 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 7434 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 7435 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 7436 }; 7437 7438 void AMDGPUAsmParser::onBeginOfFile() { 7439 if (!getParser().getStreamer().getTargetStreamer() || 7440 getSTI().getTargetTriple().getArch() == Triple::r600) 7441 return; 7442 7443 if (!getTargetStreamer().getTargetID()) 7444 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString()); 7445 7446 if (isHsaAbiVersion3AndAbove(&getSTI())) 7447 getTargetStreamer().EmitDirectiveAMDGCNTarget(); 7448 } 7449 7450 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 7451 7452 OperandMatchResultTy res = parseOptionalOpr(Operands); 7453 7454 // This is a hack to enable hardcoded mandatory operands which follow 7455 // optional operands. 7456 // 7457 // The current design assumes that all operands after the first optional 7458 // operand are also optional. However, the implementation of some instructions 7459 // violates this rule (see e.g. flat/global atomics, which have hardcoded 'glc' operands). 7460 // 7461 // To alleviate this problem, we have to (implicitly) parse extra operands 7462 // to make sure the autogenerated parser of custom operands never hits a 7463 // hardcoded mandatory operand.
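// For example, a return atomic written roughly as
// 'flat_atomic_swap v0, v[1:2], v2 offset:8 glc' (schematic) has the hardcoded
// 'glc' token after the optional 'offset' operand; that is the situation the
// lookahead loop below is guarding against.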
7464 7465 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7466 if (res != MatchOperand_Success || 7467 isToken(AsmToken::EndOfStatement)) 7468 break; 7469 7470 trySkipToken(AsmToken::Comma); 7471 res = parseOptionalOpr(Operands); 7472 } 7473 7474 return res; 7475 } 7476 7477 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7478 OperandMatchResultTy res; 7479 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7480 // try to parse any optional operand here 7481 if (Op.IsBit) { 7482 res = parseNamedBit(Op.Name, Operands, Op.Type); 7483 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7484 res = parseOModOperand(Operands); 7485 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7486 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7487 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7488 res = parseSDWASel(Operands, Op.Name, Op.Type); 7489 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7490 res = parseSDWADstUnused(Operands); 7491 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7492 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7493 Op.Type == AMDGPUOperand::ImmTyNegLo || 7494 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7495 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7496 Op.ConvertResult); 7497 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7498 res = parseDim(Operands); 7499 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7500 res = parseCPol(Operands); 7501 } else { 7502 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7503 } 7504 if (res != MatchOperand_NoMatch) { 7505 return res; 7506 } 7507 } 7508 return MatchOperand_NoMatch; 7509 } 7510 7511 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 7512 StringRef Name = getTokenStr(); 7513 if (Name == "mul") { 7514 return parseIntWithPrefix("mul", Operands, 7515 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 7516 } 7517 7518 if (Name == "div") { 7519 return parseIntWithPrefix("div", Operands, 7520 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 7521 } 7522 7523 return MatchOperand_NoMatch; 7524 } 7525 7526 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 7527 cvtVOP3P(Inst, Operands); 7528 7529 int Opc = Inst.getOpcode(); 7530 7531 int SrcNum; 7532 const int Ops[] = { AMDGPU::OpName::src0, 7533 AMDGPU::OpName::src1, 7534 AMDGPU::OpName::src2 }; 7535 for (SrcNum = 0; 7536 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 7537 ++SrcNum); 7538 assert(SrcNum > 0); 7539 7540 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7541 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7542 7543 if ((OpSel & (1 << SrcNum)) != 0) { 7544 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 7545 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 7546 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 7547 } 7548 } 7549 7550 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 7551 // 1. This operand is input modifiers 7552 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 7553 // 2. This is not last operand 7554 && Desc.NumOperands > (OpNum + 1) 7555 // 3. Next operand is register class 7556 && Desc.OpInfo[OpNum + 1].RegClass != -1 7557 // 4. 
Next register is not tied to any other operand 7558 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 7559 } 7560 7561 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 7562 { 7563 OptionalImmIndexMap OptionalIdx; 7564 unsigned Opc = Inst.getOpcode(); 7565 7566 unsigned I = 1; 7567 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7568 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7569 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7570 } 7571 7572 for (unsigned E = Operands.size(); I != E; ++I) { 7573 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7574 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7575 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7576 } else if (Op.isInterpSlot() || 7577 Op.isInterpAttr() || 7578 Op.isAttrChan()) { 7579 Inst.addOperand(MCOperand::createImm(Op.getImm())); 7580 } else if (Op.isImmModifier()) { 7581 OptionalIdx[Op.getImmTy()] = I; 7582 } else { 7583 llvm_unreachable("unhandled operand type"); 7584 } 7585 } 7586 7587 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 7588 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 7589 } 7590 7591 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7592 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7593 } 7594 7595 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7596 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7597 } 7598 } 7599 7600 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 7601 OptionalImmIndexMap &OptionalIdx) { 7602 unsigned Opc = Inst.getOpcode(); 7603 7604 unsigned I = 1; 7605 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7606 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7607 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7608 } 7609 7610 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 7611 // This instruction has src modifiers 7612 for (unsigned E = Operands.size(); I != E; ++I) { 7613 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7614 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7615 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7616 } else if (Op.isImmModifier()) { 7617 OptionalIdx[Op.getImmTy()] = I; 7618 } else if (Op.isRegOrImm()) { 7619 Op.addRegOrImmOperands(Inst, 1); 7620 } else { 7621 llvm_unreachable("unhandled operand type"); 7622 } 7623 } 7624 } else { 7625 // No src modifiers 7626 for (unsigned E = Operands.size(); I != E; ++I) { 7627 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7628 if (Op.isMod()) { 7629 OptionalIdx[Op.getImmTy()] = I; 7630 } else { 7631 Op.addRegOrImmOperands(Inst, 1); 7632 } 7633 } 7634 } 7635 7636 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7637 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7638 } 7639 7640 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7641 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7642 } 7643 7644 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 7645 // it has src2 register operand that is tied to dst operand 7646 // we don't allow modifiers for this operand in assembler so src2_modifiers 7647 // should be 0. 
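// In practice this means that for e.g. 'v_mac_f32_e64 v0, v1, v2' the MCInst
// gets an extra src2_modifiers operand of 0, followed by a copy of the vdst
// register (v0) as the tied src2.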
7648 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 7649 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 7650 Opc == AMDGPU::V_MAC_F32_e64_vi || 7651 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 7652 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 7653 Opc == AMDGPU::V_MAC_F16_e64_vi || 7654 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 7655 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 7656 Opc == AMDGPU::V_FMAC_F32_e64_vi || 7657 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 7658 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 7659 auto it = Inst.begin(); 7660 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 7661 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 7662 ++it; 7663 // Copy the operand to ensure it's not invalidated when Inst grows. 7664 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 7665 } 7666 } 7667 7668 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 7669 OptionalImmIndexMap OptionalIdx; 7670 cvtVOP3(Inst, Operands, OptionalIdx); 7671 } 7672 7673 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 7674 OptionalImmIndexMap &OptIdx) { 7675 const int Opc = Inst.getOpcode(); 7676 const MCInstrDesc &Desc = MII.get(Opc); 7677 7678 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 7679 7680 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 7681 assert(!IsPacked); 7682 Inst.addOperand(Inst.getOperand(0)); 7683 } 7684 7685 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 7686 // instruction, and then figure out where to actually put the modifiers 7687 7688 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7689 if (OpSelIdx != -1) { 7690 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 7691 } 7692 7693 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 7694 if (OpSelHiIdx != -1) { 7695 int DefaultVal = IsPacked ? 
-1 : 0; 7696 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 7697 DefaultVal); 7698 } 7699 7700 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 7701 if (NegLoIdx != -1) { 7702 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 7703 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 7704 } 7705 7706 const int Ops[] = { AMDGPU::OpName::src0, 7707 AMDGPU::OpName::src1, 7708 AMDGPU::OpName::src2 }; 7709 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 7710 AMDGPU::OpName::src1_modifiers, 7711 AMDGPU::OpName::src2_modifiers }; 7712 7713 unsigned OpSel = 0; 7714 unsigned OpSelHi = 0; 7715 unsigned NegLo = 0; 7716 unsigned NegHi = 0; 7717 7718 if (OpSelIdx != -1) 7719 OpSel = Inst.getOperand(OpSelIdx).getImm(); 7720 7721 if (OpSelHiIdx != -1) 7722 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 7723 7724 if (NegLoIdx != -1) { 7725 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 7726 NegLo = Inst.getOperand(NegLoIdx).getImm(); 7727 NegHi = Inst.getOperand(NegHiIdx).getImm(); 7728 } 7729 7730 for (int J = 0; J < 3; ++J) { 7731 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 7732 if (OpIdx == -1) 7733 break; 7734 7735 uint32_t ModVal = 0; 7736 7737 if ((OpSel & (1 << J)) != 0) 7738 ModVal |= SISrcMods::OP_SEL_0; 7739 7740 if ((OpSelHi & (1 << J)) != 0) 7741 ModVal |= SISrcMods::OP_SEL_1; 7742 7743 if ((NegLo & (1 << J)) != 0) 7744 ModVal |= SISrcMods::NEG; 7745 7746 if ((NegHi & (1 << J)) != 0) 7747 ModVal |= SISrcMods::NEG_HI; 7748 7749 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 7750 7751 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 7752 } 7753 } 7754 7755 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 7756 OptionalImmIndexMap OptIdx; 7757 cvtVOP3(Inst, Operands, OptIdx); 7758 cvtVOP3P(Inst, Operands, OptIdx); 7759 } 7760 7761 //===----------------------------------------------------------------------===// 7762 // dpp 7763 //===----------------------------------------------------------------------===// 7764 7765 bool AMDGPUOperand::isDPP8() const { 7766 return isImmTy(ImmTyDPP8); 7767 } 7768 7769 bool AMDGPUOperand::isDPPCtrl() const { 7770 using namespace AMDGPU::DPP; 7771 7772 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 7773 if (result) { 7774 int64_t Imm = getImm(); 7775 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 7776 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 7777 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 7778 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 7779 (Imm == DppCtrl::WAVE_SHL1) || 7780 (Imm == DppCtrl::WAVE_ROL1) || 7781 (Imm == DppCtrl::WAVE_SHR1) || 7782 (Imm == DppCtrl::WAVE_ROR1) || 7783 (Imm == DppCtrl::ROW_MIRROR) || 7784 (Imm == DppCtrl::ROW_HALF_MIRROR) || 7785 (Imm == DppCtrl::BCAST15) || 7786 (Imm == DppCtrl::BCAST31) || 7787 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 7788 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 7789 } 7790 return false; 7791 } 7792 7793 //===----------------------------------------------------------------------===// 7794 // mAI 7795 //===----------------------------------------------------------------------===// 7796 7797 bool AMDGPUOperand::isBLGP() const { 7798 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 7799 } 7800 7801 bool 
AMDGPUOperand::isCBSZ() const { 7802 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 7803 } 7804 7805 bool AMDGPUOperand::isABID() const { 7806 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 7807 } 7808 7809 bool AMDGPUOperand::isS16Imm() const { 7810 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 7811 } 7812 7813 bool AMDGPUOperand::isU16Imm() const { 7814 return isImm() && isUInt<16>(getImm()); 7815 } 7816 7817 //===----------------------------------------------------------------------===// 7818 // dim 7819 //===----------------------------------------------------------------------===// 7820 7821 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 7822 // We want to allow "dim:1D" etc., 7823 // but the initial 1 is tokenized as an integer. 7824 std::string Token; 7825 if (isToken(AsmToken::Integer)) { 7826 SMLoc Loc = getToken().getEndLoc(); 7827 Token = std::string(getTokenStr()); 7828 lex(); 7829 if (getLoc() != Loc) 7830 return false; 7831 } 7832 7833 StringRef Suffix; 7834 if (!parseId(Suffix)) 7835 return false; 7836 Token += Suffix; 7837 7838 StringRef DimId = Token; 7839 if (DimId.startswith("SQ_RSRC_IMG_")) 7840 DimId = DimId.drop_front(12); 7841 7842 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 7843 if (!DimInfo) 7844 return false; 7845 7846 Encoding = DimInfo->Encoding; 7847 return true; 7848 } 7849 7850 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 7851 if (!isGFX10Plus()) 7852 return MatchOperand_NoMatch; 7853 7854 SMLoc S = getLoc(); 7855 7856 if (!trySkipId("dim", AsmToken::Colon)) 7857 return MatchOperand_NoMatch; 7858 7859 unsigned Encoding; 7860 SMLoc Loc = getLoc(); 7861 if (!parseDimId(Encoding)) { 7862 Error(Loc, "invalid dim value"); 7863 return MatchOperand_ParseFail; 7864 } 7865 7866 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 7867 AMDGPUOperand::ImmTyDim)); 7868 return MatchOperand_Success; 7869 } 7870 7871 //===----------------------------------------------------------------------===// 7872 // dpp 7873 //===----------------------------------------------------------------------===// 7874 7875 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 7876 SMLoc S = getLoc(); 7877 7878 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 7879 return MatchOperand_NoMatch; 7880 7881 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 7882 7883 int64_t Sels[8]; 7884 7885 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7886 return MatchOperand_ParseFail; 7887 7888 for (size_t i = 0; i < 8; ++i) { 7889 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7890 return MatchOperand_ParseFail; 7891 7892 SMLoc Loc = getLoc(); 7893 if (getParser().parseAbsoluteExpression(Sels[i])) 7894 return MatchOperand_ParseFail; 7895 if (0 > Sels[i] || 7 < Sels[i]) { 7896 Error(Loc, "expected a 3-bit value"); 7897 return MatchOperand_ParseFail; 7898 } 7899 } 7900 7901 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7902 return MatchOperand_ParseFail; 7903 7904 unsigned DPP8 = 0; 7905 for (size_t i = 0; i < 8; ++i) 7906 DPP8 |= (Sels[i] << (i * 3)); 7907 7908 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 7909 return MatchOperand_Success; 7910 } 7911 7912 bool 7913 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 7914 const OperandVector &Operands) { 7915 if (Ctrl == "row_newbcast") 7916 return isGFX90A(); 7917 7918 if (Ctrl == "row_share" || 7919 Ctrl 
== "row_xmask") 7920 return isGFX10Plus(); 7921 7922 if (Ctrl == "wave_shl" || 7923 Ctrl == "wave_shr" || 7924 Ctrl == "wave_rol" || 7925 Ctrl == "wave_ror" || 7926 Ctrl == "row_bcast") 7927 return isVI() || isGFX9(); 7928 7929 return Ctrl == "row_mirror" || 7930 Ctrl == "row_half_mirror" || 7931 Ctrl == "quad_perm" || 7932 Ctrl == "row_shl" || 7933 Ctrl == "row_shr" || 7934 Ctrl == "row_ror"; 7935 } 7936 7937 int64_t 7938 AMDGPUAsmParser::parseDPPCtrlPerm() { 7939 // quad_perm:[%d,%d,%d,%d] 7940 7941 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7942 return -1; 7943 7944 int64_t Val = 0; 7945 for (int i = 0; i < 4; ++i) { 7946 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7947 return -1; 7948 7949 int64_t Temp; 7950 SMLoc Loc = getLoc(); 7951 if (getParser().parseAbsoluteExpression(Temp)) 7952 return -1; 7953 if (Temp < 0 || Temp > 3) { 7954 Error(Loc, "expected a 2-bit value"); 7955 return -1; 7956 } 7957 7958 Val += (Temp << i * 2); 7959 } 7960 7961 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7962 return -1; 7963 7964 return Val; 7965 } 7966 7967 int64_t 7968 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 7969 using namespace AMDGPU::DPP; 7970 7971 // sel:%d 7972 7973 int64_t Val; 7974 SMLoc Loc = getLoc(); 7975 7976 if (getParser().parseAbsoluteExpression(Val)) 7977 return -1; 7978 7979 struct DppCtrlCheck { 7980 int64_t Ctrl; 7981 int Lo; 7982 int Hi; 7983 }; 7984 7985 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 7986 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 7987 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 7988 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 7989 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 7990 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 7991 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 7992 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 7993 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 7994 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 7995 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 7996 .Default({-1, 0, 0}); 7997 7998 bool Valid; 7999 if (Check.Ctrl == -1) { 8000 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 8001 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 8002 } else { 8003 Valid = Check.Lo <= Val && Val <= Check.Hi; 8004 Val = (Check.Lo == Check.Hi) ? 
Check.Ctrl : (Check.Ctrl | Val); 8005 } 8006 8007 if (!Valid) { 8008 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 8009 return -1; 8010 } 8011 8012 return Val; 8013 } 8014 8015 OperandMatchResultTy 8016 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 8017 using namespace AMDGPU::DPP; 8018 8019 if (!isToken(AsmToken::Identifier) || 8020 !isSupportedDPPCtrl(getTokenStr(), Operands)) 8021 return MatchOperand_NoMatch; 8022 8023 SMLoc S = getLoc(); 8024 int64_t Val = -1; 8025 StringRef Ctrl; 8026 8027 parseId(Ctrl); 8028 8029 if (Ctrl == "row_mirror") { 8030 Val = DppCtrl::ROW_MIRROR; 8031 } else if (Ctrl == "row_half_mirror") { 8032 Val = DppCtrl::ROW_HALF_MIRROR; 8033 } else { 8034 if (skipToken(AsmToken::Colon, "expected a colon")) { 8035 if (Ctrl == "quad_perm") { 8036 Val = parseDPPCtrlPerm(); 8037 } else { 8038 Val = parseDPPCtrlSel(Ctrl); 8039 } 8040 } 8041 } 8042 8043 if (Val == -1) 8044 return MatchOperand_ParseFail; 8045 8046 Operands.push_back( 8047 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 8048 return MatchOperand_Success; 8049 } 8050 8051 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 8052 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 8053 } 8054 8055 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 8056 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 8057 } 8058 8059 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 8060 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 8061 } 8062 8063 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 8064 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 8065 } 8066 8067 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 8068 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 8069 } 8070 8071 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8072 OptionalImmIndexMap OptionalIdx; 8073 8074 unsigned Opc = Inst.getOpcode(); 8075 bool HasModifiers = 8076 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8077 unsigned I = 1; 8078 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8079 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8080 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8081 } 8082 8083 int Fi = 0; 8084 for (unsigned E = Operands.size(); I != E; ++I) { 8085 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8086 MCOI::TIED_TO); 8087 if (TiedTo != -1) { 8088 assert((unsigned)TiedTo < Inst.getNumOperands()); 8089 // handle tied old or src2 for MAC instructions 8090 Inst.addOperand(Inst.getOperand(TiedTo)); 8091 } 8092 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8093 // Add the register arguments 8094 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8095 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8096 // Skip it. 
8097 continue; 8098 } 8099 8100 if (IsDPP8) { 8101 if (Op.isDPP8()) { 8102 Op.addImmOperands(Inst, 1); 8103 } else if (HasModifiers && 8104 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8105 Op.addRegWithFPInputModsOperands(Inst, 2); 8106 } else if (Op.isFI()) { 8107 Fi = Op.getImm(); 8108 } else if (Op.isReg()) { 8109 Op.addRegOperands(Inst, 1); 8110 } else { 8111 llvm_unreachable("Invalid operand type"); 8112 } 8113 } else { 8114 if (HasModifiers && 8115 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8116 Op.addRegWithFPInputModsOperands(Inst, 2); 8117 } else if (Op.isReg()) { 8118 Op.addRegOperands(Inst, 1); 8119 } else if (Op.isDPPCtrl()) { 8120 Op.addImmOperands(Inst, 1); 8121 } else if (Op.isImm()) { 8122 // Handle optional arguments 8123 OptionalIdx[Op.getImmTy()] = I; 8124 } else { 8125 llvm_unreachable("Invalid operand type"); 8126 } 8127 } 8128 } 8129 8130 if (IsDPP8) { 8131 using namespace llvm::AMDGPU::DPP; 8132 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 8133 } else { 8134 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8135 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8136 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8137 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8138 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8139 } 8140 } 8141 } 8142 8143 //===----------------------------------------------------------------------===// 8144 // sdwa 8145 //===----------------------------------------------------------------------===// 8146 8147 OperandMatchResultTy 8148 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 8149 AMDGPUOperand::ImmTy Type) { 8150 using namespace llvm::AMDGPU::SDWA; 8151 8152 SMLoc S = getLoc(); 8153 StringRef Value; 8154 OperandMatchResultTy res; 8155 8156 SMLoc StringLoc; 8157 res = parseStringWithPrefix(Prefix, Value, StringLoc); 8158 if (res != MatchOperand_Success) { 8159 return res; 8160 } 8161 8162 int64_t Int; 8163 Int = StringSwitch<int64_t>(Value) 8164 .Case("BYTE_0", SdwaSel::BYTE_0) 8165 .Case("BYTE_1", SdwaSel::BYTE_1) 8166 .Case("BYTE_2", SdwaSel::BYTE_2) 8167 .Case("BYTE_3", SdwaSel::BYTE_3) 8168 .Case("WORD_0", SdwaSel::WORD_0) 8169 .Case("WORD_1", SdwaSel::WORD_1) 8170 .Case("DWORD", SdwaSel::DWORD) 8171 .Default(0xffffffff); 8172 8173 if (Int == 0xffffffff) { 8174 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8175 return MatchOperand_ParseFail; 8176 } 8177 8178 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8179 return MatchOperand_Success; 8180 } 8181 8182 OperandMatchResultTy 8183 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8184 using namespace llvm::AMDGPU::SDWA; 8185 8186 SMLoc S = getLoc(); 8187 StringRef Value; 8188 OperandMatchResultTy res; 8189 8190 SMLoc StringLoc; 8191 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8192 if (res != MatchOperand_Success) { 8193 return res; 8194 } 8195 8196 int64_t Int; 8197 Int = StringSwitch<int64_t>(Value) 8198 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8199 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8200 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8201 .Default(0xffffffff); 8202 8203 if (Int == 0xffffffff) { 8204 Error(StringLoc, "invalid dst_unused value"); 8205 return MatchOperand_ParseFail; 8206 } 8207 8208 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, 
AMDGPUOperand::ImmTySdwaDstUnused)); 8209 return MatchOperand_Success; 8210 } 8211 8212 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8213 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8214 } 8215 8216 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8217 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8218 } 8219 8220 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8221 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8222 } 8223 8224 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8225 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8226 } 8227 8228 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8229 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8230 } 8231 8232 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8233 uint64_t BasicInstType, 8234 bool SkipDstVcc, 8235 bool SkipSrcVcc) { 8236 using namespace llvm::AMDGPU::SDWA; 8237 8238 OptionalImmIndexMap OptionalIdx; 8239 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 8240 bool SkippedVcc = false; 8241 8242 unsigned I = 1; 8243 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8244 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8245 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8246 } 8247 8248 for (unsigned E = Operands.size(); I != E; ++I) { 8249 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8250 if (SkipVcc && !SkippedVcc && Op.isReg() && 8251 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 8252 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 8253 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 8254 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 8255 // Skip VCC only if we didn't skip it on previous iteration. 8256 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
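// Hence the operand counts checked below: getNumOperands() == 1 means only the
// dst has been added so far (the skipped 'vcc' follows the dst), while == 5
// means dst + (src0_modifiers, src0) + (src1_modifiers, src1) have been added
// (the skipped 'vcc' is the trailing carry-in operand).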
8257 if (BasicInstType == SIInstrFlags::VOP2 && 8258 ((SkipDstVcc && Inst.getNumOperands() == 1) || 8259 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 8260 SkippedVcc = true; 8261 continue; 8262 } else if (BasicInstType == SIInstrFlags::VOPC && 8263 Inst.getNumOperands() == 0) { 8264 SkippedVcc = true; 8265 continue; 8266 } 8267 } 8268 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8269 Op.addRegOrImmWithInputModsOperands(Inst, 2); 8270 } else if (Op.isImm()) { 8271 // Handle optional arguments 8272 OptionalIdx[Op.getImmTy()] = I; 8273 } else { 8274 llvm_unreachable("Invalid operand type"); 8275 } 8276 SkippedVcc = false; 8277 } 8278 8279 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 8280 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 8281 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 8282 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 8283 switch (BasicInstType) { 8284 case SIInstrFlags::VOP1: 8285 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8286 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8287 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8288 } 8289 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8290 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8291 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8292 break; 8293 8294 case SIInstrFlags::VOP2: 8295 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8296 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8297 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8298 } 8299 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8300 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8301 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8302 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8303 break; 8304 8305 case SIInstrFlags::VOPC: 8306 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 8307 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8308 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8309 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8310 break; 8311 8312 default: 8313 llvm_unreachable("Invalid instruction type. 
Only VOP1, VOP2 and VOPC allowed"); 8314 } 8315 } 8316 8317 // Special case v_mac_{f16, f32}: 8318 // it has a src2 register operand that is tied to the dst operand. 8319 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 8320 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 8321 auto it = Inst.begin(); 8322 std::advance( 8323 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 8324 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 8325 } 8326 } 8327 8328 //===----------------------------------------------------------------------===// 8329 // mAI 8330 //===----------------------------------------------------------------------===// 8331 8332 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const { 8333 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP); 8334 } 8335 8336 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const { 8337 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ); 8338 } 8339 8340 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const { 8341 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID); 8342 } 8343 8344 /// Force static initialization. 8345 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() { 8346 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget()); 8347 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 8348 } 8349 8350 #define GET_REGISTER_MATCHER 8351 #define GET_MATCHER_IMPLEMENTATION 8352 #define GET_MNEMONIC_SPELL_CHECKER 8353 #define GET_MNEMONIC_CHECKER 8354 #include "AMDGPUGenAsmMatcher.inc" 8355 8356 // This function should be defined after the auto-generated include so that we 8357 // have the MatchClassKind enum defined. 8358 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 8359 unsigned Kind) { 8360 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 8361 // But MatchInstructionImpl() expects to see a token and fails to validate the 8362 // operand. This method checks if we were given an immediate operand but 8363 // expected the corresponding token. 8364 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 8365 switch (Kind) { 8366 case MCK_addr64: 8367 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 8368 case MCK_gds: 8369 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 8370 case MCK_lds: 8371 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 8372 case MCK_idxen: 8373 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 8374 case MCK_offen: 8375 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 8376 case MCK_SSrcB32: 8377 // When operands have expression values, they will return true for isToken, 8378 // because it is not possible to distinguish between a token and an 8379 // expression at parse time. MatchInstructionImpl() will always try to 8380 // match an operand as a token when isToken returns true, and when the 8381 // name of the expression is not a valid token, the match will fail, 8382 // so we need to handle it here. 8383 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 8384 case MCK_SSrcF32: 8385 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 8386 case MCK_SoppBrTarget: 8387 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; 8388 case MCK_VReg32OrOff: 8389 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 8390 case MCK_InterpSlot: 8391 return Operand.isInterpSlot() ?
Match_Success : Match_InvalidOperand; 8392 case MCK_Attr: 8393 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 8394 case MCK_AttrChan: 8395 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 8396 case MCK_ImmSMEMOffset: 8397 return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand; 8398 case MCK_SReg_64: 8399 case MCK_SReg_64_XEXEC: 8400 // Null is defined as a 32-bit register but 8401 // it should also be enabled with 64-bit operands. 8402 // The following code enables it for SReg_64 operands 8403 // used as source and destination. Remaining source 8404 // operands are handled in isInlinableImm. 8405 return Operand.isNull() ? Match_Success : Match_InvalidOperand; 8406 default: 8407 return Match_InvalidOperand; 8408 } 8409 } 8410 8411 //===----------------------------------------------------------------------===// 8412 // endpgm 8413 //===----------------------------------------------------------------------===// 8414 8415 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 8416 SMLoc S = getLoc(); 8417 int64_t Imm = 0; 8418 8419 if (!parseExpr(Imm)) { 8420 // The operand is optional, if not present default to 0 8421 Imm = 0; 8422 } 8423 8424 if (!isUInt<16>(Imm)) { 8425 Error(S, "expected a 16-bit value"); 8426 return MatchOperand_ParseFail; 8427 } 8428 8429 Operands.push_back( 8430 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 8431 return MatchOperand_Success; 8432 } 8433 8434 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 8435